revlogv2: no longer attempt to use inline for new revlog...
marmoute - r48035:80164d50 default
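This change stops requesting inline storage when creating a new revlogv2
revlog: in `_init_opts()`, `new_header = REVLOGV2 | FLAG_INLINE_DATA`
becomes `new_header = REVLOGV2`, so new version-2 revlogs keep their index
and data in separate files from the start. As a minimal sketch of the bit
layout involved (the constant values below are assumptions for
illustration, not taken from this page):

# version number lives in the low 16 bits of the header,
# feature flags in the high bits
FLAG_INLINE_DATA = 1 << 16  # assumed value
REVLOGV2 = 0xDEAD           # assumed placeholder version number

old_header = REVLOGV2 | FLAG_INLINE_DATA  # before this change
new_header = REVLOGV2                     # after: inline bit never set
assert new_header & 0xFFFF == REVLOGV2
assert (old_header >> 16) & 1 and not (new_header >> 16) & 1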
@@ -1,3439 +1,3439 b''
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import binascii
import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    ALL_KINDS,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    revlogv0,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)
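
# Illustrative note (not part of the original file): offset_type() packs the
# byte offset into the high bits and the flag bits into the low 16 bits, e.g.
# offset_type(1024, 0) == 1024 << 16 == 0x4000000. The inverse operations
# used throughout this module are:
#     offset = field >> 16
#     flags = field & 0xFFFF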


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)


@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, revlogv2=True)
    return index, cache


if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).


    Internal details
    ----------------

    A large part of the revlog logic deals with revisions' "index entries",
    tuple objects that contain the same "items" whatever the revlog version.
    Different versions will have different ways of storing these items
    (sometimes not having them at all), but the tuple will always be the same.
    New fields are usually added at the end to avoid breaking existing code
    that relies on the existing order. The fields are defined as follows:

    [0] offset:
        The byte index of the start of revision data chunk.
        That value is shifted up by 16 bits. Use "offset = field >> 16" to
        retrieve it.

    flags:
        A flag field that carries special information or changes the behavior
        of the revision. (see `REVIDX_*` constants for details)
        The flag field only occupies the first 16 bits of this field,
        use "flags = field & 0xFFFF" to retrieve the value.

    [1] compressed length:
        The size, in bytes, of the chunk on disk

    [2] uncompressed length:
        The size, in bytes, of the full revision once reconstructed.

    [3] base rev:
        Either the base of the revision delta chain (without general
        delta), or the base of the delta (stored in the data chunk)
        with general delta.

    [4] link rev:
        Changelog revision number of the changeset introducing this
        revision.

    [5] parent 1 rev:
        Revision number of the first parent

    [6] parent 2 rev:
        Revision number of the second parent

    [7] node id:
        The node id of the current revision

    [8] sidedata offset:
        The byte index of the start of the revision's side-data chunk.

    [9] sidedata chunk length:
        The size, in bytes, of the revision's side-data chunk.

    [10] data compression mode:
        two bits that detail the way the data chunk is compressed on disk.
        (see "COMP_MODE_*" constants for details). For revlog version 0 and
        1 this will always be COMP_MODE_INLINE.

    [11] side-data compression mode:
        two bits that detail the way the sidedata chunk is compressed on disk.
        (see "COMP_MODE_*" constants for details)
    """

    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exist for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code understand what the revlog
        is about without having to resort to heuristics and index filename
        analysis. Note that this must reliably be set by normal code, but
        test, debug, or performance measurement code might not set this to an
        accurate value.
381 """
381 """
382 self.upperboundcomp = upperboundcomp
382 self.upperboundcomp = upperboundcomp
383
383
384 self.radix = radix
384 self.radix = radix
385
385
386 self._docket_file = None
386 self._docket_file = None
387 self._indexfile = None
387 self._indexfile = None
388 self._datafile = None
388 self._datafile = None
389 self._nodemap_file = None
389 self._nodemap_file = None
390 self.postfix = postfix
390 self.postfix = postfix
391 self._trypending = trypending
391 self._trypending = trypending
392 self.opener = opener
392 self.opener = opener
393 if persistentnodemap:
393 if persistentnodemap:
394 self._nodemap_file = nodemaputil.get_nodemap_file(self)
394 self._nodemap_file = nodemaputil.get_nodemap_file(self)
395
395
396 assert target[0] in ALL_KINDS
396 assert target[0] in ALL_KINDS
397 assert len(target) == 2
397 assert len(target) == 2
398 self.target = target
398 self.target = target
399 # When True, indexfile is opened with checkambig=True at writing, to
399 # When True, indexfile is opened with checkambig=True at writing, to
400 # avoid file stat ambiguity.
400 # avoid file stat ambiguity.
401 self._checkambig = checkambig
401 self._checkambig = checkambig
402 self._mmaplargeindex = mmaplargeindex
402 self._mmaplargeindex = mmaplargeindex
403 self._censorable = censorable
403 self._censorable = censorable
404 # 3-tuple of (node, rev, text) for a raw revision.
404 # 3-tuple of (node, rev, text) for a raw revision.
405 self._revisioncache = None
405 self._revisioncache = None
406 # Maps rev to chain base rev.
406 # Maps rev to chain base rev.
407 self._chainbasecache = util.lrucachedict(100)
407 self._chainbasecache = util.lrucachedict(100)
408 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
408 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
409 self._chunkcache = (0, b'')
409 self._chunkcache = (0, b'')
410 # How much data to read and cache into the raw revlog data cache.
410 # How much data to read and cache into the raw revlog data cache.
411 self._chunkcachesize = 65536
411 self._chunkcachesize = 65536
412 self._maxchainlen = None
412 self._maxchainlen = None
413 self._deltabothparents = True
413 self._deltabothparents = True
414 self.index = None
414 self.index = None
415 self._docket = None
415 self._docket = None
416 self._nodemap_docket = None
416 self._nodemap_docket = None
417 # Mapping of partial identifiers to full nodes.
417 # Mapping of partial identifiers to full nodes.
418 self._pcache = {}
418 self._pcache = {}
419 # Mapping of revision integer to full node.
419 # Mapping of revision integer to full node.
420 self._compengine = b'zlib'
420 self._compengine = b'zlib'
421 self._compengineopts = {}
421 self._compengineopts = {}
422 self._maxdeltachainspan = -1
422 self._maxdeltachainspan = -1
423 self._withsparseread = False
423 self._withsparseread = False
424 self._sparserevlog = False
424 self._sparserevlog = False
425 self.hassidedata = False
425 self.hassidedata = False
426 self._srdensitythreshold = 0.50
426 self._srdensitythreshold = 0.50
427 self._srmingapsize = 262144
427 self._srmingapsize = 262144
428
428
429 # Make copy of flag processors so each revlog instance can support
429 # Make copy of flag processors so each revlog instance can support
430 # custom flags.
430 # custom flags.
431 self._flagprocessors = dict(flagutil.flagprocessors)
431 self._flagprocessors = dict(flagutil.flagprocessors)
432
432
433 # 2-tuple of file handles being used for active writing.
433 # 2-tuple of file handles being used for active writing.
434 self._writinghandles = None
434 self._writinghandles = None
435 # prevent nesting of addgroup
435 # prevent nesting of addgroup
436 self._adding_group = None
436 self._adding_group = None
437
437
438 self._loadindex()
438 self._loadindex()
439
439
440 self._concurrencychecker = concurrencychecker
440 self._concurrencychecker = concurrencychecker
441
441
442 def _init_opts(self):
442 def _init_opts(self):
443 """process options (from above/config) to setup associated default revlog mode
443 """process options (from above/config) to setup associated default revlog mode

        These values might be affected when actually reading on-disk information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size at which to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'revlogv2' in opts:
-            new_header = REVLOGV2 | FLAG_INLINE_DATA
+            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap
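
    # Illustrative note (not part of the original file): headers produced by
    # _init_opts() above, assuming version in the low 16 bits and feature
    # flags in the high 16 bits:
    #   opts = {b'revlogv2': True} -> REVLOGV2 (inline no longer requested)
    #   opts = {b'revlogv1': True, b'generaldelta': True}
    #       -> REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA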

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return a file content with or without mmap

        If the file is missing return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return b''
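
    # Illustrative usage (not part of the original file; the filename is
    # hypothetical): read an index file, mmapping it only once it crosses
    # the configured threshold, with b'' returned when the file is missing:
    #   data = self._get_data(b'00changelog.i', mmap_threshold=65536)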

    def _loadindex(self):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        else:
            entry_point = b'%s.i' % self.radix

        entry_data = b''
        self._initempty = True
        entry_data = self._get_data(entry_point, mmapindexthreshold)
        if len(entry_data) > 0:
            header = INDEX_HEADER.unpack(entry_data[:4])[0]
            self._initempty = False
        else:
            header = new_header

        self._format_flags = header & ~0xFFFF
        self._format_version = header & 0xFFFF

        supported_flags = SUPPORTED_FLAGS.get(self._format_version)
        if supported_flags is None:
            msg = _(b'unknown version (%d) in revlog %s')
            msg %= (self._format_version, self.display_id)
            raise error.RevlogError(msg)
        elif self._format_flags & ~supported_flags:
            msg = _(b'unknown flags (%#04x) in version %d revlog %s')
            display_flag = self._format_flags >> 16
            msg %= (display_flag, self._format_version, self.display_id)
            raise error.RevlogError(msg)

        features = FEATURES_BY_VERSION[self._format_version]
        self._inline = features[b'inline'](self._format_flags)
        self._generaldelta = features[b'generaldelta'](self._format_flags)
        self.hassidedata = features[b'sidedata']

        if not features[b'docket']:
            self._indexfile = entry_point
            index_data = entry_data
        else:
            self._docket_file = entry_point
            if self._initempty:
                self._docket = docketutil.default_docket(self, header)
            else:
                self._docket = docketutil.parse_docket(
                    self, entry_data, use_pending=self._trypending
                )
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, _chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index, self._chunkcache = d
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
691 """The public facing "ID" of the revlog that we use in message"""
691 """The public facing "ID" of the revlog that we use in message"""
        # Maybe we should build a user facing representation of
        # revlog.target instead of using `self.radix`
        return self.radix

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._docket is None:
            return None
        t = self._docket.default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly and use `_writing` instead
        try:
            f = self.opener(
                self._indexfile, mode=b"r+", checkambig=self._checkambig
            )
            if self._docket is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(self._docket.index_end, os.SEEK_SET)
            return f
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from inline revlog
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self._checkambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp

    def tiprev(self):
        return len(self.index) - 1

    def tip(self):
        return self.node(self.tiprev())

    def __contains__(self, rev):
        return 0 <= rev < len(self)

    def __len__(self):
        return len(self.index)

    def __iter__(self):
        return iter(pycompat.xrange(len(self)))

    def revs(self, start=0, stop=None):
801 """iterate over all rev in this revlog (from start to stop)"""
801 """iterate over all rev in this revlog (from start to stop)"""
        return storageutil.iterrevs(len(self), start=start, stop=stop)

    @property
    def nodemap(self):
        msg = (
            b"revlog.nodemap is deprecated, "
            b"use revlog.index.[has_node|rev|get_rev]"
        )
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    @property
    def _nodecache(self):
        msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
        util.nouideprecwarn(msg, b'5.3', stacklevel=2)
        return self.index.nodemap

    def hasnode(self, node):
        try:
            self.rev(node)
            return True
        except KeyError:
            return False

    def candelta(self, baserev, rev):
        """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such flag processor can alter
        # the rawtext content that the delta will be based on, and two clients
        # could have a same revlog node with different flags (i.e. different
        # rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, b'')
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The Python code is the one responsible for validating the docket, so
        # we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))
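
    # Illustrative note (not part of the original file): rev() and node()
    # are inverses for stored revisions, i.e.
    #   self.node(self.rev(some_node)) == some_node
    # while working-directory pseudo-nodes raise WdirUnsupported instead.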
879
879
880 # Accessors for index entries.
880 # Accessors for index entries.
881
881
882 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
882 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
883 # are flags.
883 # are flags.
884 def start(self, rev):
884 def start(self, rev):
885 return int(self.index[rev][0] >> 16)
885 return int(self.index[rev][0] >> 16)
886
886
887 def flags(self, rev):
887 def flags(self, rev):
888 return self.index[rev][0] & 0xFFFF
888 return self.index[rev][0] & 0xFFFF
889
889
    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise
        if entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

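    # Illustrative sketch (hypothetical index entries): fields 5 and 6 hold
    # the parent revisions.  When the first parent is null but the second is
    # not, ``parentrevs`` swaps them so callers always see the real parent
    # first:
    #
    #   entry[5], entry[6] == nullrev, 4  ->  parentrevs(rev) == (4, nullrev)
    #   entry[5], entry[6] == 3, nullrev  ->  parentrevs(rev) == (3, nullrev)
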
    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
        # (d[5] and d[6] are parent *revisions*, so compare against nullrev,
        # mirroring the swap in parentrevs above)
        if d[5] == nullrev:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

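    # Illustrative sketch (hypothetical generaldelta chain): for a delta
    # chain 0 <- 3 <- 5, where field 3 of each index entry names the delta
    # base, ``_chaininfo(5)`` walks 5 -> 3 -> 0 and returns
    # (number of deltas in the chain, total compressed size including the
    # base), caching the result so later calls for revs on the same chain
    # can stop early.
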
    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

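    # Illustrative sketch (same hypothetical chain 0 <- 3 <- 5 as above,
    # with ``rl`` a revlog instance):
    #
    #   rl._deltachain(5)             # -> ([0, 3, 5], False), walked to base
    #   rl._deltachain(5, stoprev=3)  # -> ([5], True), stopped before rev 3
    #
    # When ``stoprev`` is hit, the stop revision itself is excluded from the
    # returned chain.
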
    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered either

        if rustancestor is not None:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

        ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

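    # Illustrative sketch (hypothetical nodes a, b on a linear history where
    # b descends from a):
    #
    #   has, missing = rl.findcommonmissing(common=[a], heads=[b])
    #
    # leaves the revisions of a and its ancestors in ``has`` and returns the
    # nodes strictly between a and b (plus b itself), sorted by revision
    # number, in ``missing``.
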
    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied, uses
        nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

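    # Illustrative sketch (hypothetical revs): on a linear revlog with
    # revs 0..4,
    #
    #   rl.findmissingrevs(common=[2], heads=[4])  # -> [3, 4]
    #
    # since revs 0..2 are ancestors of the common set and are excluded.
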
    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

        1. N is an ancestor of some node in 'heads'
        2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

        1. N is a descendant of some node in 'roots'
        2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots?  Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents are
                # descendants.  (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

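    # Illustrative sketch (hypothetical nodes): on a linear history
    # n0 -> n1 -> n2 -> n3,
    #
    #   rl.nodesbetween(roots=[n1], heads=[n3])
    #   # -> ([n1, n2, n3], [n1], [n3])
    #
    # i.e. the topological path plus the subsets of the given roots and
    # heads that were actually reachable.
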
    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

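    # Illustrative sketch (hypothetical revs): every rev starts out marked as
    # a head, then each rev clears the mark on its parents; whatever is still
    # marked after one pass has no children.  With edges 0 -> 1, 1 -> 2 and
    # 1 -> 3:
    #
    #   rl._headrevs()  # -> [2, 3]
    #
    # The extra trailing slot in ``ishead`` absorbs nullrev parents, which
    # index the list at -1.
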
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

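    # Illustrative sketch (hypothetical revs): ``isancestorrev`` relies on an
    # ancestor always having a smaller revision number, so cheap comparisons
    # settle the null, equal, and impossible cases before any graph walk:
    #
    #   rl.isancestorrev(5, 2)  # -> False immediately, since 5 > 2
    #   rl.isancestorrev(2, 5)  # walks reachableroots(2, [5], [2])
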
    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

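    # Illustrative sketch (hypothetical inputs): ``_match`` resolves exact
    # identifiers only, trying each form in turn:
    #
    #   rl._match(5)                # integer revision -> node
    #   rl._match(b'5')             # str(rev); negative values count from
    #                               # the end
    #   rl._match(b'\x12\x34...')   # nodelen-byte binary node
    #   rl._match(b'1234abcd' * 5)  # full hex nodeid
    #
    # Anything else (e.g. a short hex prefix) returns None here and is left
    # to ``_partialmatch`` via ``lookup``.
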
    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    raise error.RevlogError
                return partial
            if maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                raise error.AmbiguousPrefixLookupError(
                    id, self.display_id, _(b'ambiguous identifier')
                )
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

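    # Illustrative sketch (hypothetical node): ``shortest`` probes prefixes
    # of increasing length until one is unambiguous, then lengthens it if
    # needed so it cannot be mistaken for the all-'f' wdir identifier:
    #
    #   rl.shortest(node)               # e.g. -> b'1a2b'
    #   rl.shortest(node, minlength=6)  # never shorter than 6 characters
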
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                raise error.RevlogError(
                    _(
                        b'partial read of revlog %s; expected %d bytes from '
                        b'offset %d, got %d'
                    )
                    % (
                        self._indexfile if self._inline else self._datafile,
                        length,
                        offset,
                        len(d) - startoffset,
                    )
                )

            return util.buffer(d, startoffset, length)

        if len(d) < length:
            raise error.RevlogError(
                _(
                    b'partial read of revlog %s; expected %d bytes from offset '
                    b'%d, got %d'
                )
                % (
                    self._indexfile if self._inline else self._datafile,
                    length,
                    offset,
                    len(d),
                )
            )

        return d

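    # Illustrative sketch (hypothetical numbers): with a 64 KiB cache window,
    # a request for 100 bytes at offset 70000 is rounded out to window
    # boundaries before reading:
    #
    #   cachesize  = 65536
    #   realoffset = 70000 & ~(65536 - 1)                        # -> 65536
    #   reallength = ((70000 + 100 + 65536) & ~(65536 - 1)) - 65536
    #                                                            # -> 65536
    #
    # so a single aligned 64 KiB read is cached, and the caller receives the
    # 100 requested bytes starting at offset 4464 within it.
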
    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

        if self._inline:
            start += (startrev + 1) * self.index.entry_size
            end += (endrev + 1) * self.index.entry_size
        length = end - start

        return start, self._getsegment(start, length, df=df)

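    # Illustrative sketch (hypothetical numbers): in an inline revlog the
    # data chunks are interleaved with the fixed-size index entries, so
    # logical data offsets must be shifted past the index entries that
    # precede them.  With entry_size == 64 and rev 2 starting at logical
    # offset 500:
    #
    #   physical_start = 500 + (2 + 1) * 64  # -> 692
    #
    # For a non-inline revlog the logical offset is used as-is.
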
1779 def _chunk(self, rev, df=None):
1779 def _chunk(self, rev, df=None):
1780 """Obtain a single decompressed chunk for a revision.
1780 """Obtain a single decompressed chunk for a revision.
1781
1781
1782 Accepts an integer revision and an optional already-open file handle
1782 Accepts an integer revision and an optional already-open file handle
1783 to be used for reading. If used, the seek position of the file will not
1783 to be used for reading. If used, the seek position of the file will not
1784 be preserved.
1784 be preserved.
1785
1785
1786 Returns a str holding uncompressed data for the requested revision.
1786 Returns a str holding uncompressed data for the requested revision.
1787 """
1787 """
1788 compression_mode = self.index[rev][10]
1788 compression_mode = self.index[rev][10]
        data = self._getsegmentforrevs(rev, rev, df=df)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = 'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)

    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )
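            # With sparse reads enabled, the requested revisions are sliced
            # into dense groups so that each group can be fetched with one
            # contiguous read whose size stays close to ``targetsize``,
            # rather than one read spanning large unrelated gaps.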

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = 'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

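    # Delta base semantics: with general delta, the base field of an index
    # entry names the delta parent directly and may be any earlier revision;
    # in the classic layout each delta implicitly applies against rev - 1.
    # A base equal to the revision itself marks a full snapshot.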
    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
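        # A delta against something other than a parent can only appear in
        # a snapshot chain, so rev is an (intermediate) snapshot exactly
        # when its base is one.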
        return self.issnapshot(base)

    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflag(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

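    # Illustrative usage of the two read entry points (assuming ``rl`` is
    # an open revlog instance):
    #
    #   text = rl.revision(rev)   # text with flag processors applied
    #   raw = rl.rawdata(rev)     # bytes exactly as stored in the revlog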
    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)[0]

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more
        advanced mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        return self._revisiondata(nodeorrev, _df)[1]

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b"", {}

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if self.hassidedata:
            if rev is None:
                rev = self.rev(node)
            sidedata = self._sidedata(rev)
        else:
            sidedata = {}

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext, sidedata
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (these usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext, sidedata

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text, sidedata

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize
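            # Heuristic: cap the sparse-read window at four times the final
            # raw size; a delta chain rarely needs to read much more data
            # than the text it reconstructs.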

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
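            # Same interleaving correction as in the chunk readers above:
            # inline revlogs store index entries and data in one file.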
        if sidedata_size == 0:
            return {}

        comp_segment = self._getsegment(sidedata_offset, sidedata_size)
        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = 'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)[0]

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

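    # Conversion from inline to split storage, sketched: flush and drop any
    # cached write handles, copy every revision's chunk into a fresh .d
    # file, then rewrite the index without FLAG_INLINE_DATA and hand the
    # truncation point over to the transaction.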
    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        trindex = 0
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                    if troffset <= self.start(r):
                        trindex = r
                new_dfh.flush()

            with self.__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0 and self._docket is None:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                if self._docket is not None:
                    self._docket.index_end = fp.tell()
                # the temp file replaces the real index when we exit the
                # context manager

            tr.replace(self._indexfile, trindex * self.index.entry_size)
            nodemaputil.setup_persistent_nodemap(tr, self)
            self._chunkclear()

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh)
                new_dfh = None
        finally:
            if new_dfh is not None:
                new_dfh.close()

    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            r = len(self)
            dsize = 0
            if r:
                dsize = self.end(r - 1)
            dfh = None
            if not self._inline:
                try:
                    dfh = self._datafp(b"r+")
                    if self._docket is None:
                        dfh.seek(0, os.SEEK_END)
                    else:
                        dfh.seek(self._docket.data_end, os.SEEK_SET)
                except IOError as inst:
                    if inst.errno != errno.ENOENT:
                        raise
                    dfh = self._datafp(b"w+")
                transaction.add(self._datafile, dsize)
            try:
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
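                    # Inline: revision data lives in the index file itself,
                    # so the journal entry must cover index entries plus
                    # data.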
                else:
                    transaction.add(self._indexfile, isize)
                try:
                    self._writinghandles = (ifh, dfh)
                    try:
                        yield
                        if self._docket is not None:
                            self._write_docket(transaction)
                    finally:
                        self._writinghandles = None
                finally:
                    ifh.close()
            finally:
                if dfh is not None:
                    dfh.close()

    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog implement transaction logic.

        We could also imagine using the same transaction logic for all
        revlogs, since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2); however, subclasses might
        use a different hashing method (and override checkhash() in that case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that doesn't support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents

        Useful when reusing a revision not stored in this revlog (e.g.
        received over the wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

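    # The (header, data) convention used by compress()/decompress(): an
    # empty header means the payload already starts with a recognizable
    # engine header (or with b'\0', which is stored verbatim), while b'u'
    # flags uncompressed data. Illustrative results, assuming the default
    # zlib engine, which declines to compress very small inputs:
    #
    #   self.compress(b'')       -> (b'', b'')
    #   self.compress(b'\0abc')  -> (b'', b'\0abc')   # NUL-prefixed, raw
    #   self.compress(b'plain')  -> (b'u', b'plain')  # compression not
    #                                                 # worthwhile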
    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh = self._writinghandles
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            h, d = deltainfo.data
            if not h and not d:
                # no data to store at all... declare it uncompressed
                compression_mode = COMP_MODE_PLAIN
            elif not h:
                t = d[0:1]
                if t == b'\0':
                    compression_mode = COMP_MODE_PLAIN
                elif t == self._docket.default_compression_header:
                    compression_mode = COMP_MODE_DEFAULT
            elif h == b'u':
                # we have a more efficient way to declare uncompressed
                h = b''
                compression_mode = COMP_MODE_PLAIN
                deltainfo = deltautil.drop_u_compression(deltainfo)

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.hassidedata:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = offset + deltainfo.deltalen
            h, comp_sidedata = self.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

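        # Index entry layout, by position: 0 offset/flags, 1 compressed
        # length, 2 raw length, 3 delta base, 4 link rev, 5 p1 rev, 6 p2 rev,
        # 7 node, 8 sidedata offset, 9 sidedata length, 10 data compression
        # mode, 11 sidedata compression mode.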
        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
            sidedata_offset,
            len(serialized_sidedata),
            compression_mode,
            sidedata_compression_mode,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.

        Versions < 2 of the revlog can get this in O(1); revlog v2 needs a
        docket file to store that information: since sidedata can be rewritten
        to the end of the data file within a transaction, you can have cases
        where, for example, rev `n` does not have sidedata while rev `n - 1`
        does, leading to `n - 1`'s sidedata being written after `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

    def _writeentry(self, transaction, entry, data, link, offset, sidedata):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                dfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
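            # Inline: translate the .d-style offset into an index-file
            # offset by accounting for the interleaved index entries.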
2656 transaction.add(self._indexfile, offset)
2656 transaction.add(self._indexfile, offset)
2657 ifh.write(entry)
2657 ifh.write(entry)
2658 ifh.write(data[0])
2658 ifh.write(data[0])
2659 ifh.write(data[1])
2659 ifh.write(data[1])
2660 if sidedata:
2660 if sidedata:
2661 ifh.write(sidedata)
2661 ifh.write(sidedata)
2662 self._enforceinlinesize(transaction)
2662 self._enforceinlinesize(transaction)
2663 if self._docket is not None:
2663 if self._docket is not None:
2664 self._docket.index_end = self._writinghandles[0].tell()
2664 self._docket.index_end = self._writinghandles[0].tell()
2665 self._docket.data_end = self._writinghandles[1].tell()
2665 self._docket.data_end = self._writinghandles[1].tell()
2666
2666
2667 nodemaputil.setup_persistent_nodemap(transaction, self)
2667 nodemaputil.setup_persistent_nodemap(transaction, self)
2668
2668
    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The first delta
        is against its parent, which should be in our log; the rest are
        against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the revision that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                deltacomputer = deltautil.deltacomputer(self)
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement
                        # in a single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty
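
    # Illustrative caller sketch (hypothetical names, not part of this
    # module): each element of ``deltas`` is the 8-tuple unpacked above.
    #
    #   deltas = [
    #       (node, p1, p2, linknode, deltabase, delta, flags, sidedata),
    #   ]
    #   added = rl.addgroup(deltas, linkmapper, tr, addrevisioncb=cb)
    #
    # ``linkmapper`` maps a ``linknode`` to the local linkrev, and the
    # return value is True when at least one delta was added or matched an
    # existing revision.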

    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._chunkclear()

        del self.index[rev:-1]
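
    # Worked example with illustrative numbers: for entry_size == 64 and
    # rev == 10, a non-inline revlog truncates the index to 10 * 64 == 640
    # bytes and the data file to self.start(10) bytes; an inline revlog
    # keeps both streams in one file, so the single truncation point is
    # self.start(10) + 10 * 64.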

    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)
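
    # Reading the result: a positive ``di`` smaller than ``entry_size``
    # indicates a trailing partial index entry. For inline revlogs the data
    # shares the index file, so ``dd`` is always reported as 0 and any
    # surplus bytes show up in ``di`` instead.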

    def files(self):
        res = [self._indexfile]
        if not self._inline:
            res.append(self._datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta
        encoding differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can
        have the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is
          the fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy
        can significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if
        the delta could choose a better revision, it will do so. This means
        if you are converting a non-generaldelta revlog to a generaldelta
        revlog, deltas will be recomputed if the delta's parent isn't a
        parent of the revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force computing deltas against both
        parents for merges. If ``None``, the destination revlog's existing
        setting is kept.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached
        # delta, if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd
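
    # Illustrative call (``repo``, ``src`` and ``dst`` are hypothetical):
    # force every delta to be recomputed while copying, e.g. after a delta
    # algorithm change.
    #
    #   with repo.transaction(b'rewrite') as tr:
    #       src.clone(tr, dst, deltareuse=src.DELTAREUSENEVER)
    #
    # The destination must be empty and unfiltered, or ``clone`` raises
    # ValueError.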

    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text, sidedata = self._revisiondata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext, sidedata = self._revisiondata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)
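
    # About ``flags | new_flags[0] & ~new_flags[1]`` above: ``new_flags`` is
    # an (add, remove) pair of bitmasks, and ``&`` binds tighter than ``|``,
    # so the expression ORs in only the added bits not also marked for
    # removal; bits already present in ``flags`` are never cleared. E.g.
    # 0b0101 | (0b0110 & ~0b0100) == 0b0111.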

    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(
                _(b'censor tombstone must be no longer than censored data')
            )

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.
        #
        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(
            self.opener,
            target=self.target,
            radix=self.radix,
            postfix=b'tmpcensored',
            censorable=True,
        )
        newrl._format_version = self._format_version
        newrl._format_flags = self._format_flags
        newrl._generaldelta = self._generaldelta
        newrl._parse_index = self._parse_index

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(
                    tombstone,
                    tr,
                    self.linkrev(censorrev),
                    p1,
                    p2,
                    censornode,
                    REVIDX_ISCENSORED,
                )

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'censored revision stored as delta; '
                            b'cannot censor'
                        ),
                        hint=_(
                            b'censoring of revlogs is not '
                            b'fully implemented; please report '
                            b'this bug'
                        ),
                    )
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        tr.addbackup(self._indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self._datafile, location=b'store')

        self.opener.rename(newrl._indexfile, self._indexfile)
        if not self._inline:
            self.opener.rename(newrl._datafile, self._datafile)

        self.clearcaches()
        self._loadindex()
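
    # Illustrative call (the transaction ``tr`` is assumed to be open):
    #
    #   rl.censorrevision(tr, censornode, tombstone=b'removed per policy')
    #
    # The packed tombstone must fit within the size of the data it replaces,
    # and the original files are backed up to the transaction before the
    # rewritten revlog is renamed into place.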

    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #     header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  ------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #               | common | rename | meta  | ext
            #  -------------------------------------------------
            #  rawsize()    | L1     | L1     | L1    | L1
            #  size()       | L1     | L2-LM  | L1(*) | L1 (?)
            #  len(rawtext) | L2     | L2     | L2    | L2
            #  len(text)    | L2     | L2     | L2    | L3
            #  len(read())  | L2     | L2-LM  | L2-LM | L3 (?)
            #
            # LM: length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)
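
    # The ``state`` mapping used above: callers supply b'expectedversion'
    # and b'erroroncensored' (and optionally b'skipflags'); this method
    # initializes b'skipread' and b'safe_renamed' and records in
    # b'skipread' the nodes whose content could not be checked.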

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d
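
    # Illustrative call (``rl`` is a hypothetical revlog instance): only the
    # requested keys are computed, so cheap queries stay cheap.
    #
    #   info = rl.storageinfo(revisionscount=True, storedsize=True)
    #   info[b'revisionscount']  # number of revisions
    #   info[b'storedsize']      # on-disk bytes across the revlog's files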

    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh = self._writinghandles
            if self._docket is not None:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
            else:
                dfh.seek(0, os.SEEK_END)

            current_offset = dfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.hassidedata:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                dfh.seek(current_offset, os.SEEK_SET)
                dfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            if self._docket is not None:
                self._docket.data_end = dfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)
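
    # Compression-mode summary for the loop above: rewritten sidedata is
    # stored COMP_MODE_PLAIN unless compression actually shrinks it; a
    # compressed blob whose first byte matches the docket's default
    # compression header is recorded as COMP_MODE_DEFAULT, and any other
    # compressed form as COMP_MODE_INLINE with its header kept in the data.
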
@@ -1,68 +1,68 b''
#require reporevlogstore

A repo with unknown revlogv2 requirement string cannot be opened

  $ hg init invalidreq
  $ cd invalidreq
  $ echo exp-revlogv2.unknown >> .hg/requires
  $ hg log
  abort: repository requires features unknown to this Mercurial: exp-revlogv2.unknown
  (see https://mercurial-scm.org/wiki/MissingRequirement for more information)
  [255]
  $ cd ..

Can create and open repo with revlog v2 requirement

  $ cat >> $HGRCPATH << EOF
  > [experimental]
  > revlogv2 = enable-unstable-format-and-corrupt-my-data
  > EOF

  $ hg init empty-repo
  $ cd empty-repo
  $ cat .hg/requires
  dotencode
  exp-revlogv2.2
  fncache
  generaldelta
  persistent-nodemap (rust !)
  revlog-compression-zstd (zstd !)
  sparserevlog
  store

  $ hg log

Unknown flags to revlog are rejected

  >>> with open('.hg/store/00changelog.i', 'wb') as fh:
  ...     fh.write(b'\xff\x00\xde\xad') and None

  $ hg log
  abort: unknown flags (0xff00) in version 57005 revlog 00changelog
  [50]

  $ cd ..

Writing a simple revlog v2 works

  $ hg init simple
  $ cd simple
  $ touch foo
  $ hg -q commit -A -m initial

  $ hg log
  changeset:   0:96ee1d7354c4
  tag:         tip
  user:        test
  date:        Thu Jan 01 00:00:00 1970 +0000
  summary:     initial

Header written as expected

  $ f --hexdump --bytes 4 .hg/store/00changelog.i
  .hg/store/00changelog.i:
-  0000: 00 01 de ad |....|
+  0000: 00 00 de ad |....|

  $ f --hexdump --bytes 4 .hg/store/data/foo.i
  .hg/store/data/foo.i:
-  0000: 00 01 de ad |....|
+  0000: 00 00 de ad |....|
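
The 4-byte header is two bytes of revlog flags followed by two bytes of
version: 0xdead is 57005, the revlog v2 version reported in the error above,
and the flag bytes changing from 00 01 to 00 00 show FLAG_INLINE_DATA no
longer being set for newly created revlog v2 files.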