revlog: move censoring code in a dedicated module...
marmoute
r48183:33d62691 default
@@ -1,3535 +1,3463 @@
# revlog.py - storage back-end for mercurial
# coding: utf8
#
# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

from __future__ import absolute_import

import binascii
import collections
import contextlib
import errno
import io
import os
import struct
import zlib

# import stuff from node for others to import from revlog
from .node import (
    bin,
    hex,
    nullrev,
    sha1nodeconstants,
    short,
    wdirrev,
)
from .i18n import _
from .pycompat import getattr
from .revlogutils.constants import (
    ALL_KINDS,
    CHANGELOGV2,
    COMP_MODE_DEFAULT,
    COMP_MODE_INLINE,
    COMP_MODE_PLAIN,
    FEATURES_BY_VERSION,
    FLAG_GENERALDELTA,
    FLAG_INLINE_DATA,
    INDEX_HEADER,
    KIND_CHANGELOG,
    REVLOGV0,
    REVLOGV1,
    REVLOGV1_FLAGS,
    REVLOGV2,
    REVLOGV2_FLAGS,
    REVLOG_DEFAULT_FLAGS,
    REVLOG_DEFAULT_FORMAT,
    REVLOG_DEFAULT_VERSION,
    SUPPORTED_FLAGS,
)
from .revlogutils.flagutil import (
    REVIDX_DEFAULT_FLAGS,
    REVIDX_ELLIPSIS,
    REVIDX_EXTSTORED,
    REVIDX_FLAGS_ORDER,
    REVIDX_HASCOPIESINFO,
    REVIDX_ISCENSORED,
    REVIDX_RAWTEXT_CHANGING_FLAGS,
)
from .thirdparty import attr
from . import (
    ancestor,
    dagop,
    error,
    mdiff,
    policy,
    pycompat,
    templatefilters,
    util,
)
from .interfaces import (
    repository,
    util as interfaceutil,
)
from .revlogutils import (
+   censor,
    deltas as deltautil,
    docket as docketutil,
    flagutil,
    nodemap as nodemaputil,
    revlogv0,
    sidedata as sidedatautil,
)
from .utils import (
    storageutil,
    stringutil,
)

# blanked usage of all the names to prevent pyflakes constraints
# We need these names available in the module for extensions.

REVLOGV0
REVLOGV1
REVLOGV2
FLAG_INLINE_DATA
FLAG_GENERALDELTA
REVLOG_DEFAULT_FLAGS
REVLOG_DEFAULT_FORMAT
REVLOG_DEFAULT_VERSION
REVLOGV1_FLAGS
REVLOGV2_FLAGS
REVIDX_ISCENSORED
REVIDX_ELLIPSIS
REVIDX_HASCOPIESINFO
REVIDX_EXTSTORED
REVIDX_DEFAULT_FLAGS
REVIDX_FLAGS_ORDER
REVIDX_RAWTEXT_CHANGING_FLAGS

parsers = policy.importmod('parsers')
rustancestor = policy.importrust('ancestor')
rustdagop = policy.importrust('dagop')
rustrevlog = policy.importrust('revlog')

# Aliased for performance.
_zlibdecompress = zlib.decompress

# max size of revlog with inline data
_maxinline = 131072
_chunksize = 1048576

# Flag processors for REVIDX_ELLIPSIS.
def ellipsisreadprocessor(rl, text):
    return text, False


def ellipsiswriteprocessor(rl, text):
    return text, False


def ellipsisrawprocessor(rl, text):
    return False


ellipsisprocessor = (
    ellipsisreadprocessor,
    ellipsiswriteprocessor,
    ellipsisrawprocessor,
)


def offset_type(offset, type):
    if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
        raise ValueError(b'unknown revlog index flags')
    return int(int(offset) << 16 | type)
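

# Editorial note (not part of the upstream file): offset_type() packs the
# data offset into the high bits and the REVIDX_* flags into the low 16
# bits of a single index field. For example:
#
#     field = offset_type(1024, REVIDX_ISCENSORED)
#     assert field >> 16 == 1024                   # recover the offset
#     assert field & 0xFFFF == REVIDX_ISCENSORED   # recover the flags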


def _verify_revision(rl, skipflags, state, node):
    """Verify the integrity of the given revlog ``node`` while providing a hook
    point for extensions to influence the operation."""
    if skipflags:
        state[b'skipread'].add(node)
    else:
        # Side-effect: read content and verify hash.
        rl.revision(node)


# True if a fast implementation for persistent-nodemap is available
#
# We also consider we have a "fast" implementation in "pure" python because
# people using pure don't really have performance considerations (and a
# wheelbarrow of other slowness sources)
HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
    parsers, 'BaseIndexObject'
)


@attr.s(slots=True, frozen=True)
class _revisioninfo(object):
    """Information about a revision that allows building its fulltext
    node: expected hash of the revision
    p1, p2: parent revs of the revision
    btext: built text cache consisting of a one-element list
    cachedelta: (baserev, uncompressed_delta) or None
    flags: flags associated to the revision storage

    One of btext[0] or cachedelta must be set.
    """

    node = attr.ib()
    p1 = attr.ib()
    p2 = attr.ib()
    btext = attr.ib()
    textlen = attr.ib()
    cachedelta = attr.ib()
    flags = attr.ib()
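
    # Editorial example (hypothetical values, not part of the upstream
    # file): a fulltext-backed info would look like
    #     _revisioninfo(node, p1rev, p2rev, [fulltext], len(fulltext),
    #                   None, 0)
    # while a delta-backed one passes btext=[None] and
    # cachedelta=(baserev, delta).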


@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class revlogrevisiondelta(object):
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    sidedata = attr.ib()
    protocol_flags = attr.ib()
    linknode = attr.ib(default=None)


@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class revlogproblem(object):
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)


def parse_index_v1(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline)
    return index, cache


def parse_index_v2(data, inline):
    # call the C implementation to parse the index data
    index, cache = parsers.parse_index2(data, inline, revlogv2=True)
    return index, cache


def parse_index_cl_v2(data, inline):
    # call the C implementation to parse the index data
    assert not inline
    from .pure.parsers import parse_index_cl_v2

    index, cache = parse_index_cl_v2(data)
    return index, cache


if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

    def parse_index_v1_nodemap(data, inline):
        index, cache = parsers.parse_index_devel_nodemap(data, inline)
        return index, cache


else:
    parse_index_v1_nodemap = None


def parse_index_v1_mixed(data, inline):
    index, cache = parse_index_v1(data, inline)
    return rustrevlog.MixedIndex(index), cache


# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
# signed integer)
_maxentrysize = 0x7FFFFFFF

PARTIAL_READ_MSG = _(
    b'partial read of revlog %s; expected %d bytes from offset %d, got %d'
)

FILE_TOO_SHORT_MSG = _(
    b'cannot read from revlog %s;'
    b' expected %d bytes from offset %d, data size is %d'
)


class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.

    If checkambig, indexfile is opened with checkambig=True at
    writing, to avoid file stat ambiguity.

    If mmaplargeindex is True, and an mmapindexthreshold is set, the
    index will be mmapped rather than read if it is larger than the
    configured threshold.

    If censorable is True, the revlog can have censored revisions.

    If `upperboundcomp` is not None, this is the expected maximal gain from
    compression for the data content.

    `concurrencychecker` is an optional function that receives 3 arguments: a
    file handle, a filename, and an expected position. It should check whether
    the current position in the file handle is valid, and log/warn/fail (by
    raising).


    Internal details
    ----------------

    A large part of the revlog logic deals with revisions' "index entries",
    tuple objects that contain the same "items" whatever the revlog version.
    Different versions will have different ways of storing these items
    (sometimes not having them at all), but the tuple will always be the same.
    New fields are usually added at the end to avoid breaking existing code
    that relies on the existing order. The fields are defined as follows:

    [0] offset:
        The byte index of the start of revision data chunk.
        That value is shifted up by 16 bits. Use "offset = field >> 16" to
        retrieve it.

        flags:
        A flag field that carries special information or changes the behavior
        of the revision. (see `REVIDX_*` constants for details)
        The flag field only occupies the first 16 bits of this field,
        use "flags = field & 0xFFFF" to retrieve the value.

    [1] compressed length:
        The size, in bytes, of the chunk on disk

    [2] uncompressed length:
        The size, in bytes, of the full revision once reconstructed.

    [3] base rev:
        Either the base of the revision delta chain (without general
        delta), or the base of the delta (stored in the data chunk)
        with general delta.

    [4] link rev:
        Changelog revision number of the changeset introducing this
        revision.

    [5] parent 1 rev:
        Revision number of the first parent

    [6] parent 2 rev:
        Revision number of the second parent

    [7] node id:
        The node id of the current revision

    [8] sidedata offset:
        The byte index of the start of the revision's side-data chunk.

    [9] sidedata chunk length:
        The size, in bytes, of the revision's side-data chunk.

    [10] data compression mode:
        two bits that detail the way the data chunk is compressed on disk.
        (see "COMP_MODE_*" constants for details). For revlog version 0 and
        1 this will always be COMP_MODE_INLINE.

    [11] side-data compression mode:
        two bits that detail the way the sidedata chunk is compressed on disk.
        (see "COMP_MODE_*" constants for details)
    """
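    # Editorial note (not part of the upstream file): given an index entry
    # ``e = revlog.index[rev]``, field 0 packs offset and flags together:
    #
    #     offset = e[0] >> 16     # byte position of the data chunk
    #     flags = e[0] & 0xFFFF   # REVIDX_* flags for this revision
    #     comp_len, uncomp_len = e[1], e[2]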

    _flagserrorclass = error.RevlogError

    def __init__(
        self,
        opener,
        target,
        radix,
        postfix=None,  # only exists for `tmpcensored` now
        checkambig=False,
        mmaplargeindex=False,
        censorable=False,
        upperboundcomp=None,
        persistentnodemap=False,
        concurrencychecker=None,
        trypending=False,
    ):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.

        `target`: a (KIND, ID) tuple that identifies the content stored in
        this revlog. It helps the rest of the code to understand what the
        revlog is about without having to resort to heuristics and index
        filename analysis. Note that this must reliably be set by normal
        code, but that test, debug, or performance measurement code might
        not set this to an accurate value.
        """
        self.upperboundcomp = upperboundcomp

        self.radix = radix

        self._docket_file = None
        self._indexfile = None
        self._datafile = None
        self._sidedatafile = None
        self._nodemap_file = None
        self.postfix = postfix
        self._trypending = trypending
        self.opener = opener
        if persistentnodemap:
            self._nodemap_file = nodemaputil.get_nodemap_file(self)

        assert target[0] in ALL_KINDS
        assert len(target) == 2
        self.target = target
        # When True, indexfile is opened with checkambig=True at writing, to
        # avoid file stat ambiguity.
        self._checkambig = checkambig
        self._mmaplargeindex = mmaplargeindex
        self._censorable = censorable
        # 3-tuple of (node, rev, text) for a raw revision.
        self._revisioncache = None
        # Maps rev to chain base rev.
        self._chainbasecache = util.lrucachedict(100)
        # 2-tuple of (offset, data) of raw data from the revlog at an offset.
        self._chunkcache = (0, b'')
        # How much data to read and cache into the raw revlog data cache.
        self._chunkcachesize = 65536
        self._maxchainlen = None
        self._deltabothparents = True
        self.index = None
        self._docket = None
        self._nodemap_docket = None
        # Mapping of partial identifiers to full nodes.
        self._pcache = {}
        # Mapping of revision integer to full node.
        self._compengine = b'zlib'
        self._compengineopts = {}
        self._maxdeltachainspan = -1
        self._withsparseread = False
        self._sparserevlog = False
        self.hassidedata = False
        self._srdensitythreshold = 0.50
        self._srmingapsize = 262144

        # Make copy of flag processors so each revlog instance can support
        # custom flags.
        self._flagprocessors = dict(flagutil.flagprocessors)

        # 3-tuple of file handles being used for active writing.
        self._writinghandles = None
        # prevent nesting of addgroup
        self._adding_group = None

        self._loadindex()

        self._concurrencychecker = concurrencychecker

    def _init_opts(self):
        """process options (from above/config) to set up the associated default revlog mode

        These values might be affected when actually reading on-disk information.

        The relevant values are returned for use in _loadindex().

        * newversionflags:
            version header to use if we need to create a new revlog

        * mmapindexthreshold:
            minimal index size required to start using mmap

        * force_nodemap:
            force the usage of a "development" version of the nodemap code
        """
        mmapindexthreshold = None
        opts = self.opener.options

        if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
            new_header = CHANGELOGV2
        elif b'revlogv2' in opts:
            new_header = REVLOGV2
        elif b'revlogv1' in opts:
            new_header = REVLOGV1 | FLAG_INLINE_DATA
            if b'generaldelta' in opts:
                new_header |= FLAG_GENERALDELTA
        elif b'revlogv0' in self.opener.options:
            new_header = REVLOGV0
        else:
            new_header = REVLOG_DEFAULT_VERSION

        if b'chunkcachesize' in opts:
            self._chunkcachesize = opts[b'chunkcachesize']
        if b'maxchainlen' in opts:
            self._maxchainlen = opts[b'maxchainlen']
        if b'deltabothparents' in opts:
            self._deltabothparents = opts[b'deltabothparents']
        self._lazydelta = bool(opts.get(b'lazydelta', True))
        self._lazydeltabase = False
        if self._lazydelta:
            self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
        if b'compengine' in opts:
            self._compengine = opts[b'compengine']
        if b'zlib.level' in opts:
            self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
        if b'zstd.level' in opts:
            self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
        if b'maxdeltachainspan' in opts:
            self._maxdeltachainspan = opts[b'maxdeltachainspan']
        if self._mmaplargeindex and b'mmapindexthreshold' in opts:
            mmapindexthreshold = opts[b'mmapindexthreshold']
        self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
        withsparseread = bool(opts.get(b'with-sparse-read', False))
        # sparse-revlog forces sparse-read
        self._withsparseread = self._sparserevlog or withsparseread
        if b'sparse-read-density-threshold' in opts:
            self._srdensitythreshold = opts[b'sparse-read-density-threshold']
        if b'sparse-read-min-gap-size' in opts:
            self._srmingapsize = opts[b'sparse-read-min-gap-size']
        if opts.get(b'enableellipsis'):
            self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

        # revlog v0 doesn't have flag processors
        for flag, processor in pycompat.iteritems(
            opts.get(b'flagprocessors', {})
        ):
            flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

        if self._chunkcachesize <= 0:
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not greater than 0')
                % self._chunkcachesize
            )
        elif self._chunkcachesize & (self._chunkcachesize - 1):
            raise error.RevlogError(
                _(b'revlog chunk cache size %r is not a power of 2')
                % self._chunkcachesize
            )
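        # Editorial note: ``x & (x - 1)`` clears the lowest set bit, so the
        # check above rejects exactly the non-powers-of-two, e.g.
        # 65536 & 65535 == 0 (accepted) while 65537 & 65536 != 0 (rejected).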
        force_nodemap = opts.get(b'devel-force-nodemap', False)
        return new_header, mmapindexthreshold, force_nodemap

    def _get_data(self, filepath, mmap_threshold, size=None):
        """return the file content with or without mmap

        If the file is missing, return the empty string"""
        try:
            with self.opener(filepath) as fp:
                if mmap_threshold is not None:
                    file_size = self.opener.fstat(fp).st_size
                    if file_size >= mmap_threshold:
                        if size is not None:
                            # avoid potential mmap crash
                            size = min(file_size, size)
                        # TODO: should .close() to release resources without
                        # relying on Python GC
                        if size is None:
                            return util.buffer(util.mmapread(fp))
                        else:
                            return util.buffer(util.mmapread(fp, size))
                if size is None:
                    return fp.read()
                else:
                    return fp.read(size)
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return b''
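
    # Editorial note (not part of the upstream file): in _get_data(), files
    # at or above ``mmap_threshold`` are mapped into memory rather than
    # read, so a large index is paged in lazily instead of being copied up
    # front; missing files simply yield b''.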

    def _loadindex(self):

        new_header, mmapindexthreshold, force_nodemap = self._init_opts()

        if self.postfix is not None:
            entry_point = b'%s.i.%s' % (self.radix, self.postfix)
        elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
            entry_point = b'%s.i.a' % self.radix
        else:
            entry_point = b'%s.i' % self.radix

        entry_data = b''
        self._initempty = True
        entry_data = self._get_data(entry_point, mmapindexthreshold)
        if len(entry_data) > 0:
            header = INDEX_HEADER.unpack(entry_data[:4])[0]
            self._initempty = False
        else:
            header = new_header

        self._format_flags = header & ~0xFFFF
        self._format_version = header & 0xFFFF
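        # Editorial note: the 4-byte header mixes format flags (high 16 bits)
        # and version (low 16 bits). For instance, a v1 inline revlog stores
        # REVLOGV1 | FLAG_INLINE_DATA, which splits back into
        # _format_version == REVLOGV1 and _format_flags == FLAG_INLINE_DATA.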

        supported_flags = SUPPORTED_FLAGS.get(self._format_version)
        if supported_flags is None:
            msg = _(b'unknown version (%d) in revlog %s')
            msg %= (self._format_version, self.display_id)
            raise error.RevlogError(msg)
        elif self._format_flags & ~supported_flags:
            msg = _(b'unknown flags (%#04x) in version %d revlog %s')
            display_flag = self._format_flags >> 16
            msg %= (display_flag, self._format_version, self.display_id)
            raise error.RevlogError(msg)

        features = FEATURES_BY_VERSION[self._format_version]
        self._inline = features[b'inline'](self._format_flags)
        self._generaldelta = features[b'generaldelta'](self._format_flags)
        self.hassidedata = features[b'sidedata']

        if not features[b'docket']:
            self._indexfile = entry_point
            index_data = entry_data
        else:
            self._docket_file = entry_point
            if self._initempty:
                self._docket = docketutil.default_docket(self, header)
            else:
                self._docket = docketutil.parse_docket(
                    self, entry_data, use_pending=self._trypending
                )
            self._indexfile = self._docket.index_filepath()
            index_data = b''
            index_size = self._docket.index_end
            if index_size > 0:
                index_data = self._get_data(
                    self._indexfile, mmapindexthreshold, size=index_size
                )
                if len(index_data) < index_size:
                    msg = _(b'too few index data for %s: got %d, expected %d')
                    msg %= (self.display_id, len(index_data), index_size)
                    raise error.RevlogError(msg)

            self._inline = False
            # generaldelta implied by version 2 revlogs.
            self._generaldelta = True
            # the logic for persistent nodemap will be dealt with within the
            # main docket, so disable it for now.
            self._nodemap_file = None

        if self._docket is not None:
            self._datafile = self._docket.data_filepath()
            self._sidedatafile = self._docket.sidedata_filepath()
        elif self.postfix is None:
            self._datafile = b'%s.d' % self.radix
        else:
            self._datafile = b'%s.d.%s' % (self.radix, self.postfix)

        self.nodeconstants = sha1nodeconstants
        self.nullid = self.nodeconstants.nullid

        # sparse-revlog can't be on without general-delta (issue6056)
        if not self._generaldelta:
            self._sparserevlog = False

        self._storedeltachains = True

        devel_nodemap = (
            self._nodemap_file
            and force_nodemap
            and parse_index_v1_nodemap is not None
        )

        use_rust_index = False
        if rustrevlog is not None:
            if self._nodemap_file is not None:
                use_rust_index = True
            else:
                use_rust_index = self.opener.options.get(b'rust.index')

        self._parse_index = parse_index_v1
        if self._format_version == REVLOGV0:
            self._parse_index = revlogv0.parse_index_v0
        elif self._format_version == REVLOGV2:
            self._parse_index = parse_index_v2
        elif self._format_version == CHANGELOGV2:
            self._parse_index = parse_index_cl_v2
        elif devel_nodemap:
            self._parse_index = parse_index_v1_nodemap
        elif use_rust_index:
            self._parse_index = parse_index_v1_mixed
        try:
            d = self._parse_index(index_data, self._inline)
            index, _chunkcache = d
            use_nodemap = (
                not self._inline
                and self._nodemap_file is not None
                and util.safehasattr(index, 'update_nodemap_data')
            )
            if use_nodemap:
                nodemap_data = nodemaputil.persisted_data(self)
                if nodemap_data is not None:
                    docket = nodemap_data[0]
                    if (
                        len(d[0]) > docket.tip_rev
                        and d[0][docket.tip_rev][7] == docket.tip_node
                    ):
                        # no changelog tampering
                        self._nodemap_docket = docket
                        index.update_nodemap_data(*nodemap_data)
        except (ValueError, IndexError):
            raise error.RevlogError(
                _(b"index %s is corrupted") % self.display_id
            )
        self.index, self._chunkcache = d
        if not self._chunkcache:
            self._chunkclear()
        # revnum -> (chain-length, sum-delta-length)
        self._chaininfocache = util.lrucachedict(500)
        # revlog header -> revlog compressor
        self._decompressors = {}

    @util.propertycache
    def revlog_kind(self):
        return self.target[0]

    @util.propertycache
    def display_id(self):
        """The public facing "ID" of the revlog that we use in messages"""
        # Maybe we should build a user facing representation of
        # revlog.target instead of using `self.radix`
        return self.radix

    def _get_decompressor(self, t):
        try:
            compressor = self._decompressors[t]
        except KeyError:
            try:
                engine = util.compengines.forrevlogheader(t)
                compressor = engine.revlogcompressor(self._compengineopts)
                self._decompressors[t] = compressor
            except KeyError:
                raise error.RevlogError(
                    _(b'unknown compression type %s') % binascii.hexlify(t)
                )
        return compressor

    @util.propertycache
    def _compressor(self):
        engine = util.compengines[self._compengine]
        return engine.revlogcompressor(self._compengineopts)

    @util.propertycache
    def _decompressor(self):
        """the default decompressor"""
        if self._docket is None:
            return None
        t = self._docket.default_compression_header
        c = self._get_decompressor(t)
        return c.decompress

    def _indexfp(self):
        """file object for the revlog's index file"""
        return self.opener(self._indexfile, mode=b"r")

    def __index_write_fp(self):
        # You should not use this directly; use `_writing` instead
        try:
            f = self.opener(
                self._indexfile, mode=b"r+", checkambig=self._checkambig
            )
            if self._docket is None:
                f.seek(0, os.SEEK_END)
            else:
                f.seek(self._docket.index_end, os.SEEK_SET)
            return f
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            return self.opener(
                self._indexfile, mode=b"w+", checkambig=self._checkambig
            )

    def __index_new_fp(self):
        # You should not use this unless you are upgrading from an inline revlog
        return self.opener(
            self._indexfile,
            mode=b"w",
            checkambig=self._checkambig,
            atomictemp=True,
        )

    def _datafp(self, mode=b'r'):
        """file object for the revlog's data file"""
        return self.opener(self._datafile, mode=mode)

    @contextlib.contextmanager
    def _datareadfp(self, existingfp=None):
        """file object suitable to read data"""
        # Use explicit file handle, if given.
        if existingfp is not None:
            yield existingfp

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, _writeentry() performs a SEEK_END before all writes,
        # so we should be safe.
        elif self._writinghandles:
            if self._inline:
                yield self._writinghandles[0]
            else:
                yield self._writinghandles[1]

        # Otherwise open a new file handle.
        else:
            if self._inline:
                func = self._indexfp
            else:
                func = self._datafp
            with func() as fp:
                yield fp
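
    # Editorial usage sketch (not part of the upstream file): read paths are
    # expected to go through the context manager so that handle reuse stays
    # transparent, e.g.:
    #
    #     with self._datareadfp() as fp:
    #         fp.seek(start)
    #         segment = fp.read(length)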
813
814
814 @contextlib.contextmanager
815 @contextlib.contextmanager
815 def _sidedatareadfp(self):
816 def _sidedatareadfp(self):
816 """file object suitable to read sidedata"""
817 """file object suitable to read sidedata"""
817 if self._writinghandles:
818 if self._writinghandles:
818 yield self._writinghandles[2]
819 yield self._writinghandles[2]
819 else:
820 else:
820 with self.opener(self._sidedatafile) as fp:
821 with self.opener(self._sidedatafile) as fp:
821 yield fp
822 yield fp
822
823
823 def tiprev(self):
824 def tiprev(self):
824 return len(self.index) - 1
825 return len(self.index) - 1
825
826
826 def tip(self):
827 def tip(self):
827 return self.node(self.tiprev())
828 return self.node(self.tiprev())
828
829
829 def __contains__(self, rev):
830 def __contains__(self, rev):
830 return 0 <= rev < len(self)
831 return 0 <= rev < len(self)
831
832
832 def __len__(self):
833 def __len__(self):
833 return len(self.index)
834 return len(self.index)
834
835
835 def __iter__(self):
836 def __iter__(self):
836 return iter(pycompat.xrange(len(self)))
837 return iter(pycompat.xrange(len(self)))
837
838
838 def revs(self, start=0, stop=None):
839 def revs(self, start=0, stop=None):
839 """iterate over all rev in this revlog (from start to stop)"""
840 """iterate over all rev in this revlog (from start to stop)"""
840 return storageutil.iterrevs(len(self), start=start, stop=stop)
841 return storageutil.iterrevs(len(self), start=start, stop=stop)
841
842
842 @property
843 @property
843 def nodemap(self):
844 def nodemap(self):
844 msg = (
845 msg = (
845 b"revlog.nodemap is deprecated, "
846 b"revlog.nodemap is deprecated, "
846 b"use revlog.index.[has_node|rev|get_rev]"
847 b"use revlog.index.[has_node|rev|get_rev]"
847 )
848 )
848 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
849 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
849 return self.index.nodemap
850 return self.index.nodemap
850
851
851 @property
852 @property
852 def _nodecache(self):
853 def _nodecache(self):
853 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
854 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
854 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
855 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
855 return self.index.nodemap
856 return self.index.nodemap
856
857
857 def hasnode(self, node):
858 def hasnode(self, node):
858 try:
859 try:
859 self.rev(node)
860 self.rev(node)
860 return True
861 return True
861 except KeyError:
862 except KeyError:
862 return False
863 return False
863
864
864 def candelta(self, baserev, rev):
865 def candelta(self, baserev, rev):
865 """whether two revisions (baserev, rev) can be delta-ed or not"""
866 """whether two revisions (baserev, rev) can be delta-ed or not"""
        # Disable delta if either rev requires a content-changing flag
        # processor (ex. LFS). This is because such a flag processor can
        # alter the rawtext content that the delta will be based on, and two
        # clients could have the same revlog node with different flags (i.e.
        # different rawtext contents) and the delta could be incompatible.
        if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
            self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
        ):
            return False
        return True

    def update_caches(self, transaction):
        if self._nodemap_file is not None:
            if transaction is None:
                nodemaputil.update_persistent_nodemap(self)
            else:
                nodemaputil.setup_persistent_nodemap(transaction, self)

    def clearcaches(self):
        self._revisioncache = None
        self._chainbasecache.clear()
        self._chunkcache = (0, b'')
        self._pcache = {}
        self._nodemap_docket = None
        self.index.clearcaches()
        # The Python code is the one responsible for validating the docket,
        # so we end up having to refresh it here.
        use_nodemap = (
            not self._inline
            and self._nodemap_file is not None
            and util.safehasattr(self.index, 'update_nodemap_data')
        )
        if use_nodemap:
            nodemap_data = nodemaputil.persisted_data(self)
            if nodemap_data is not None:
                self._nodemap_docket = nodemap_data[0]
                self.index.update_nodemap_data(*nodemap_data)

    def rev(self, node):
        try:
            return self.index.rev(node)
        except TypeError:
            raise
        except error.RevlogError:
            # parsers.c radix tree lookup failed
            if (
                node == self.nodeconstants.wdirid
                or node in self.nodeconstants.wdirfilenodeids
            ):
                raise error.WdirUnsupported
            raise error.LookupError(node, self.display_id, _(b'no node'))

    # Accessors for index entries.

    # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
    # are flags.
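    # For reference, the other index-entry fields used by the accessors
    # below are:
    #   [1] length of the (compressed) on-disk data
    #   [2] length of the uncompressed text (rawsize), negative if unknown
    #   [3] base revision of the delta chain
    #   [4] linkrev
    #   [5], [6] parent revisions
    #   [7] node id
    #   [8], [9] sidedata offset and length
    #   [10] compression mode of the data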
    def start(self, rev):
        return int(self.index[rev][0] >> 16)

    def sidedata_cut_off(self, rev):
        sd_cut_off = self.index[rev][8]
        if sd_cut_off != 0:
            return sd_cut_off
        # This is some annoying dance, because entries without sidedata
        # currently use 0 as their offset (instead of previous-offset +
        # previous-size).
        #
        # We should reconsider this sidedata → 0 sidedata_offset policy.
        # In the meantime, we need this.
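        # Scan backwards for the nearest earlier entry that does have
        # sidedata; the end of its sidedata region is where ours would start.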
        while 0 <= rev:
            e = self.index[rev]
            if e[9] != 0:
                return e[8] + e[9]
            rev -= 1
        return 0

    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF

    def length(self, rev):
        return self.index[rev][1]

    def sidedata_length(self, rev):
        if not self.hassidedata:
            return 0
        return self.index[rev][9]

    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.rawdata(rev)
        return len(t)

    def size(self, rev):
        """length of non-raw text (processed by a "read" flag processor)"""
        # fast path: if no "read" flag processor could change the content,
        # size is rawsize. note: ELLIPSIS is known to not change the content.
        flags = self.flags(rev)
        if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
            return self.rawsize(rev)

        return len(self.revision(rev, raw=False))

    def chainbase(self, rev):
        base = self._chainbasecache.get(rev)
        if base is not None:
            return base

        index = self.index
        iterrev = rev
        base = index[iterrev][3]
        while base != iterrev:
            iterrev = base
            base = index[iterrev][3]

        self._chainbasecache[rev] = base
        return base

    def linkrev(self, rev):
        return self.index[rev][4]

    def parentrevs(self, rev):
        try:
            entry = self.index[rev]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise
        if entry[5] == nullrev:
            return entry[6], entry[5]
        else:
            return entry[5], entry[6]

    # fast parentrevs(rev) where rev isn't filtered
    _uncheckedparentrevs = parentrevs

    def node(self, rev):
        try:
            return self.index[rev][7]
        except IndexError:
            if rev == wdirrev:
                raise error.WdirUnsupported
            raise

    # Derived from index values.

    def end(self, rev):
        return self.start(rev) + self.length(rev)

    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        # inline node() to avoid function call overhead
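        # As in parentrevs() above, if p1 is null but p2 is not, the pair is
        # swapped so that the non-null parent is returned first.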
        if d[5] == nullrev:
            return i[d[6]][7], i[d[5]][7]
        else:
            return i[d[5]][7], i[d[6]][7]

    def chainlen(self, rev):
        return self._chaininfo(rev)[0]

    def _chaininfo(self, rev):
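        """Return a (chain length, sum of compressed delta sizes) pair.

        Results are memoized in self._chaininfocache; a cache hit on any
        intermediate revision lets the walk below stop early.
        """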
        chaininfocache = self._chaininfocache
        if rev in chaininfocache:
            return chaininfocache[rev]
        index = self.index
        generaldelta = self._generaldelta
        iterrev = rev
        e = index[iterrev]
        clen = 0
        compresseddeltalen = 0
        while iterrev != e[3]:
            clen += 1
            compresseddeltalen += e[1]
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            if iterrev in chaininfocache:
                t = chaininfocache[iterrev]
                clen += t[0]
                compresseddeltalen += t[1]
                break
            e = index[iterrev]
        else:
            # Add text length of base since decompressing that also takes
            # work. For cache hits the length is already included.
            compresseddeltalen += e[1]
        r = (clen, compresseddeltalen)
        chaininfocache[rev] = r
        return r

    def _deltachain(self, rev, stoprev=None):
        """Obtain the delta chain for a revision.

        ``stoprev`` specifies a revision to stop at. If not specified, we
        stop at the base of the chain.

        Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
        revs in ascending order and ``stopped`` is a bool indicating whether
        ``stoprev`` was hit.
        """
        # Try C implementation.
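        # Not every index implementation provides deltachain() (the pure
        # Python one does not), hence the AttributeError fallback below.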
        try:
            return self.index.deltachain(rev, stoprev, self._generaldelta)
        except AttributeError:
            pass

        chain = []

        # Alias to prevent attribute lookup in tight loop.
        index = self.index
        generaldelta = self._generaldelta

        iterrev = rev
        e = index[iterrev]
        while iterrev != e[3] and iterrev != stoprev:
            chain.append(iterrev)
            if generaldelta:
                iterrev = e[3]
            else:
                iterrev -= 1
            e = index[iterrev]

        if iterrev == stoprev:
            stopped = True
        else:
            chain.append(iterrev)
            stopped = False

        chain.reverse()
        return chain, stopped

    def ancestors(self, revs, stoprev=0, inclusive=False):
        """Generate the ancestors of 'revs' in reverse revision order.
        Does not generate revs lower than stoprev.

        See the documentation for ancestor.lazyancestors for more details."""

        # first, make sure start revisions aren't filtered
        revs = list(revs)
        checkrev = self.node
        for r in revs:
            checkrev(r)
        # and we're sure ancestors aren't filtered as well

        if rustancestor is not None and self.index.rust_ext_compat:
            lazyancestors = rustancestor.LazyAncestors
            arg = self.index
        else:
            lazyancestors = ancestor.lazyancestors
            arg = self._uncheckedparentrevs
        return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)

    def descendants(self, revs):
        return dagop.descendantrevs(revs, self.revs, self.parentrevs)

    def findcommonmissing(self, common=None, heads=None):
        """Return a tuple of the ancestors of common and the ancestors of heads
        that are not ancestors of common. In revset terminology, we return the
        tuple:

          ::common, (::heads) - (::common)

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
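        # i.e. ::common plus the entries of 'common' themselves; lazyset lets
        # us add those without eagerly consuming the ancestors generator.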
        class lazyset(object):
            def __init__(self, lazyvalues):
                self.addedvalues = set()
                self.lazyvalues = lazyvalues

            def __contains__(self, value):
                return value in self.addedvalues or value in self.lazyvalues

            def __iter__(self):
                added = self.addedvalues
                for r in added:
                    yield r
                for r in self.lazyvalues:
                    if r not in added:
                        yield r

            def add(self, value):
                self.addedvalues.add(value)

            def update(self, values):
                self.addedvalues.update(values)

        has = lazyset(self.ancestors(common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = collections.deque(r for r in heads if r not in has)
        while visit:
            r = visit.popleft()
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return has, [self.node(miss) for miss in missing]

    def incrementalmissingrevs(self, common=None):
        """Return an object that can be used to incrementally compute the
        revision numbers of the ancestors of arbitrary sets that are not
        ancestors of common. This is an ancestor.incrementalmissingancestors
        object.

        'common' is a list of revision numbers. If common is not supplied,
        uses nullrev.
        """
        if common is None:
            common = [nullrev]

        if rustancestor is not None and self.index.rust_ext_compat:
            return rustancestor.MissingAncestors(self.index, common)
        return ancestor.incrementalmissingancestors(self.parentrevs, common)

    def findmissingrevs(self, common=None, heads=None):
        """Return the revision numbers of the ancestors of heads that
        are not ancestors of common.

        More specifically, return a list of revision numbers corresponding to
        nodes N such that every N satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of revision numbers. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullrev."""
        if common is None:
            common = [nullrev]
        if heads is None:
            heads = self.headrevs()

        inc = self.incrementalmissingrevs(common=common)
        return inc.missingancestors(heads)

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [self.nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        inc = self.incrementalmissingrevs(common=common)
        return [self.node(r) for r in inc.missingancestors(heads)]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [self.nullid]  # Everybody's a descendant of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return (
                [self.node(r) for r in self],
                [self.nullid],
                list(self.heads()),
            )
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, False)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == self.nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendant of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n)  # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update(
                            [p for p in self.parents(n) if p != self.nullid]
                        )
                    elif n in heads:  # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [root for root in roots if root in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(root) for root in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [self.nullid]
        # Transform our roots list into a set.
        descendants = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendants.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendants, empty parents will look like
        # they're descendants.
        for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
            n = self.node(r)
            isdescendant = False
            if lowestrev == nullrev:  # Everybody is a descendant of nullid
                isdescendant = True
            elif n in descendants:
                # n is already a descendant
                isdescendant = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendants before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendants, it's not a root.
                    if (p[0] in descendants) or (p[1] in descendants):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendant if either of its parents is a
                # descendant. (We seeded the descendants set with the roots
                # up there, remember?)
                if (p[0] in descendants) or (p[1] in descendants):
                    descendants.add(n)
                    isdescendant = True
            if isdescendant and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendants and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = True
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = True
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [head for head, flag in pycompat.iteritems(heads) if flag]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def headrevs(self, revs=None):
        if revs is None:
            try:
                return self.index.headrevs()
            except AttributeError:
                return self._headrevs()
        if rustdagop is not None and self.index.rust_ext_compat:
            return rustdagop.headrevs(self.index, revs)
        return dagop.headrevs(revs, self._uncheckedparentrevs)

    def computephases(self, roots):
        return self.index.computephasesmapsets(roots)

    def _headrevs(self):
        count = len(self)
        if not count:
            return [nullrev]
        # we won't iterate over filtered revs, so nobody is a head at start
        ishead = [0] * (count + 1)
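        # the spare slot at the end absorbs the writes below for nullrev
        # parents (index -1) without clobbering a real revision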
        index = self.index
        for r in self:
            ishead[r] = 1  # I may be a head
            e = index[r]
            ishead[e[5]] = ishead[e[6]] = 0  # my parents are not
        return [r for r, val in enumerate(ishead) if val]

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            if not len(self):
                return [self.nullid]
            return [self.node(r) for r in self.headrevs()]

        if start is None:
            start = nullrev
        else:
            start = self.rev(start)

        stoprevs = {self.rev(n) for n in stop or []}

        revs = dagop.headrevssubset(
            self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
        )

        return [self.node(rev) for rev in revs]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def commonancestorsheads(self, a, b):
        """calculate all the heads of the common ancestors of nodes a and b"""
        a, b = self.rev(a), self.rev(b)
        ancs = self._commonancestorsheads(a, b)
        return pycompat.maplist(self.node, ancs)

    def _commonancestorsheads(self, *revs):
        """calculate all the heads of the common ancestors of revs"""
        try:
            ancs = self.index.commonancestorsheads(*revs)
        except (AttributeError, OverflowError):  # C implementation failed
            ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
        return ancs

    def isancestor(self, a, b):
        """return True if node a is an ancestor of node b

        A revision is considered an ancestor of itself."""
        a, b = self.rev(a), self.rev(b)
        return self.isancestorrev(a, b)

    def isancestorrev(self, a, b):
        """return True if revision a is an ancestor of revision b

        A revision is considered an ancestor of itself.

        The implementation of this is trivial but the use of
        reachableroots is not."""
        if a == nullrev:
            return True
        elif a == b:
            return True
        elif a > b:
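            # revision numbers are topologically sorted: an ancestor always
            # has a smaller revision number than its descendants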
            return False
        return bool(self.reachableroots(a, [b], [a], includepath=False))

    def reachableroots(self, minroot, heads, roots, includepath=False):
        """return (heads(::(<roots> and <roots>::<heads>)))

        If includepath is True, return (<roots>::<heads>)."""
        try:
            return self.index.reachableroots2(
                minroot, heads, roots, includepath
            )
        except AttributeError:
            return dagop._reachablerootspure(
                self.parentrevs, minroot, roots, heads, includepath
            )

    def ancestor(self, a, b):
        """calculate the "best" common ancestor of nodes a and b"""

        a, b = self.rev(a), self.rev(b)
        try:
            ancs = self.index.ancestors(a, b)
        except (AttributeError, OverflowError):
            ancs = ancestor.ancestors(self.parentrevs, a, b)
        if ancs:
            # choose a consistent winner when there's a tie
            return min(map(self.node, ancs))
        return self.nullid

    def _match(self, id):
        if isinstance(id, int):
            # rev
            return self.node(id)
        if len(id) == self.nodeconstants.nodelen:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node)  # quick search the index
                return node
            except error.LookupError:
                pass  # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if b"%d" % rev != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 2 * self.nodeconstants.nodelen:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, error.LookupError):
                pass

    def _partialmatch(self, id):
        # we don't care about wdirfilenodeids as they should always be full
        # hashes
        maybewdir = self.nodeconstants.wdirhex.startswith(id)
        ambiguous = False
        try:
            partial = self.index.partialmatch(id)
            if partial and self.hasnode(partial):
                if maybewdir:
                    # single 'ff...' match in radix tree, ambiguous with wdir
                    ambiguous = True
                else:
                    return partial
            elif maybewdir:
                # no 'ff...' match in radix tree, wdir identified
                raise error.WdirUnsupported
            else:
                return None
        except error.RevlogError:
            # parsers.c radix tree lookup gave multiple matches
            # fast path: for unfiltered changelog, radix tree is accurate
            if not getattr(self, 'filteredrevs', None):
                ambiguous = True
            # fall through to slow path that filters hidden revisions
        except (AttributeError, ValueError):
            # we are pure python, or key was too short to search radix tree
            pass
        if ambiguous:
            raise error.AmbiguousPrefixLookupError(
                id, self.display_id, _(b'ambiguous identifier')
            )

        if id in self._pcache:
            return self._pcache[id]

        if len(id) <= 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                prefix = bin(id[: l * 2])
                nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                nl = [
                    n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                ]
                if self.nodeconstants.nullhex.startswith(id):
                    nl.append(self.nullid)
                if len(nl) > 0:
                    if len(nl) == 1 and not maybewdir:
                        self._pcache[id] = nl[0]
                        return nl[0]
                    raise error.AmbiguousPrefixLookupError(
                        id, self.display_id, _(b'ambiguous identifier')
                    )
                if maybewdir:
                    raise error.WdirUnsupported
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
        - revision number or str(revision number)
        - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise error.LookupError(id, self.display_id, _(b'no match found'))

    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node."""

        def isvalid(prefix):
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
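            # the working-directory pseudo-node is hex 'f' repeated, so any
            # all-'f' prefix could refer to it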
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return storageutil.hashrevisionsha1(text, p1, p2) != node

    def _cachesegment(self, offset, data):
        """Add a segment to the revlog cache.

        Accepts an absolute offset and the data that is at that location.
        """
        o, d = self._chunkcache
        # try to add to existing cache
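        # (only when the new segment directly follows the cached one and the
        # combined size stays under _chunksize; otherwise start a new window)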
        if o + len(d) == offset and len(d) + len(data) < _chunksize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _readsegment(self, offset, length, df=None):
        """Load a segment of raw data from the revlog.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
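        # The chunk cache size is expected to be a power of two, which is
        # what lets the mask arithmetic below round the requested range
        # down/up to window boundaries.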
        cachesize = self._chunkcachesize
        realoffset = offset & ~(cachesize - 1)
        reallength = (
            (offset + length + cachesize) & ~(cachesize - 1)
        ) - realoffset
        with self._datareadfp(df) as df:
            df.seek(realoffset)
            d = df.read(reallength)

        self._cachesegment(realoffset, d)
        if offset != realoffset or reallength != length:
            startoffset = offset - realoffset
            if len(d) - startoffset < length:
                filename = self._indexfile if self._inline else self._datafile
                got = len(d) - startoffset
                m = PARTIAL_READ_MSG % (filename, length, offset, got)
                raise error.RevlogError(m)
            return util.buffer(d, startoffset, length)

        if len(d) < length:
            filename = self._indexfile if self._inline else self._datafile
            # offset == realoffset here, so the read started exactly at the
            # requested offset
            got = len(d)
            m = PARTIAL_READ_MSG % (filename, length, offset, got)
            raise error.RevlogError(m)

        return d

    def _getsegment(self, offset, length, df=None):
        """Obtain a segment of raw data from the revlog.

        Accepts an absolute offset, length of bytes to obtain, and an
        optional file handle to the already-opened revlog. If the file
        handle is used, its original seek position will not be preserved.

        Requests for data may be returned from a cache.

        Returns a str or a buffer instance of raw byte data.
        """
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d  # avoid a copy
            return util.buffer(d, cachestart, cacheend - cachestart)

        return self._readsegment(offset, length, df=df)

    def _getsegmentforrevs(self, startrev, endrev, df=None):
        """Obtain a segment of raw data corresponding to a range of revisions.

        Accepts the start and end revisions and an optional already-open
        file handle to be used for reading. If the file handle is read, its
        seek position will not be preserved.

        Requests for data may be satisfied by a cache.

        Returns a 2-tuple of (offset, data) for the requested range of
        revisions. Offset is the integer offset from the beginning of the
        revlog and data is a str or buffer of the raw byte data.

        Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
        to determine where each revision's data begins and ends.
        """
        # Inlined self.start(startrev) & self.end(endrev) for perf reasons
        # (functions are expensive).
        index = self.index
        istart = index[startrev]
        start = int(istart[0] >> 16)
        if startrev == endrev:
            end = start + istart[1]
        else:
            iend = index[endrev]
            end = int(iend[0] >> 16) + iend[1]

1815 if self._inline:
1816 if self._inline:
1816 start += (startrev + 1) * self.index.entry_size
1817 start += (startrev + 1) * self.index.entry_size
1817 end += (endrev + 1) * self.index.entry_size
1818 end += (endrev + 1) * self.index.entry_size
1818 length = end - start
1819 length = end - start
1819
1820
1820 return start, self._getsegment(start, length, df=df)
1821 return start, self._getsegment(start, length, df=df)
1821
1822
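    # Usage sketch (an illustration, assuming a revlog instance ``rl`` and an
    # ascending list of revisions ``revs``): a caller can split the combined
    # segment back into per-revision chunks with ``start()``/``length()``,
    # mirroring what ``_chunks()`` below does:
    #
    #   offset, data = rl._getsegmentforrevs(revs[0], revs[-1])
    #   for rev in revs:
    #       chunkstart = rl.start(rev)
    #       if rl._inline:
    #           chunkstart += (rev + 1) * rl.index.entry_size
    #       chunk = util.buffer(data, chunkstart - offset, rl.length(rev))
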
    def _chunk(self, rev, df=None):
        """Obtain a single decompressed chunk for a revision.

        Accepts an integer revision and an optional already-open file handle
        to be used for reading. If used, the seek position of the file will not
        be preserved.

        Returns a str holding uncompressed data for the requested revision.
        """
        compression_mode = self.index[rev][10]
        data = self._getsegmentforrevs(rev, rev, df=df)[1]
        if compression_mode == COMP_MODE_PLAIN:
            return data
        elif compression_mode == COMP_MODE_DEFAULT:
            return self._decompressor(data)
        elif compression_mode == COMP_MODE_INLINE:
            return self.decompress(data)
        else:
            msg = b'unknown compression mode %d'
            msg %= compression_mode
            raise error.RevlogError(msg)

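    # The three compression modes used above map to three storage layouts:
    # COMP_MODE_PLAIN stores the chunk bytes verbatim, COMP_MODE_DEFAULT
    # defers to the docket-level default decompressor (self._decompressor),
    # and COMP_MODE_INLINE embeds a one-byte header in the chunk itself,
    # which self.decompress() then dispatches on (b'x', b'u', b'\0', or a
    # lookup of another engine).
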
    def _chunks(self, revs, df=None, targetsize=None):
        """Obtain decompressed chunks for the specified revisions.

        Accepts an iterable of numeric revisions that are assumed to be in
        ascending order. Also accepts an optional already-open file handle
        to be used for reading. If used, the seek position of the file will
        not be preserved.

        This function is similar to calling ``self._chunk()`` multiple times,
        but is faster.

        Returns a list with decompressed data for each requested revision.
        """
        if not revs:
            return []
        start = self.start
        length = self.length
        inline = self._inline
        iosize = self.index.entry_size
        buffer = util.buffer

        l = []
        ladd = l.append

        if not self._withsparseread:
            slicedchunks = (revs,)
        else:
            slicedchunks = deltautil.slicechunk(
                self, revs, targetsize=targetsize
            )

        for revschunk in slicedchunks:
            firstrev = revschunk[0]
            # Skip trailing revisions with empty diff
            for lastrev in revschunk[::-1]:
                if length(lastrev) != 0:
                    break

            try:
                offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
            except OverflowError:
                # issue4215 - we can't cache a run of chunks greater than
                # 2G on Windows
                return [self._chunk(rev, df=df) for rev in revschunk]

            decomp = self.decompress
            # self._decompressor might be None, but will not be used in that case
            def_decomp = self._decompressor
            for rev in revschunk:
                chunkstart = start(rev)
                if inline:
                    chunkstart += (rev + 1) * iosize
                chunklength = length(rev)
                comp_mode = self.index[rev][10]
                c = buffer(data, chunkstart - offset, chunklength)
                if comp_mode == COMP_MODE_PLAIN:
                    ladd(c)
                elif comp_mode == COMP_MODE_INLINE:
                    ladd(decomp(c))
                elif comp_mode == COMP_MODE_DEFAULT:
                    ladd(def_decomp(c))
                else:
                    msg = b'unknown compression mode %d'
                    msg %= comp_mode
                    raise error.RevlogError(msg)

        return l

    def _chunkclear(self):
        """Clear the raw chunk cache."""
        self._chunkcache = (0, b'')

    def deltaparent(self, rev):
        """return deltaparent of the given revision"""
        base = self.index[rev][3]
        if base == rev:
            return nullrev
        elif self._generaldelta:
            return base
        else:
            return rev - 1

    def issnapshot(self, rev):
        """tells whether rev is a snapshot"""
        if not self._sparserevlog:
            return self.deltaparent(rev) == nullrev
        elif util.safehasattr(self.index, b'issnapshot'):
            # directly assign the method to cache the testing and access
            self.issnapshot = self.index.issnapshot
            return self.issnapshot(rev)
        if rev == nullrev:
            return True
        entry = self.index[rev]
        base = entry[3]
        if base == rev:
            return True
        if base == nullrev:
            return True
        p1 = entry[5]
        p2 = entry[6]
        if base == p1 or base == p2:
            return False
        return self.issnapshot(base)

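    # Worked example (hypothetical revisions): in a sparse revlog, a full
    # snapshot has base == nullrev, an intermediate snapshot deltas against
    # an earlier snapshot that is *not* one of its parents, and a regular
    # delta uses one of its parents as base; issnapshot() therefore follows
    # the `base` links until one of those cases decides the answer.
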
    def snapshotdepth(self, rev):
        """number of snapshots in the chain before this one"""
        if not self.issnapshot(rev):
            raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
        return len(self._deltachain(rev)[0]) - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions

        The delta calculated is in binary form and is intended to be written to
        revlog data directly. So this function needs raw revision data.
        """
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return bytes(self._chunk(rev2))

        return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))

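    # Round-trip sketch (assuming a revlog instance ``rl`` and two revisions
    # ``r1``/``r2``): applying the returned binary delta to the raw text of
    # ``r1`` reproduces the raw text of ``r2``:
    #
    #   delta = rl.revdiff(r1, r2)
    #   assert mdiff.patches(rl.rawdata(r1), [delta]) == rl.rawdata(r2)
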
    def _processflags(self, text, flags, operation, raw=False):
        """deprecated entry point to access flag processors"""
        msg = b'_processflag(...) use the specialized variant'
        util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        if raw:
            return text, flagutil.processflagsraw(self, text, flags)
        elif operation == b'read':
            return flagutil.processflagsread(self, text, flags)
        else:  # write operation
            return flagutil.processflagswrite(self, text, flags)

    def revision(self, nodeorrev, _df=None, raw=False):
        """return an uncompressed revision of a given node or revision
        number.

        _df - an existing file handle to read from. (internal-only)
        raw - an optional argument specifying if the revision data is to be
        treated as raw data when applying flag transforms. 'raw' should be set
        to True when generating changegroups or in debug commands.
        """
        if raw:
            msg = (
                b'revlog.revision(..., raw=True) is deprecated, '
                b'use revlog.rawdata(...)'
            )
            util.nouideprecwarn(msg, b'5.2', stacklevel=2)
        return self._revisiondata(nodeorrev, _df, raw=raw)

    def sidedata(self, nodeorrev, _df=None):
        """a map of extra data related to the changeset but not part of the hash

        This function currently returns a dictionary. However, a more advanced
        mapping object will likely be used in the future for more
        efficient/lazy code.
        """
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
        else:
            rev = self.rev(nodeorrev)
        return self._sidedata(rev)

    def _revisiondata(self, nodeorrev, _df=None, raw=False):
        # deal with <nodeorrev> argument type
        if isinstance(nodeorrev, int):
            rev = nodeorrev
            node = self.node(rev)
        else:
            node = nodeorrev
            rev = None

        # fast path the special `nullid` rev
        if node == self.nullid:
            return b""

        # ``rawtext`` is the text as stored inside the revlog. Might be the
        # revision or might need to be processed to retrieve the revision.
        rev, rawtext, validated = self._rawtext(node, rev, _df=_df)

        if raw and validated:
            # if we don't want to process the raw text and that raw
            # text is cached, we can exit early.
            return rawtext
        if rev is None:
            rev = self.rev(node)
        # the revlog's flags for this revision
        # (they usually alter its state or content)
        flags = self.flags(rev)

        if validated and flags == REVIDX_DEFAULT_FLAGS:
            # no extra flags set, no flag processor runs, text = rawtext
            return rawtext

        if raw:
            validatehash = flagutil.processflagsraw(self, rawtext, flags)
            text = rawtext
        else:
            r = flagutil.processflagsread(self, rawtext, flags)
            text, validatehash = r
        if validatehash:
            self.checkhash(text, node, rev=rev)
        if not validated:
            self._revisioncache = (node, rev, rawtext)

        return text

    def _rawtext(self, node, rev, _df=None):
        """return the possibly unvalidated rawtext for a revision

        returns (rev, rawtext, validated)
        """

        # revision in the cache (could be useful to apply delta)
        cachedrev = None
        # An intermediate text to apply deltas to
        basetext = None

        # Check if we have the entry in cache
        # The cache entry looks like (node, rev, rawtext)
        if self._revisioncache:
            if self._revisioncache[0] == node:
                return (rev, self._revisioncache[2], True)
            cachedrev = self._revisioncache[1]

        if rev is None:
            rev = self.rev(node)

        chain, stopped = self._deltachain(rev, stoprev=cachedrev)
        if stopped:
            basetext = self._revisioncache[2]

        # drop cache to save memory, the caller is expected to
        # update self._revisioncache after validating the text
        self._revisioncache = None

        targetsize = None
        rawsize = self.index[rev][2]
        if 0 <= rawsize:
            targetsize = 4 * rawsize

        bins = self._chunks(chain, df=_df, targetsize=targetsize)
        if basetext is None:
            basetext = bytes(bins[0])
            bins = bins[1:]

        rawtext = mdiff.patches(basetext, bins)
        del basetext  # let us have a chance to free memory early
        return (rev, rawtext, False)

    def _sidedata(self, rev):
        """Return the sidedata for a given revision number."""
        index_entry = self.index[rev]
        sidedata_offset = index_entry[8]
        sidedata_size = index_entry[9]

        if self._inline:
            sidedata_offset += self.index.entry_size * (1 + rev)
        if sidedata_size == 0:
            return {}

        # XXX this needs caching, as we do for data
        with self._sidedatareadfp() as sdf:
            if self._docket.sidedata_end < sidedata_offset + sidedata_size:
                filename = self._sidedatafile
                end = self._docket.sidedata_end
                offset = sidedata_offset
                length = sidedata_size
                m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
                raise error.RevlogError(m)

            sdf.seek(sidedata_offset, os.SEEK_SET)
            comp_segment = sdf.read(sidedata_size)

            if len(comp_segment) < sidedata_size:
                filename = self._sidedatafile
                length = sidedata_size
                offset = sidedata_offset
                got = len(comp_segment)
                m = PARTIAL_READ_MSG % (filename, length, offset, got)
                raise error.RevlogError(m)

        comp = self.index[rev][11]
        if comp == COMP_MODE_PLAIN:
            segment = comp_segment
        elif comp == COMP_MODE_DEFAULT:
            segment = self._decompressor(comp_segment)
        elif comp == COMP_MODE_INLINE:
            segment = self.decompress(comp_segment)
        else:
            msg = b'unknown compression mode %d'
            msg %= comp
            raise error.RevlogError(msg)

        sidedata = sidedatautil.deserialize_sidedata(segment)
        return sidedata

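    # Usage sketch (assuming a revlog instance ``rl``): sidedata is a map
    # keyed by integer identifiers, and revisions without sidedata simply
    # yield an empty map:
    #
    #   sd = rl.sidedata(rev)  # {} when the revision carries no sidedata
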
    def rawdata(self, nodeorrev, _df=None):
        """return the uncompressed raw data of a given node or revision number.

        _df - an existing file handle to read from. (internal-only)
        """
        return self._revisiondata(nodeorrev, _df, raw=True)

    def hash(self, text, p1, p2):
        """Compute a node hash.

        Available as a function so that subclasses can replace the hash
        as needed.
        """
        return storageutil.hashrevisionsha1(text, p1, p2)

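    # Equivalent computation (a sketch of what hashrevisionsha1 boils down
    # to): the node is the SHA-1 of the two parent nodes in sorted order
    # followed by the revision text:
    #
    #   import hashlib
    #   def _node(text, p1, p2):
    #       a, b = sorted([p1, p2])
    #       return hashlib.sha1(a + b + text).digest()
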
    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Check node hash integrity.

        Available as a function so that subclasses can extend hash mismatch
        behaviors as needed.
        """
        try:
            if p1 is None and p2 is None:
                p1, p2 = self.parents(node)
            if node != self.hash(text, p1, p2):
                # Clear the revision cache on hash failure. The revision cache
                # only stores the raw revision and clearing the cache does have
                # the side-effect that we won't have a cache hit when the raw
                # revision data is accessed. But this case should be rare and
                # it is extra work to teach the cache about the hash
                # verification state.
                if self._revisioncache and self._revisioncache[0] == node:
                    self._revisioncache = None

                revornode = rev
                if revornode is None:
                    revornode = templatefilters.short(hex(node))
                raise error.RevlogError(
                    _(b"integrity check failed on %s:%s")
                    % (self.display_id, pycompat.bytestr(revornode))
                )
        except error.RevlogError:
            if self._censorable and storageutil.iscensoredtext(text):
                raise error.CensoredNodeError(self.display_id, node, text)
            raise

    def _enforceinlinesize(self, tr):
        """Check if the revlog is too big for inline and convert if so.

        This should be called after revisions are added to the revlog. If the
        revlog has grown too large to be an inline revlog, it will convert it
        to use multiple index and data files.
        """
        tiprev = len(self) - 1
        total_size = self.start(tiprev) + self.length(tiprev)
        if not self._inline or total_size < _maxinline:
            return

        troffset = tr.findoffset(self._indexfile)
        if troffset is None:
            raise error.RevlogError(
                _(b"%s not found in the transaction") % self._indexfile
            )
        trindex = 0
        tr.add(self._datafile, 0)

        existing_handles = False
        if self._writinghandles is not None:
            existing_handles = True
            fp = self._writinghandles[0]
            fp.flush()
            fp.close()
            # We can't use the cached file handle after close(). So prevent
            # its usage.
            self._writinghandles = None

        new_dfh = self._datafp(b'w+')
        new_dfh.truncate(0)  # drop any potentially existing data
        try:
            with self._indexfp() as read_ifh:
                for r in self:
                    new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                    if troffset <= self.start(r) + r * self.index.entry_size:
                        trindex = r
                new_dfh.flush()

            with self.__index_new_fp() as fp:
                self._format_flags &= ~FLAG_INLINE_DATA
                self._inline = False
                for i in self:
                    e = self.index.entry_binary(i)
                    if i == 0 and self._docket is None:
                        header = self._format_flags | self._format_version
                        header = self.index.pack_header(header)
                        e = header + e
                    fp.write(e)
                if self._docket is not None:
                    self._docket.index_end = fp.tell()

                # There is a small transactional race here. If the rename of
                # the index fails, we should remove the datafile. It is more
                # important to ensure that the data file is not truncated
                # when the index is replaced as otherwise data is lost.
                tr.replace(self._datafile, self.start(trindex))

                # the temp file replaces the real index when we exit the
                # context manager

            tr.replace(self._indexfile, trindex * self.index.entry_size)
            nodemaputil.setup_persistent_nodemap(tr, self)
            self._chunkclear()

            if existing_handles:
                # switched from inline to conventional; reopen the index
                ifh = self.__index_write_fp()
                self._writinghandles = (ifh, new_dfh, None)
                new_dfh = None
        finally:
            if new_dfh is not None:
                new_dfh.close()

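    # Sizing note: an inline revlog interleaves each data chunk with its
    # index entry inside the .i file; once the combined size crosses
    # _maxinline (128 KiB by default), the method above splits the revlog
    # into the usual .i/.d pair.
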
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored."""

    @contextlib.contextmanager
    def _writing(self, transaction):
        if self._trypending:
            msg = b'try to write in a `trypending` revlog: %s'
            msg %= self.display_id
            raise error.ProgrammingError(msg)
        if self._writinghandles is not None:
            yield
        else:
            ifh = dfh = sdfh = None
            try:
                r = len(self)
                # opening the data file.
                dsize = 0
                if r:
                    dsize = self.end(r - 1)
                dfh = None
                if not self._inline:
                    try:
                        dfh = self._datafp(b"r+")
                        if self._docket is None:
                            dfh.seek(0, os.SEEK_END)
                        else:
                            dfh.seek(self._docket.data_end, os.SEEK_SET)
                    except IOError as inst:
                        if inst.errno != errno.ENOENT:
                            raise
                        dfh = self._datafp(b"w+")
                        transaction.add(self._datafile, dsize)
                if self._sidedatafile is not None:
                    try:
                        sdfh = self.opener(self._sidedatafile, mode=b"r+")
                        # seek the sidedata handle, not the data handle
                        sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
                    except IOError as inst:
                        if inst.errno != errno.ENOENT:
                            raise
                        sdfh = self.opener(self._sidedatafile, mode=b"w+")
                        transaction.add(
                            self._sidedatafile, self._docket.sidedata_end
                        )

                # opening the index file.
                isize = r * self.index.entry_size
                ifh = self.__index_write_fp()
                if self._inline:
                    transaction.add(self._indexfile, dsize + isize)
                else:
                    transaction.add(self._indexfile, isize)
                # exposing all file handles for writing.
                self._writinghandles = (ifh, dfh, sdfh)
                yield
                if self._docket is not None:
                    self._write_docket(transaction)
            finally:
                self._writinghandles = None
                if dfh is not None:
                    dfh.close()
                if sdfh is not None:
                    sdfh.close()
                # closing the index file last to avoid exposing referent to
                # potential unflushed data content.
                if ifh is not None:
                    ifh.close()

    def _write_docket(self, transaction):
        """write the current docket on disk

        Exists as a method to help the changelog to implement transaction
        logic.

        We could also imagine using the same transaction logic for all
        revlogs since dockets are cheap."""
        self._docket.write(transaction)

    def addrevision(
        self,
        text,
        transaction,
        link,
        p1,
        p2,
        cachedelta=None,
        node=None,
        flags=REVIDX_DEFAULT_FLAGS,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        node - nodeid of revision; typically node is not specified, and it is
        computed by default as hash(text, p1, p2), however subclasses might
        use a different hashing method (and override checkhash() in such case)
        flags - the known flags to set on the revision
        deltacomputer - an optional deltacomputer instance shared between
        multiple calls
        """
        if link == nullrev:
            raise error.RevlogError(
                _(b"attempted to add linkrev -1 to %s") % self.display_id
            )

        if sidedata is None:
            sidedata = {}
        elif sidedata and not self.hassidedata:
            raise error.ProgrammingError(
                _(b"trying to add sidedata to a revlog that doesn't support them")
            )

        if flags:
            node = node or self.hash(text, p1, p2)

        rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

        # If the flag processor modifies the revision data, ignore any provided
        # cachedelta.
        if rawtext != text:
            cachedelta = None

        if len(rawtext) > _maxentrysize:
            raise error.RevlogError(
                _(
                    b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                )
                % (self.display_id, len(rawtext))
            )

        node = node or self.hash(rawtext, p1, p2)
        rev = self.index.get_rev(node)
        if rev is not None:
            return rev

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self.addrawrevision(
            rawtext,
            transaction,
            link,
            p1,
            p2,
            node,
            flags,
            cachedelta=cachedelta,
            deltacomputer=deltacomputer,
            sidedata=sidedata,
        )

    def addrawrevision(
        self,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        node,
        flags,
        cachedelta=None,
        deltacomputer=None,
        sidedata=None,
    ):
        """add a raw revision with known flags, node and parents
        useful when reusing a revision not stored in this revlog (ex: received
        over wire, or read from an external bundle).
        """
        with self._writing(transaction):
            return self._addrevision(
                node,
                rawtext,
                transaction,
                link,
                p1,
                p2,
                flags,
                cachedelta,
                deltacomputer=deltacomputer,
                sidedata=sidedata,
            )

    def compress(self, data):
        """Generate a possibly-compressed representation of data."""
        if not data:
            return b'', data

        compressed = self._compressor.compress(data)

        if compressed:
            # The revlog compressor added the header in the returned data.
            return b'', compressed

        if data[0:1] == b'\0':
            return b'', data
        return b'u', data

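    # Header convention sketch (assuming a revlog instance ``rl``): an empty
    # header means the payload already identifies itself (e.g. zlib output
    # starts with b'x'), while b'u' tags data stored uncompressed whose first
    # byte is not b'\0':
    #
    #   h, d = rl.compress(b'x' * 1024)  # likely (b'', b'x\x9c...') with zlib
    #   h, d = rl.compress(b'abc')       # too small to compress: (b'u', b'abc')
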
    def decompress(self, data):
        """Decompress a revlog chunk.

        The chunk is expected to begin with a header identifying the
        format type so it can be routed to an appropriate decompressor.
        """
        if not data:
            return data

        # Revlogs are read much more frequently than they are written and many
        # chunks only take microseconds to decompress, so performance is
        # important here.
        #
        # We can make a few assumptions about revlogs:
        #
        # 1) the majority of chunks will be compressed (as opposed to inline
        #    raw data).
        # 2) decompressing *any* data will likely be at least 10x slower than
        #    returning raw inline data.
        # 3) we want to prioritize common and officially supported compression
        #    engines
        #
        # It follows that we want to optimize for "decompress compressed data
        # when encoded with common and officially supported compression engines"
        # case over "raw data" and "data encoded by less common or non-official
        # compression engines." That is why we have the inline lookup first
        # followed by the compengines lookup.
        #
        # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
        # compressed chunks. And this matters for changelog and manifest reads.
        t = data[0:1]

        if t == b'x':
            try:
                return _zlibdecompress(data)
            except zlib.error as e:
                raise error.RevlogError(
                    _(b'revlog decompress error: %s')
                    % stringutil.forcebytestr(e)
                )
        # '\0' is more common than 'u' so it goes first.
        elif t == b'\0':
            return data
        elif t == b'u':
            return util.buffer(data, 1)

        compressor = self._get_decompressor(t)

        return compressor.decompress(data)

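    # Round-trip sketch (assuming a revlog instance ``rl``): what compress()
    # produces, decompress() undoes once the header and payload are joined:
    #
    #   h, packed = rl.compress(data)
    #   assert bytes(rl.decompress(h + packed)) == data
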
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            h, d = deltainfo.data
            if not h and not d:
                # no data to store at all... declare them uncompressed
                compression_mode = COMP_MODE_PLAIN
            elif not h:
                t = d[0:1]
                if t == b'\0':
                    compression_mode = COMP_MODE_PLAIN
                elif t == self._docket.default_compression_header:
                    compression_mode = COMP_MODE_DEFAULT
            elif h == b'u':
                # we have a more efficient way to declare uncompressed
                h = b''
                compression_mode = COMP_MODE_PLAIN
                deltainfo = deltautil.drop_u_compression(deltainfo)

        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.hassidedata:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
            sidedata_offset,
            len(serialized_sidedata),
            compression_mode,
            sidedata_compression_mode,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr

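    # Index entry layout as appended above, by position: 0 packed
    # offset/flags, 1 compressed length, 2 raw length, 3 delta base rev,
    # 4 link rev, 5 p1 rev, 6 p2 rev, 7 node, 8 sidedata offset, 9 sidedata
    # length, 10 data compression mode, 11 sidedata compression mode. The
    # accessors used earlier in this module (index[rev][3], index[rev][10],
    # index[rev][11], ...) rely on these positions.
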
    def _get_data_offset(self, prev):
        """Returns the current offset in the (in-transaction) data file.

        Versions < 2 of the revlog can get this in O(1), while revlog v2
        needs a docket file to store that information: since sidedata can be
        rewritten to the end of the data file within a transaction, you can
        have cases where, for example, rev `n` does not have sidedata while
        rev `n - 1` does, leading to `n - 1`'s sidedata being written after
        `n`'s data.

        TODO cache this in a docket file before getting out of experimental."""
        if self._docket is None:
            return self.end(prev)
        else:
            return self._docket.data_end

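    # A sketch of the fast path above: for pre-v2 revlogs the write position
    # is derived from the last index entry alone, since
    #
    #     self.end(prev) == self.start(prev) + self.length(prev)
    #
    # Revlog v2 cannot rely on this: in-transaction sidedata rewrites may
    # have moved the data file's logical end, so the docket is consulted.
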
    def _writeentry(
        self, transaction, entry, data, link, offset, sidedata, sidedata_offset
    ):
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        curr = len(self) - 1
        if not self._inline:
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            self._docket.index_end = self._writinghandles[0].tell()
            self._docket.data_end = self._writinghandles[1].tell()
            self._docket.sidedata_end = self._writinghandles[2].tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)

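    # A minimal sketch of the seek-then-write discipline used above
    # (_append and its arguments are illustrative, not revlog.py API):
    #
    #     def _append(fh, payload, logical_end=None):
    #         if logical_end is None:
    #             fh.seek(0, os.SEEK_END)  # no docket: append at physical EOF
    #         else:
    #             fh.seek(logical_end, os.SEEK_SET)  # docket tracks the end
    #         fh.write(payload)
    #         return fh.tell()  # the new end, recorded back into the docket
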
    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the revision number that was added.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                deltacomputer = deltautil.deltacomputer(self)
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            self._adding_group = False
        return not empty

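    # Illustration of the censored-base check above: an mdiff delta is a
    # sequence of hunks, each a b">lll"-packed (start, end, length) header
    # followed by ``length`` bytes of replacement data. A delta applied on
    # top of a censored base is only accepted when it replaces the whole
    # base in a single hunk, i.e. when it starts with
    # mdiff.replacediffheader(oldlen, newlen), which should be equivalent to
    #
    #     struct.pack(b">lll", 0, oldlen, newlen)
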
    def iscensored(self, rev):
        """Check if a file revision is censored."""
        if not self._censorable:
            return False

        return self.flags(rev) & REVIDX_ISCENSORED

    def _peek_iscensored(self, baserev, delta):
        """Quickly check if a delta produces a censored revision."""
        if not self._censorable:
            return False

        return storageutil.deltaiscensored(delta, baserev, self.rawsize)

    def getstrippoint(self, minlink):
        """find the minimum rev that must be stripped to strip the linkrev

        Returns a tuple containing the minimum rev and a set of all revs that
        have linkrevs that will be broken by this strip.
        """
        return storageutil.resolvestripinfo(
            minlink,
            len(self) - 1,
            self.headrevs(),
            self.linkrev,
            self.parentrevs,
        )

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        rev, _ = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # first truncate the files on disk
        data_end = self.start(rev)
        if not self._inline:
            transaction.add(self._datafile, data_end)
            end = rev * self.index.entry_size
        else:
            end = data_end + (rev * self.index.entry_size)

        if self._sidedatafile:
            sidedata_end = self.sidedata_cut_off(rev)
            transaction.add(self._sidedatafile, sidedata_end)

        transaction.add(self._indexfile, end)
        if self._docket is not None:
            # XXX we could leverage the docket while stripping. However it is
            # not powerful enough at the time of this comment
            self._docket.index_end = end
            self._docket.data_end = data_end
            self._docket.sidedata_end = sidedata_end
            self._docket.write(transaction, stripping=True)

        # then reset internal state in memory to forget those revisions
        self._revisioncache = None
        self._chaininfocache = util.lrucachedict(500)
        self._chunkclear()

        del self.index[rev:-1]

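    # The truncation offsets computed by strip(), restated: with a separate
    # data file, the index holds fixed-size records and is cut at
    # ``rev * entry_size`` while the data file is cut at ``start(rev)``; an
    # inline revlog interleaves index entries and data in a single file,
    # which is therefore cut at ``start(rev) + rev * entry_size``.
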
    def checksize(self):
        """Check size of index and data files

        return a (dd, di) tuple.
        - dd: extra bytes for the "data" file
        - di: extra bytes for the "index" file

        A healthy revlog will return (0, 0).
        """
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            with self._datafp() as f:
                f.seek(0, io.SEEK_END)
                actual = f.tell()
            dd = actual - expected
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self._indexfile)
            f.seek(0, io.SEEK_END)
            actual = f.tell()
            f.close()
            s = self.index.entry_size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

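    # Worked example with hypothetical numbers: entry_size == 64, ten
    # revisions, and the last revision's data ending at byte 4096. A healthy
    # revlog then has a 640-byte index and a 4096-byte data file, so
    # checksize() returns (0, 0). A 4100-byte data file would instead give
    # dd == 4: four trailing junk bytes, e.g. from an interrupted write.
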
    def files(self):
        res = [self._indexfile]
        if not self._inline:
            res.append(self._datafile)
        return res

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
        sidedata_helpers=None,
    ):
        if nodesorder not in (b'nodes', b'storage', b'linear', None):
            raise error.ProgrammingError(
                b'unhandled value for nodesorder: %s' % nodesorder
            )

        if nodesorder is None and not self._generaldelta:
            nodesorder = b'storage'

        if (
            not self._storedeltachains
            and deltamode != repository.CG_DELTAMODE_PREV
        ):
            deltamode = repository.CG_DELTAMODE_FULL

        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revlogrevisiondelta,
            deltaparentfn=self.deltaparent,
            candeltafn=self.candelta,
            rawsizefn=self.rawsize,
            revdifffn=self.revdiff,
            flagsfn=self.flags,
            deltamode=deltamode,
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            sidedata_helpers=sidedata_helpers,
        )

    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}

    def clone(
        self,
        tr,
        destrevlog,
        addrevisioncb=None,
        deltareuse=DELTAREUSESAMEREVS,
        forcedeltabothparents=None,
        sidedata_helpers=None,
    ):
        """Copy this revlog to another, possibly with format changes.

        The destination revlog will contain the same revisions and nodes.
        However, it may not be bit-for-bit identical due to e.g. delta encoding
        differences.

        The ``deltareuse`` argument controls how deltas from the existing
        revlog are preserved in the destination revlog. The argument can have
        the following values:

        DELTAREUSEALWAYS
          Deltas will always be reused (if possible), even if the destination
          revlog would not select the same revisions for the delta. This is the
          fastest mode of operation.
        DELTAREUSESAMEREVS
          Deltas will be reused if the destination revlog would pick the same
          revisions for the delta. This mode strikes a balance between speed
          and optimization.
        DELTAREUSENEVER
          Deltas will never be reused. This is the slowest mode of execution.
          This mode can be used to recompute deltas (e.g. if the diff/delta
          algorithm changes).
        DELTAREUSEFULLADD
          Revisions will be re-added as if they were new content. This is
          slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
          e.g. large file detection and handling.

        Delta computation can be slow, so the choice of delta reuse policy can
        significantly affect run time.

        The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
        two extremes. Deltas will be reused if they are appropriate. But if the
        delta could choose a better revision, it will do so. This means if you
        are converting a non-generaldelta revlog to a generaldelta revlog,
        deltas will be recomputed if the delta's parent isn't a parent of the
        revision.

        In addition to the delta policy, the ``forcedeltabothparents``
        argument controls whether to force compute deltas against both parents
        for merges. By default, the current default is used.

        See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
        `sidedata_helpers`.
        """
        if deltareuse not in self.DELTAREUSEALL:
            raise ValueError(
                _(b'value for deltareuse invalid: %s') % deltareuse
            )

        if len(destrevlog):
            raise ValueError(_(b'destination revlog is not empty'))

        if getattr(self, 'filteredrevs', None):
            raise ValueError(_(b'source revlog has filtered revisions'))
        if getattr(destrevlog, 'filteredrevs', None):
            raise ValueError(_(b'destination revlog has filtered revisions'))

        # lazydelta and lazydeltabase control whether to reuse a cached delta,
        # if possible.
        oldlazydelta = destrevlog._lazydelta
        oldlazydeltabase = destrevlog._lazydeltabase
        oldamd = destrevlog._deltabothparents

        try:
            if deltareuse == self.DELTAREUSEALWAYS:
                destrevlog._lazydeltabase = True
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSESAMEREVS:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = True
            elif deltareuse == self.DELTAREUSENEVER:
                destrevlog._lazydeltabase = False
                destrevlog._lazydelta = False

            destrevlog._deltabothparents = forcedeltabothparents or oldamd

            self._clone(
                tr,
                destrevlog,
                addrevisioncb,
                deltareuse,
                forcedeltabothparents,
                sidedata_helpers,
            )

        finally:
            destrevlog._lazydelta = oldlazydelta
            destrevlog._lazydeltabase = oldlazydeltabase
            destrevlog._deltabothparents = oldamd

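    # Hypothetical usage sketch (``repo``, ``src`` and ``dest`` are
    # illustrative names): recompute every delta while copying, e.g. after a
    # delta algorithm change:
    #
    #     with repo.transaction(b'recompute-deltas') as tr:
    #         src.clone(tr, dest, deltareuse=src.DELTAREUSENEVER)
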
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing"""
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                if not cachedelta:
                    rawtext = self._revisiondata(rev)
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)

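    # Note on the flag arithmetic in _clone() above: ``&`` binds tighter
    # than ``|``, so ``flags | new_flags[0] & ~new_flags[1]`` evaluates as
    # ``flags | (new_flags[0] & ~new_flags[1])``; the sidedata helpers
    # return a pair of flag sets, one to add (new_flags[0]) and one to
    # remove (new_flags[1]).
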
    def censorrevision(self, tr, censornode, tombstone=b''):
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )
-
-        censorrev = self.rev(censornode)
-        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
-
-        if len(tombstone) > self.rawsize(censorrev):
-            raise error.Abort(
-                _(b'censor tombstone must be no longer than censored data')
-            )
-
-        # Rewriting the revlog in place is hard. Our strategy for censoring is
-        # to create a new revlog, copy all revisions to it, then replace the
-        # revlogs on transaction close.
-        #
-        # This is a bit dangerous. We could easily have a mismatch of state.
-        newrl = revlog(
-            self.opener,
-            target=self.target,
-            radix=self.radix,
-            postfix=b'tmpcensored',
-            censorable=True,
-        )
-        newrl._format_version = self._format_version
-        newrl._format_flags = self._format_flags
-        newrl._generaldelta = self._generaldelta
-        newrl._parse_index = self._parse_index
-
-        for rev in self.revs():
-            node = self.node(rev)
-            p1, p2 = self.parents(node)
-
-            if rev == censorrev:
-                newrl.addrawrevision(
-                    tombstone,
-                    tr,
-                    self.linkrev(censorrev),
-                    p1,
-                    p2,
-                    censornode,
-                    REVIDX_ISCENSORED,
-                )
-
-                if newrl.deltaparent(rev) != nullrev:
-                    raise error.Abort(
-                        _(
-                            b'censored revision stored as delta; '
-                            b'cannot censor'
-                        ),
-                        hint=_(
-                            b'censoring of revlogs is not '
-                            b'fully implemented; please report '
-                            b'this bug'
-                        ),
-                    )
-                continue
-
-            if self.iscensored(rev):
-                if self.deltaparent(rev) != nullrev:
-                    raise error.Abort(
-                        _(
-                            b'cannot censor due to censored '
-                            b'revision having delta stored'
-                        )
-                    )
-                rawtext = self._chunk(rev)
-            else:
-                rawtext = self.rawdata(rev)
-
-            newrl.addrawrevision(
-                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
-            )
-
-        tr.addbackup(self._indexfile, location=b'store')
-        if not self._inline:
-            tr.addbackup(self._datafile, location=b'store')
-
-        self.opener.rename(newrl._indexfile, self._indexfile)
-        if not self._inline:
-            self.opener.rename(newrl._datafile, self._datafile)
-
-        self.clearcaches()
-        self._loadindex()
+        elif self._format_version == REVLOGV1:
+            censor.v1_censor(self, tr, censornode, tombstone)
+        else:
+            # revlog v2
+            raise error.RevlogError(
+                _(b'cannot censor with version %d revlogs')
+                % self._format_version
+            )

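    # Illustration of the tombstone built by the (removed) v1 path, now in
    # censor.v1_censor: packmeta wraps the metadata in the b'\1\n' header
    # convention referenced in verifyintegrity() below, so, assuming the
    # usual "key: value" metadata encoding, something like:
    #
    #     storageutil.packmeta({b'censored': b'reason'}, b'')
    #     == b'\x01\ncensored: reason\n\x01\n'
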
    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #  flags()              | 0      | 0      | 0     | not 0
            #  renamed()            | False  | True   | False | ?
            #  rawtext[0:2]=='\1\n' | False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common  | rename | meta  | ext
            # -------------------------------------------------
            # rawsize()    | L1      | L1     | L1    | L1
            # size()       | L1      | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2      | L2     | L2    | L2
            # len(text)    | L2      | L2     | L2    | L3
            # len(read())  | L2      | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                state[b'skipread'].add(node)
            except Exception as e:
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)

    def storageinfo(
        self,
        exclusivefiles=False,
        sharedfiles=False,
        revisionscount=False,
        trackedsize=False,
        storedsize=False,
    ):
        d = {}

        if exclusivefiles:
            d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
            if not self._inline:
                d[b'exclusivefiles'].append((self.opener, self._datafile))

        if sharedfiles:
            d[b'sharedfiles'] = []

        if revisionscount:
            d[b'revisionscount'] = len(self)

        if trackedsize:
            d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))

        if storedsize:
            d[b'storedsize'] = sum(
                self.opener.stat(path).st_size for path in self.files()
            )

        return d

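    # Hypothetical call, assuming a non-inline revlog with 42 revisions:
    #
    #     rl.storageinfo(revisionscount=True, exclusivefiles=True)
    #     -> {b'revisionscount': 42,
    #         b'exclusivefiles': [(opener, index file), (opener, data file)]}
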
    def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
        if not self.hassidedata:
            return
        # revlog formats with sidedata support do not support inline
        assert not self._inline
        if not helpers[1] and not helpers[2]:
            # Nothing to generate or remove
            return

        new_entries = []
        # append the new sidedata
        with self._writing(transaction):
            ifh, dfh, sdfh = self._writinghandles
            dfh.seek(self._docket.sidedata_end, os.SEEK_SET)

            current_offset = sdfh.tell()
            for rev in range(startrev, endrev + 1):
                entry = self.index[rev]
                new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                    store=self,
                    sidedata_helpers=helpers,
                    sidedata={},
                    rev=rev,
                )

                serialized_sidedata = sidedatautil.serialize_sidedata(
                    new_sidedata
                )

                sidedata_compression_mode = COMP_MODE_INLINE
                if serialized_sidedata and self.hassidedata:
                    sidedata_compression_mode = COMP_MODE_PLAIN
                    h, comp_sidedata = self.compress(serialized_sidedata)
                    if (
                        h != b'u'
                        and comp_sidedata[0] != b'\0'
                        and len(comp_sidedata) < len(serialized_sidedata)
                    ):
                        assert not h
                        if (
                            comp_sidedata[0]
                            == self._docket.default_compression_header
                        ):
                            sidedata_compression_mode = COMP_MODE_DEFAULT
                            serialized_sidedata = comp_sidedata
                        else:
                            sidedata_compression_mode = COMP_MODE_INLINE
                            serialized_sidedata = comp_sidedata
                if entry[8] != 0 or entry[9] != 0:
                    # rewriting entries that already have sidedata is not
                    # supported yet, because it introduces garbage data in the
                    # revlog.
                    msg = b"rewriting existing sidedata is not supported yet"
                    raise error.Abort(msg)

                # Apply (potential) flags to add and to remove after running
                # the sidedata helpers
                new_offset_flags = entry[0] | flags[0] & ~flags[1]
                entry_update = (
                    current_offset,
                    len(serialized_sidedata),
                    new_offset_flags,
                    sidedata_compression_mode,
                )

                # the sidedata computation might have moved the file cursors
                # around
                sdfh.seek(current_offset, os.SEEK_SET)
                sdfh.write(serialized_sidedata)
                new_entries.append(entry_update)
                current_offset += len(serialized_sidedata)
            self._docket.sidedata_end = sdfh.tell()

            # rewrite the new index entries
            ifh.seek(startrev * self.index.entry_size)
            for i, e in enumerate(new_entries):
                rev = startrev + i
                self.index.replace_sidedata_info(rev, *e)
                packed = self.index.entry_binary(rev)
                if rev == 0 and self._docket is None:
                    header = self._format_flags | self._format_version
                    header = self.index.pack_header(header)
                    packed = header + packed
                ifh.write(packed)
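    # The sidedata compression-mode selection above, restated: a non-empty
    # chunk is written uncompressed (COMP_MODE_PLAIN) unless self.compress()
    # produced something strictly smaller; the compressed bytes then keep
    # their engine header inline (COMP_MODE_INLINE) unless that header
    # matches the docket's default compression header, in which case
    # COMP_MODE_DEFAULT tells readers to assume the docket's default engine.
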
@@ -1,3535 +1,102 b''
1 # revlog.py - storage back-end for mercurial
1 # censor code related to censoring revision
2 # coding: utf8
3 #
2 #
4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
4 # Copyright 2015 Google, Inc <martinvonz@google.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 """Storage back-end for Mercurial.
9 from ..node import (
10
11 This provides efficient delta storage with O(1) retrieve and append
12 and O(changes) merge between branches.
13 """
14
15 from __future__ import absolute_import
16
17 import binascii
18 import collections
19 import contextlib
20 import errno
21 import io
22 import os
23 import struct
24 import zlib
25
26 # import stuff from node for others to import from revlog
27 from .node import (
28 bin,
29 hex,
30 nullrev,
10 nullrev,
31 sha1nodeconstants,
32 short,
33 wdirrev,
34 )
35 from .i18n import _
36 from .pycompat import getattr
37 from .revlogutils.constants import (
38 ALL_KINDS,
39 CHANGELOGV2,
40 COMP_MODE_DEFAULT,
41 COMP_MODE_INLINE,
42 COMP_MODE_PLAIN,
43 FEATURES_BY_VERSION,
44 FLAG_GENERALDELTA,
45 FLAG_INLINE_DATA,
46 INDEX_HEADER,
47 KIND_CHANGELOG,
48 REVLOGV0,
49 REVLOGV1,
50 REVLOGV1_FLAGS,
51 REVLOGV2,
52 REVLOGV2_FLAGS,
53 REVLOG_DEFAULT_FLAGS,
54 REVLOG_DEFAULT_FORMAT,
55 REVLOG_DEFAULT_VERSION,
56 SUPPORTED_FLAGS,
57 )
58 from .revlogutils.flagutil import (
59 REVIDX_DEFAULT_FLAGS,
60 REVIDX_ELLIPSIS,
61 REVIDX_EXTSTORED,
62 REVIDX_FLAGS_ORDER,
63 REVIDX_HASCOPIESINFO,
64 REVIDX_ISCENSORED,
65 REVIDX_RAWTEXT_CHANGING_FLAGS,
66 )
67 from .thirdparty import attr
68 from . import (
69 ancestor,
70 dagop,
71 error,
72 mdiff,
73 policy,
74 pycompat,
75 templatefilters,
76 util,
77 )
78 from .interfaces import (
79 repository,
80 util as interfaceutil,
81 )
82 from .revlogutils import (
83 deltas as deltautil,
84 docket as docketutil,
85 flagutil,
86 nodemap as nodemaputil,
87 revlogv0,
88 sidedata as sidedatautil,
89 )
90 from .utils import (
91 storageutil,
92 stringutil,
93 )
94
95 # blanked usage of all the name to prevent pyflakes constraints
96 # We need these name available in the module for extensions.
97
98 REVLOGV0
99 REVLOGV1
100 REVLOGV2
101 FLAG_INLINE_DATA
102 FLAG_GENERALDELTA
103 REVLOG_DEFAULT_FLAGS
104 REVLOG_DEFAULT_FORMAT
105 REVLOG_DEFAULT_VERSION
106 REVLOGV1_FLAGS
107 REVLOGV2_FLAGS
108 REVIDX_ISCENSORED
109 REVIDX_ELLIPSIS
110 REVIDX_HASCOPIESINFO
111 REVIDX_EXTSTORED
112 REVIDX_DEFAULT_FLAGS
113 REVIDX_FLAGS_ORDER
114 REVIDX_RAWTEXT_CHANGING_FLAGS
115
116 parsers = policy.importmod('parsers')
117 rustancestor = policy.importrust('ancestor')
118 rustdagop = policy.importrust('dagop')
119 rustrevlog = policy.importrust('revlog')
120
121 # Aliased for performance.
122 _zlibdecompress = zlib.decompress
123
124 # max size of revlog with inline data
125 _maxinline = 131072
126 _chunksize = 1048576
127
128 # Flag processors for REVIDX_ELLIPSIS.
129 def ellipsisreadprocessor(rl, text):
130 return text, False
131
132
133 def ellipsiswriteprocessor(rl, text):
134 return text, False
135
136
137 def ellipsisrawprocessor(rl, text):
138 return False
139
140
141 ellipsisprocessor = (
142 ellipsisreadprocessor,
143 ellipsiswriteprocessor,
144 ellipsisrawprocessor,
145 )
11 )
146
12 from ..i18n import _
147
13 from .. import (
148 def offset_type(offset, type):
14 error,
149 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
150 raise ValueError(b'unknown revlog index flags')
151 return int(int(offset) << 16 | type)
152
153
154 def _verify_revision(rl, skipflags, state, node):
155 """Verify the integrity of the given revlog ``node`` while providing a hook
156 point for extensions to influence the operation."""
157 if skipflags:
158 state[b'skipread'].add(node)
159 else:
160 # Side-effect: read content and verify hash.
161 rl.revision(node)
162
163
164 # True if a fast implementation for persistent-nodemap is available
165 #
166 # We also consider we have a "fast" implementation in "pure" python because
167 # people using pure don't really have performance consideration (and a
168 # wheelbarrow of other slowness source)
169 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
170 parsers, 'BaseIndexObject'
171 )
15 )
172
16 from ..utils import (
173
17 storageutil,
174 @attr.s(slots=True, frozen=True)
175 class _revisioninfo(object):
176 """Information about a revision that allows building its fulltext
177 node: expected hash of the revision
178 p1, p2: parent revs of the revision
179 btext: built text cache consisting of a one-element list
180 cachedelta: (baserev, uncompressed_delta) or None
181 flags: flags associated to the revision storage
182
183 One of btext[0] or cachedelta must be set.
184 """
185
186 node = attr.ib()
187 p1 = attr.ib()
188 p2 = attr.ib()
189 btext = attr.ib()
190 textlen = attr.ib()
191 cachedelta = attr.ib()
192 flags = attr.ib()
193
194
195 @interfaceutil.implementer(repository.irevisiondelta)
196 @attr.s(slots=True)
197 class revlogrevisiondelta(object):
198 node = attr.ib()
199 p1node = attr.ib()
200 p2node = attr.ib()
201 basenode = attr.ib()
202 flags = attr.ib()
203 baserevisionsize = attr.ib()
204 revision = attr.ib()
205 delta = attr.ib()
206 sidedata = attr.ib()
207 protocol_flags = attr.ib()
208 linknode = attr.ib(default=None)
209
210
211 @interfaceutil.implementer(repository.iverifyproblem)
212 @attr.s(frozen=True)
213 class revlogproblem(object):
214 warning = attr.ib(default=None)
215 error = attr.ib(default=None)
216 node = attr.ib(default=None)
217
218
219 def parse_index_v1(data, inline):
220 # call the C implementation to parse the index data
221 index, cache = parsers.parse_index2(data, inline)
222 return index, cache
223
224
225 def parse_index_v2(data, inline):
226 # call the C implementation to parse the index data
227 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
228 return index, cache
229
230
231 def parse_index_cl_v2(data, inline):
232 # call the C implementation to parse the index data
233 assert not inline
234 from .pure.parsers import parse_index_cl_v2
235
236 index, cache = parse_index_cl_v2(data)
237 return index, cache
238
239
240 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
241
242 def parse_index_v1_nodemap(data, inline):
243 index, cache = parsers.parse_index_devel_nodemap(data, inline)
244 return index, cache
245
246
247 else:
248 parse_index_v1_nodemap = None
249
250
251 def parse_index_v1_mixed(data, inline):
252 index, cache = parse_index_v1(data, inline)
253 return rustrevlog.MixedIndex(index), cache
254
255
256 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
257 # signed integer)
258 _maxentrysize = 0x7FFFFFFF
259
260 PARTIAL_READ_MSG = _(
261 b'partial read of revlog %s; expected %d bytes from offset %d, got %d'
262 )
18 )
263
19 from . import constants
264 FILE_TOO_SHORT_MSG = _(
265 b'cannot read from revlog %s;'
266 b' expected %d bytes from offset %d, data size is %d'
267 )
268
269
270 class revlog(object):
271 """
272 the underlying revision storage object
273
274 A revlog consists of two parts, an index and the revision data.
275
276 The index is a file with a fixed record size containing
277 information on each revision, including its nodeid (hash), the
278 nodeids of its parents, the position and offset of its data within
279 the data file, and the revision it's based on. Finally, each entry
280 contains a linkrev entry that can serve as a pointer to external
281 data.
282
283 The revision data itself is a linear collection of data chunks.
284 Each chunk represents a revision and is usually represented as a
285 delta against the previous chunk. To bound lookup time, runs of
286 deltas are limited to about 2 times the length of the original
287 version data. This makes retrieval of a version proportional to
288 its size, or O(1) relative to the number of revisions.
289
290 Both pieces of the revlog are written to in an append-only
291 fashion, which means we never need to rewrite a file to insert or
292 remove data, and can use some simple techniques to avoid the need
293 for locking while reading.
294
295 If checkambig, indexfile is opened with checkambig=True at
296 writing, to avoid file stat ambiguity.
297
298 If mmaplargeindex is True, and an mmapindexthreshold is set, the
299 index will be mmapped rather than read if it is larger than the
300 configured threshold.
301
302 If censorable is True, the revlog can have censored revisions.
303
304 If `upperboundcomp` is not None, this is the expected maximal gain from
305 compression for the data content.
306
307 `concurrencychecker` is an optional function that receives 3 arguments: a
308 file handle, a filename, and an expected position. It should check whether
309 the current position in the file handle is valid, and log/warn/fail (by
310 raising).
311
312
313 Internal details
314 ----------------
315
316 A large part of the revlog logic deals with revisions' "index entries", tuple
317 objects that contain the same "items" whatever the revlog version.
318 Different versions will have different ways of storing these items (sometimes
319 not having them at all), but the tuple will always be the same. New fields
320 are usually added at the end to avoid breaking existing code that relies
321 on the existing order. The fields are defined as follows:
322
323 [0] offset:
324 The byte index of the start of revision data chunk.
325 That value is shifted up by 16 bits. Use "offset = field >> 16" to
326 retrieve it.
327
328 flags:
329 A flag field that carries special information or changes the behavior
330 of the revision. (see `REVIDX_*` constants for details)
331 The flag field only occupies the first 16 bits of this field,
332 use "flags = field & 0xFFFF" to retrieve the value.
333
334 [1] compressed length:
335 The size, in bytes, of the chunk on disk
336
337 [2] uncompressed length:
338 The size, in bytes, of the full revision once reconstructed.
339
340 [3] base rev:
341 Either the base of the revision delta chain (without general
342 delta), or the base of the delta (stored in the data chunk)
343 with general delta.
344
345 [4] link rev:
346 Changelog revision number of the changeset introducing this
347 revision.
348
349 [5] parent 1 rev:
350 Revision number of the first parent
351
352 [6] parent 2 rev:
353 Revision number of the second parent
354
355 [7] node id:
356 The node id of the current revision
357
358 [8] sidedata offset:
359 The byte index of the start of the revision's side-data chunk.
360
361 [9] sidedata chunk length:
362 The size, in bytes, of the revision's side-data chunk.
363
364 [10] data compression mode:
365 two bits that detail the way the data chunk is compressed on disk.
366 (see "COMP_MODE_*" constants for details). For revlog version 0 and
367 1 this will always be COMP_MODE_INLINE.
368
369 [11] side-data compression mode:
370 two bits that detail the way the sidedata chunk is compressed on disk.
371 (see "COMP_MODE_*" constants for details)
372 """
373
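# Illustration (a sketch, not part of the original file): callers split the
# first index-entry field into offset and flags, for an entry `e` obtained
# from rl.index[rev], exactly as the docstring above describes:
#
#   offset = e[0] >> 16    # byte position of the data chunk
#   flags = e[0] & 0xFFFF  # REVIDX_* flags live in the low 16 bits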
374 _flagserrorclass = error.RevlogError
375
376 def __init__(
377 self,
378 opener,
379 target,
380 radix,
381 postfix=None, # only exists for `tmpcensored` now
382 checkambig=False,
383 mmaplargeindex=False,
384 censorable=False,
385 upperboundcomp=None,
386 persistentnodemap=False,
387 concurrencychecker=None,
388 trypending=False,
389 ):
390 """
391 create a revlog object
392
393 opener is a function that abstracts the file opening operation
394 and can be used to implement COW semantics or the like.
395
396 `target`: a (KIND, ID) tuple that identifies the content stored in
397 this revlog. It helps the rest of the code to understand what the
398 revlog is about without having to resort to heuristics and index
399 filename analysis. Note that this must reliably be set by normal
400 code, but that test, debug, or performance measurement code might
401 not set it to an accurate value.
402 """
403 self.upperboundcomp = upperboundcomp
404
405 self.radix = radix
406
407 self._docket_file = None
408 self._indexfile = None
409 self._datafile = None
410 self._sidedatafile = None
411 self._nodemap_file = None
412 self.postfix = postfix
413 self._trypending = trypending
414 self.opener = opener
415 if persistentnodemap:
416 self._nodemap_file = nodemaputil.get_nodemap_file(self)
417
418 assert target[0] in ALL_KINDS
419 assert len(target) == 2
420 self.target = target
421 # When True, indexfile is opened with checkambig=True at writing, to
422 # avoid file stat ambiguity.
423 self._checkambig = checkambig
424 self._mmaplargeindex = mmaplargeindex
425 self._censorable = censorable
426 # 3-tuple of (node, rev, text) for a raw revision.
427 self._revisioncache = None
428 # Maps rev to chain base rev.
429 self._chainbasecache = util.lrucachedict(100)
430 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
431 self._chunkcache = (0, b'')
432 # How much data to read and cache into the raw revlog data cache.
433 self._chunkcachesize = 65536
434 self._maxchainlen = None
435 self._deltabothparents = True
436 self.index = None
437 self._docket = None
438 self._nodemap_docket = None
439 # Mapping of partial identifiers to full nodes.
440 self._pcache = {}
441 # Mapping of revision integer to full node.
442 self._compengine = b'zlib'
443 self._compengineopts = {}
444 self._maxdeltachainspan = -1
445 self._withsparseread = False
446 self._sparserevlog = False
447 self.hassidedata = False
448 self._srdensitythreshold = 0.50
449 self._srmingapsize = 262144
450
451 # Make copy of flag processors so each revlog instance can support
452 # custom flags.
453 self._flagprocessors = dict(flagutil.flagprocessors)
454
455 # 3-tuple of file handles being used for active writing.
456 self._writinghandles = None
457 # prevent nesting of addgroup
458 self._adding_group = None
459
460 self._loadindex()
461
462 self._concurrencychecker = concurrencychecker
463
464 def _init_opts(self):
465 """process options (from above/config) to setup associated default revlog mode
466
467 These values might be affected when actually reading on disk information.
468
469 The relevant values are returned for use in _loadindex().
470
471 * newversionflags:
472 version header to use if we need to create a new revlog
473
474 * mmapindexthreshold:
475 minimal index size at which to start using mmap
476
477 * force_nodemap:
478 force the usage of a "development" version of the nodemap code
479 """
480 mmapindexthreshold = None
481 opts = self.opener.options
482
483 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
484 new_header = CHANGELOGV2
485 elif b'revlogv2' in opts:
486 new_header = REVLOGV2
487 elif b'revlogv1' in opts:
488 new_header = REVLOGV1 | FLAG_INLINE_DATA
489 if b'generaldelta' in opts:
490 new_header |= FLAG_GENERALDELTA
491 elif b'revlogv0' in self.opener.options:
492 new_header = REVLOGV0
493 else:
494 new_header = REVLOG_DEFAULT_VERSION
495
496 if b'chunkcachesize' in opts:
497 self._chunkcachesize = opts[b'chunkcachesize']
498 if b'maxchainlen' in opts:
499 self._maxchainlen = opts[b'maxchainlen']
500 if b'deltabothparents' in opts:
501 self._deltabothparents = opts[b'deltabothparents']
502 self._lazydelta = bool(opts.get(b'lazydelta', True))
503 self._lazydeltabase = False
504 if self._lazydelta:
505 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
506 if b'compengine' in opts:
507 self._compengine = opts[b'compengine']
508 if b'zlib.level' in opts:
509 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
510 if b'zstd.level' in opts:
511 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
512 if b'maxdeltachainspan' in opts:
513 self._maxdeltachainspan = opts[b'maxdeltachainspan']
514 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
515 mmapindexthreshold = opts[b'mmapindexthreshold']
516 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
517 withsparseread = bool(opts.get(b'with-sparse-read', False))
518 # sparse-revlog forces sparse-read
519 self._withsparseread = self._sparserevlog or withsparseread
520 if b'sparse-read-density-threshold' in opts:
521 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
522 if b'sparse-read-min-gap-size' in opts:
523 self._srmingapsize = opts[b'sparse-read-min-gap-size']
524 if opts.get(b'enableellipsis'):
525 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
526
527 # revlog v0 doesn't have flag processors
528 for flag, processor in pycompat.iteritems(
529 opts.get(b'flagprocessors', {})
530 ):
531 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
532
533 if self._chunkcachesize <= 0:
534 raise error.RevlogError(
535 _(b'revlog chunk cache size %r is not greater than 0')
536 % self._chunkcachesize
537 )
538 elif self._chunkcachesize & (self._chunkcachesize - 1):
539 raise error.RevlogError(
540 _(b'revlog chunk cache size %r is not a power of 2')
541 % self._chunkcachesize
542 )
543 force_nodemap = opts.get(b'devel-force-nodemap', False)
544 return new_header, mmapindexthreshold, force_nodemap
545
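# Worked example (a sketch, not part of the original file): the two chunk
# cache size checks in _init_opts above accept only positive powers of two:
#
#   for size in (0, 1, 3, 65536):
#       ok = size > 0 and not (size & (size - 1))
#       # 0 -> False, 1 -> True, 3 -> False, 65536 -> True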
546 def _get_data(self, filepath, mmap_threshold, size=None):
547 """return a file content with or without mmap
548
549 If the file is missing return the empty string"""
550 try:
551 with self.opener(filepath) as fp:
552 if mmap_threshold is not None:
553 file_size = self.opener.fstat(fp).st_size
554 if file_size >= mmap_threshold:
555 if size is not None:
556 # avoid potential mmap crash
557 size = min(file_size, size)
558 # TODO: should .close() to release resources without
559 # relying on Python GC
560 if size is None:
561 return util.buffer(util.mmapread(fp))
562 else:
563 return util.buffer(util.mmapread(fp, size))
564 if size is None:
565 return fp.read()
566 else:
567 return fp.read(size)
568 except IOError as inst:
569 if inst.errno != errno.ENOENT:
570 raise
571 return b''
572
573 def _loadindex(self):
574
575 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
576
577 if self.postfix is not None:
578 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
579 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
580 entry_point = b'%s.i.a' % self.radix
581 else:
582 entry_point = b'%s.i' % self.radix
583
584 entry_data = b''
585 self._initempty = True
586 entry_data = self._get_data(entry_point, mmapindexthreshold)
587 if len(entry_data) > 0:
588 header = INDEX_HEADER.unpack(entry_data[:4])[0]
589 self._initempty = False
590 else:
591 header = new_header
592
593 self._format_flags = header & ~0xFFFF
594 self._format_version = header & 0xFFFF
595
596 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
597 if supported_flags is None:
598 msg = _(b'unknown version (%d) in revlog %s')
599 msg %= (self._format_version, self.display_id)
600 raise error.RevlogError(msg)
601 elif self._format_flags & ~supported_flags:
602 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
603 display_flag = self._format_flags >> 16
604 msg %= (display_flag, self._format_version, self.display_id)
605 raise error.RevlogError(msg)
606
607 features = FEATURES_BY_VERSION[self._format_version]
608 self._inline = features[b'inline'](self._format_flags)
609 self._generaldelta = features[b'generaldelta'](self._format_flags)
610 self.hassidedata = features[b'sidedata']
611
612 if not features[b'docket']:
613 self._indexfile = entry_point
614 index_data = entry_data
615 else:
616 self._docket_file = entry_point
617 if self._initempty:
618 self._docket = docketutil.default_docket(self, header)
619 else:
620 self._docket = docketutil.parse_docket(
621 self, entry_data, use_pending=self._trypending
622 )
623 self._indexfile = self._docket.index_filepath()
624 index_data = b''
625 index_size = self._docket.index_end
626 if index_size > 0:
627 index_data = self._get_data(
628 self._indexfile, mmapindexthreshold, size=index_size
629 )
630 if len(index_data) < index_size:
631 msg = _(b'too few index data for %s: got %d, expected %d')
632 msg %= (self.display_id, len(index_data), index_size)
633 raise error.RevlogError(msg)
634
635 self._inline = False
636 # generaldelta implied by version 2 revlogs.
637 self._generaldelta = True
638 # the logic for persistent nodemap will be dealt with within the
639 # main docket, so disable it for now.
640 self._nodemap_file = None
641
642 if self._docket is not None:
643 self._datafile = self._docket.data_filepath()
644 self._sidedatafile = self._docket.sidedata_filepath()
645 elif self.postfix is None:
646 self._datafile = b'%s.d' % self.radix
647 else:
648 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
649
650 self.nodeconstants = sha1nodeconstants
651 self.nullid = self.nodeconstants.nullid
652
653 # sparse-revlog can't be on without general-delta (issue6056)
654 if not self._generaldelta:
655 self._sparserevlog = False
656
657 self._storedeltachains = True
658
659 devel_nodemap = (
660 self._nodemap_file
661 and force_nodemap
662 and parse_index_v1_nodemap is not None
663 )
664
665 use_rust_index = False
666 if rustrevlog is not None:
667 if self._nodemap_file is not None:
668 use_rust_index = True
669 else:
670 use_rust_index = self.opener.options.get(b'rust.index')
671
672 self._parse_index = parse_index_v1
673 if self._format_version == REVLOGV0:
674 self._parse_index = revlogv0.parse_index_v0
675 elif self._format_version == REVLOGV2:
676 self._parse_index = parse_index_v2
677 elif self._format_version == CHANGELOGV2:
678 self._parse_index = parse_index_cl_v2
679 elif devel_nodemap:
680 self._parse_index = parse_index_v1_nodemap
681 elif use_rust_index:
682 self._parse_index = parse_index_v1_mixed
683 try:
684 d = self._parse_index(index_data, self._inline)
685 index, _chunkcache = d
686 use_nodemap = (
687 not self._inline
688 and self._nodemap_file is not None
689 and util.safehasattr(index, 'update_nodemap_data')
690 )
691 if use_nodemap:
692 nodemap_data = nodemaputil.persisted_data(self)
693 if nodemap_data is not None:
694 docket = nodemap_data[0]
695 if (
696 len(d[0]) > docket.tip_rev
697 and d[0][docket.tip_rev][7] == docket.tip_node
698 ):
699 # no changelog tampering
700 self._nodemap_docket = docket
701 index.update_nodemap_data(*nodemap_data)
702 except (ValueError, IndexError):
703 raise error.RevlogError(
704 _(b"index %s is corrupted") % self.display_id
705 )
706 self.index, self._chunkcache = d
707 if not self._chunkcache:
708 self._chunkclear()
709 # revnum -> (chain-length, sum-delta-length)
710 self._chaininfocache = util.lrucachedict(500)
711 # revlog header -> revlog compressor
712 self._decompressors = {}
713
714 @util.propertycache
715 def revlog_kind(self):
716 return self.target[0]
717
718 @util.propertycache
719 def display_id(self):
720 """The public facing "ID" of the revlog that we use in message"""
721 # Maybe we should build a user facing representation of
722 # revlog.target instead of using `self.radix`
723 return self.radix
724
725 def _get_decompressor(self, t):
726 try:
727 compressor = self._decompressors[t]
728 except KeyError:
729 try:
730 engine = util.compengines.forrevlogheader(t)
731 compressor = engine.revlogcompressor(self._compengineopts)
732 self._decompressors[t] = compressor
733 except KeyError:
734 raise error.RevlogError(
735 _(b'unknown compression type %s') % binascii.hexlify(t)
736 )
737 return compressor
738
739 @util.propertycache
740 def _compressor(self):
741 engine = util.compengines[self._compengine]
742 return engine.revlogcompressor(self._compengineopts)
743
744 @util.propertycache
745 def _decompressor(self):
746 """the default decompressor"""
747 if self._docket is None:
748 return None
749 t = self._docket.default_compression_header
750 c = self._get_decompressor(t)
751 return c.decompress
752
753 def _indexfp(self):
754 """file object for the revlog's index file"""
755 return self.opener(self._indexfile, mode=b"r")
756
757 def __index_write_fp(self):
758 # You should not use this directly and use `_writing` instead
759 try:
760 f = self.opener(
761 self._indexfile, mode=b"r+", checkambig=self._checkambig
762 )
763 if self._docket is None:
764 f.seek(0, os.SEEK_END)
765 else:
766 f.seek(self._docket.index_end, os.SEEK_SET)
767 return f
768 except IOError as inst:
769 if inst.errno != errno.ENOENT:
770 raise
771 return self.opener(
772 self._indexfile, mode=b"w+", checkambig=self._checkambig
773 )
774
775 def __index_new_fp(self):
776 # You should not use this unless you are upgrading from inline revlog
777 return self.opener(
778 self._indexfile,
779 mode=b"w",
780 checkambig=self._checkambig,
781 atomictemp=True,
782 )
783
784 def _datafp(self, mode=b'r'):
785 """file object for the revlog's data file"""
786 return self.opener(self._datafile, mode=mode)
787
788 @contextlib.contextmanager
789 def _datareadfp(self, existingfp=None):
790 """file object suitable to read data"""
791 # Use explicit file handle, if given.
792 if existingfp is not None:
793 yield existingfp
794
795 # Use a file handle being actively used for writes, if available.
796 # There is some danger to doing this because reads will seek the
797 # file. However, _writeentry() performs a SEEK_END before all writes,
798 # so we should be safe.
799 elif self._writinghandles:
800 if self._inline:
801 yield self._writinghandles[0]
802 else:
803 yield self._writinghandles[1]
804
805 # Otherwise open a new file handle.
806 else:
807 if self._inline:
808 func = self._indexfp
809 else:
810 func = self._datafp
811 with func() as fp:
812 yield fp
813
814 @contextlib.contextmanager
815 def _sidedatareadfp(self):
816 """file object suitable to read sidedata"""
817 if self._writinghandles:
818 yield self._writinghandles[2]
819 else:
820 with self.opener(self._sidedatafile) as fp:
821 yield fp
822
823 def tiprev(self):
824 return len(self.index) - 1
825
826 def tip(self):
827 return self.node(self.tiprev())
828
829 def __contains__(self, rev):
830 return 0 <= rev < len(self)
831
832 def __len__(self):
833 return len(self.index)
834
835 def __iter__(self):
836 return iter(pycompat.xrange(len(self)))
837
838 def revs(self, start=0, stop=None):
839 """iterate over all rev in this revlog (from start to stop)"""
840 return storageutil.iterrevs(len(self), start=start, stop=stop)
841
842 @property
843 def nodemap(self):
844 msg = (
845 b"revlog.nodemap is deprecated, "
846 b"use revlog.index.[has_node|rev|get_rev]"
847 )
848 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
849 return self.index.nodemap
850
851 @property
852 def _nodecache(self):
853 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
854 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
855 return self.index.nodemap
856
857 def hasnode(self, node):
858 try:
859 self.rev(node)
860 return True
861 except KeyError:
862 return False
863
864 def candelta(self, baserev, rev):
865 """whether two revisions (baserev, rev) can be delta-ed or not"""
866 # Disable delta if either rev requires a content-changing flag
867 # processor (ex. LFS). This is because such flag processor can alter
868 # the rawtext content that the delta will be based on, and two clients
869 # could have a same revlog node with different flags (i.e. different
870 # rawtext contents) and the delta could be incompatible.
871 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
872 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
873 ):
874 return False
875 return True
876
877 def update_caches(self, transaction):
878 if self._nodemap_file is not None:
879 if transaction is None:
880 nodemaputil.update_persistent_nodemap(self)
881 else:
882 nodemaputil.setup_persistent_nodemap(transaction, self)
883
884 def clearcaches(self):
885 self._revisioncache = None
886 self._chainbasecache.clear()
887 self._chunkcache = (0, b'')
888 self._pcache = {}
889 self._nodemap_docket = None
890 self.index.clearcaches()
891 # The python code is the one responsible for validating the docket, we
892 # end up having to refresh it here.
893 use_nodemap = (
894 not self._inline
895 and self._nodemap_file is not None
896 and util.safehasattr(self.index, 'update_nodemap_data')
897 )
898 if use_nodemap:
899 nodemap_data = nodemaputil.persisted_data(self)
900 if nodemap_data is not None:
901 self._nodemap_docket = nodemap_data[0]
902 self.index.update_nodemap_data(*nodemap_data)
903
904 def rev(self, node):
905 try:
906 return self.index.rev(node)
907 except TypeError:
908 raise
909 except error.RevlogError:
910 # parsers.c radix tree lookup failed
911 if (
912 node == self.nodeconstants.wdirid
913 or node in self.nodeconstants.wdirfilenodeids
914 ):
915 raise error.WdirUnsupported
916 raise error.LookupError(node, self.display_id, _(b'no node'))
917
918 # Accessors for index entries.
919
920 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
921 # are flags.
922 def start(self, rev):
923 return int(self.index[rev][0] >> 16)
924
925 def sidedata_cut_off(self, rev):
926 sd_cut_off = self.index[rev][8]
927 if sd_cut_off != 0:
928 return sd_cut_off
929 # This is some annoying dance, because entries without sidedata
930 # currently use 0 as their offset (instead of previous-offset +
931 # previous-size).
932 #
933 # We should reconsider this sidedata → 0 sidedata_offset policy.
934 # In the meantime, we need this.
935 while 0 <= rev:
936 e = self.index[rev]
937 if e[9] != 0:
938 return e[8] + e[9]
939 rev -= 1
940 return 0
941
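# Worked example (a sketch, not part of the original file): with
# (sidedata offset, length) pairs of rev 0 = (0, 10), rev 1 = (0, 0) and
# rev 2 = (0, 0), sidedata_cut_off(2) walks back past the empty entries and
# returns 0 + 10 = 10, the end of rev 0's side-data chunk.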
942 def flags(self, rev):
943 return self.index[rev][0] & 0xFFFF
944
945 def length(self, rev):
946 return self.index[rev][1]
947
948 def sidedata_length(self, rev):
949 if not self.hassidedata:
950 return 0
951 return self.index[rev][9]
952
953 def rawsize(self, rev):
954 """return the length of the uncompressed text for a given revision"""
955 l = self.index[rev][2]
956 if l >= 0:
957 return l
958
959 t = self.rawdata(rev)
960 return len(t)
961
962 def size(self, rev):
963 """length of non-raw text (processed by a "read" flag processor)"""
964 # fast path: if no "read" flag processor could change the content,
965 # size is rawsize. note: ELLIPSIS is known to not change the content.
966 flags = self.flags(rev)
967 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
968 return self.rawsize(rev)
969
970 return len(self.revision(rev, raw=False))
971
972 def chainbase(self, rev):
973 base = self._chainbasecache.get(rev)
974 if base is not None:
975 return base
976
977 index = self.index
978 iterrev = rev
979 base = index[iterrev][3]
980 while base != iterrev:
981 iterrev = base
982 base = index[iterrev][3]
983
984 self._chainbasecache[rev] = base
985 return base
986
987 def linkrev(self, rev):
988 return self.index[rev][4]
989
990 def parentrevs(self, rev):
991 try:
992 entry = self.index[rev]
993 except IndexError:
994 if rev == wdirrev:
995 raise error.WdirUnsupported
996 raise
997 if entry[5] == nullrev:
998 return entry[6], entry[5]
999 else:
1000 return entry[5], entry[6]
1001
1002 # fast parentrevs(rev) where rev isn't filtered
1003 _uncheckedparentrevs = parentrevs
1004
1005 def node(self, rev):
1006 try:
1007 return self.index[rev][7]
1008 except IndexError:
1009 if rev == wdirrev:
1010 raise error.WdirUnsupported
1011 raise
1012
1013 # Derived from index values.
1014
1015 def end(self, rev):
1016 return self.start(rev) + self.length(rev)
1017
1018 def parents(self, node):
1019 i = self.index
1020 d = i[self.rev(node)]
1021 # inline node() to avoid function call overhead
1022 if d[5] == nullrev:
1023 return i[d[6]][7], i[d[5]][7]
1024 else:
1025 return i[d[5]][7], i[d[6]][7]
1026
1027 def chainlen(self, rev):
1028 return self._chaininfo(rev)[0]
1029
1030 def _chaininfo(self, rev):
1031 chaininfocache = self._chaininfocache
1032 if rev in chaininfocache:
1033 return chaininfocache[rev]
1034 index = self.index
1035 generaldelta = self._generaldelta
1036 iterrev = rev
1037 e = index[iterrev]
1038 clen = 0
1039 compresseddeltalen = 0
1040 while iterrev != e[3]:
1041 clen += 1
1042 compresseddeltalen += e[1]
1043 if generaldelta:
1044 iterrev = e[3]
1045 else:
1046 iterrev -= 1
1047 if iterrev in chaininfocache:
1048 t = chaininfocache[iterrev]
1049 clen += t[0]
1050 compresseddeltalen += t[1]
1051 break
1052 e = index[iterrev]
1053 else:
1054 # Add text length of base since decompressing that also takes
1055 # work. For cache hits the length is already included.
1056 compresseddeltalen += e[1]
1057 r = (clen, compresseddeltalen)
1058 chaininfocache[rev] = r
1059 return r
1060
1061 def _deltachain(self, rev, stoprev=None):
1062 """Obtain the delta chain for a revision.
1063
1064 ``stoprev`` specifies a revision to stop at. If not specified, we
1065 stop at the base of the chain.
1066
1067 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1068 revs in ascending order and ``stopped`` is a bool indicating whether
1069 ``stoprev`` was hit.
1070 """
1071 # Try C implementation.
1072 try:
1073 return self.index.deltachain(rev, stoprev, self._generaldelta)
1074 except AttributeError:
1075 pass
1076
1077 chain = []
1078
1079 # Alias to prevent attribute lookup in tight loop.
1080 index = self.index
1081 generaldelta = self._generaldelta
1082
1083 iterrev = rev
1084 e = index[iterrev]
1085 while iterrev != e[3] and iterrev != stoprev:
1086 chain.append(iterrev)
1087 if generaldelta:
1088 iterrev = e[3]
1089 else:
1090 iterrev -= 1
1091 e = index[iterrev]
1092
1093 if iterrev == stoprev:
1094 stopped = True
1095 else:
1096 chain.append(iterrev)
1097 stopped = False
1098
1099 chain.reverse()
1100 return chain, stopped
1101
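# Usage sketch (not part of the original file): rebuilding a revision
# conceptually walks its delta chain base-first:
#
#   chain, stopped = rl._deltachain(rev)
#   # unless `stopped`, chain[0] holds the full-text base and every later
#   # rev in `chain` is a delta applied on top of it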
1102 def ancestors(self, revs, stoprev=0, inclusive=False):
1103 """Generate the ancestors of 'revs' in reverse revision order.
1104 Does not generate revs lower than stoprev.
1105
1106 See the documentation for ancestor.lazyancestors for more details."""
1107
1108 # first, make sure start revisions aren't filtered
1109 revs = list(revs)
1110 checkrev = self.node
1111 for r in revs:
1112 checkrev(r)
1113 # and we're sure ancestors aren't filtered as well
1114
1115 if rustancestor is not None and self.index.rust_ext_compat:
1116 lazyancestors = rustancestor.LazyAncestors
1117 arg = self.index
1118 else:
1119 lazyancestors = ancestor.lazyancestors
1120 arg = self._uncheckedparentrevs
1121 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1122
1123 def descendants(self, revs):
1124 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1125
1126 def findcommonmissing(self, common=None, heads=None):
1127 """Return a tuple of the ancestors of common and the ancestors of heads
1128 that are not ancestors of common. In revset terminology, we return the
1129 tuple:
1130
1131 ::common, (::heads) - (::common)
1132
1133 The list is sorted by revision number, meaning it is
1134 topologically sorted.
1135
1136 'heads' and 'common' are both lists of node IDs. If heads is
1137 not supplied, uses all of the revlog's heads. If common is not
1138 supplied, uses nullid."""
1139 if common is None:
1140 common = [self.nullid]
1141 if heads is None:
1142 heads = self.heads()
1143
1144 common = [self.rev(n) for n in common]
1145 heads = [self.rev(n) for n in heads]
1146
1147 # we want the ancestors, but inclusive
1148 class lazyset(object):
1149 def __init__(self, lazyvalues):
1150 self.addedvalues = set()
1151 self.lazyvalues = lazyvalues
1152
1153 def __contains__(self, value):
1154 return value in self.addedvalues or value in self.lazyvalues
1155
1156 def __iter__(self):
1157 added = self.addedvalues
1158 for r in added:
1159 yield r
1160 for r in self.lazyvalues:
1161 if r not in added:
1162 yield r
1163
1164 def add(self, value):
1165 self.addedvalues.add(value)
1166
1167 def update(self, values):
1168 self.addedvalues.update(values)
1169
1170 has = lazyset(self.ancestors(common))
1171 has.add(nullrev)
1172 has.update(common)
1173
1174 # take all ancestors from heads that aren't in has
1175 missing = set()
1176 visit = collections.deque(r for r in heads if r not in has)
1177 while visit:
1178 r = visit.popleft()
1179 if r in missing:
1180 continue
1181 else:
1182 missing.add(r)
1183 for p in self.parentrevs(r):
1184 if p not in has:
1185 visit.append(p)
1186 missing = list(missing)
1187 missing.sort()
1188 return has, [self.node(miss) for miss in missing]
1189
1190 def incrementalmissingrevs(self, common=None):
1191 """Return an object that can be used to incrementally compute the
1192 revision numbers of the ancestors of arbitrary sets that are not
1193 ancestors of common. This is an ancestor.incrementalmissingancestors
1194 object.
1195
1196 'common' is a list of revision numbers. If common is not supplied, uses
1197 nullrev.
1198 """
1199 if common is None:
1200 common = [nullrev]
1201
1202 if rustancestor is not None and self.index.rust_ext_compat:
1203 return rustancestor.MissingAncestors(self.index, common)
1204 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1205
1206 def findmissingrevs(self, common=None, heads=None):
1207 """Return the revision numbers of the ancestors of heads that
1208 are not ancestors of common.
1209
1210 More specifically, return a list of revision numbers corresponding to
1211 nodes N such that every N satisfies the following constraints:
1212
1213 1. N is an ancestor of some node in 'heads'
1214 2. N is not an ancestor of any node in 'common'
1215
1216 The list is sorted by revision number, meaning it is
1217 topologically sorted.
1218
1219 'heads' and 'common' are both lists of revision numbers. If heads is
1220 not supplied, uses all of the revlog's heads. If common is not
1221 supplied, uses nullid."""
1222 if common is None:
1223 common = [nullrev]
1224 if heads is None:
1225 heads = self.headrevs()
1226
1227 inc = self.incrementalmissingrevs(common=common)
1228 return inc.missingancestors(heads)
1229
1230 def findmissing(self, common=None, heads=None):
1231 """Return the ancestors of heads that are not ancestors of common.
1232
1233 More specifically, return a list of nodes N such that every N
1234 satisfies the following constraints:
1235
1236 1. N is an ancestor of some node in 'heads'
1237 2. N is not an ancestor of any node in 'common'
1238
1239 The list is sorted by revision number, meaning it is
1240 topologically sorted.
1241
1242 'heads' and 'common' are both lists of node IDs. If heads is
1243 not supplied, uses all of the revlog's heads. If common is not
1244 supplied, uses nullid."""
1245 if common is None:
1246 common = [self.nullid]
1247 if heads is None:
1248 heads = self.heads()
1249
1250 common = [self.rev(n) for n in common]
1251 heads = [self.rev(n) for n in heads]
1252
1253 inc = self.incrementalmissingrevs(common=common)
1254 return [self.node(r) for r in inc.missingancestors(heads)]
1255
1256 def nodesbetween(self, roots=None, heads=None):
1257 """Return a topological path from 'roots' to 'heads'.
1258
1259 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1260 topologically sorted list of all nodes N that satisfy both of
1261 these constraints:
1262
1263 1. N is a descendant of some node in 'roots'
1264 2. N is an ancestor of some node in 'heads'
1265
1266 Every node is considered to be both a descendant and an ancestor
1267 of itself, so every reachable node in 'roots' and 'heads' will be
1268 included in 'nodes'.
1269
1270 'outroots' is the list of reachable nodes in 'roots', i.e., the
1271 subset of 'roots' that is returned in 'nodes'. Likewise,
1272 'outheads' is the subset of 'heads' that is also in 'nodes'.
1273
1274 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1275 unspecified, uses nullid as the only root. If 'heads' is
1276 unspecified, uses list of all of the revlog's heads."""
1277 nonodes = ([], [], [])
1278 if roots is not None:
1279 roots = list(roots)
1280 if not roots:
1281 return nonodes
1282 lowestrev = min([self.rev(n) for n in roots])
1283 else:
1284 roots = [self.nullid] # Everybody's a descendant of nullid
1285 lowestrev = nullrev
1286 if (lowestrev == nullrev) and (heads is None):
1287 # We want _all_ the nodes!
1288 return (
1289 [self.node(r) for r in self],
1290 [self.nullid],
1291 list(self.heads()),
1292 )
1293 if heads is None:
1294 # All nodes are ancestors, so the latest ancestor is the last
1295 # node.
1296 highestrev = len(self) - 1
1297 # Set ancestors to None to signal that every node is an ancestor.
1298 ancestors = None
1299 # Set heads to an empty dictionary for later discovery of heads
1300 heads = {}
1301 else:
1302 heads = list(heads)
1303 if not heads:
1304 return nonodes
1305 ancestors = set()
1306 # Turn heads into a dictionary so we can remove 'fake' heads.
1307 # Also, later we will be using it to filter out the heads we can't
1308 # find from roots.
1309 heads = dict.fromkeys(heads, False)
1310 # Start at the top and keep marking parents until we're done.
1311 nodestotag = set(heads)
1312 # Remember where the top was so we can use it as a limit later.
1313 highestrev = max([self.rev(n) for n in nodestotag])
1314 while nodestotag:
1315 # grab a node to tag
1316 n = nodestotag.pop()
1317 # Never tag nullid
1318 if n == self.nullid:
1319 continue
1320 # A node's revision number represents its place in a
1321 # topologically sorted list of nodes.
1322 r = self.rev(n)
1323 if r >= lowestrev:
1324 if n not in ancestors:
1325 # If we are possibly a descendant of one of the roots
1326 # and we haven't already been marked as an ancestor
1327 ancestors.add(n) # Mark as ancestor
1328 # Add non-nullid parents to list of nodes to tag.
1329 nodestotag.update(
1330 [p for p in self.parents(n) if p != self.nullid]
1331 )
1332 elif n in heads: # We've seen it before, is it a fake head?
1333 # So it is, real heads should not be the ancestors of
1334 # any other heads.
1335 heads.pop(n)
1336 if not ancestors:
1337 return nonodes
1338 # Now that we have our set of ancestors, we want to remove any
1339 # roots that are not ancestors.
1340
1341 # If one of the roots was nullid, everything is included anyway.
1342 if lowestrev > nullrev:
1343 # But, since we weren't, let's recompute the lowest rev to not
1344 # include roots that aren't ancestors.
1345
1346 # Filter out roots that aren't ancestors of heads
1347 roots = [root for root in roots if root in ancestors]
1348 # Recompute the lowest revision
1349 if roots:
1350 lowestrev = min([self.rev(root) for root in roots])
1351 else:
1352 # No more roots? Return empty list
1353 return nonodes
1354 else:
1355 # We are descending from nullid, and don't need to care about
1356 # any other roots.
1357 lowestrev = nullrev
1358 roots = [self.nullid]
1359 # Transform our roots list into a set.
1360 descendants = set(roots)
1361 # Also, keep the original roots so we can filter out roots that aren't
1362 # 'real' roots (i.e. are descended from other roots).
1363 roots = descendants.copy()
1364 # Our topologically sorted list of output nodes.
1365 orderedout = []
1366 # Don't start at nullid since we don't want nullid in our output list,
1367 # and if nullid shows up in descendants, empty parents will look like
1368 # they're descendants.
1369 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1370 n = self.node(r)
1371 isdescendant = False
1372 if lowestrev == nullrev: # Everybody is a descendant of nullid
1373 isdescendant = True
1374 elif n in descendants:
1375 # n is already a descendant
1376 isdescendant = True
1377 # This check only needs to be done here because all the roots
1378 # will start being marked as descendants before the loop.
1379 if n in roots:
1380 # If n was a root, check if it's a 'real' root.
1381 p = tuple(self.parents(n))
1382 # If any of its parents are descendants, it's not a root.
1383 if (p[0] in descendants) or (p[1] in descendants):
1384 roots.remove(n)
1385 else:
1386 p = tuple(self.parents(n))
1387 # A node is a descendant if either of its parents is a
1388 # descendant. (We seeded the descendants set with the roots
1389 # up there, remember?)
1390 if (p[0] in descendants) or (p[1] in descendants):
1391 descendants.add(n)
1392 isdescendant = True
1393 if isdescendant and ((ancestors is None) or (n in ancestors)):
1394 # Only include nodes that are both descendants and ancestors.
1395 orderedout.append(n)
1396 if (ancestors is not None) and (n in heads):
1397 # We're trying to figure out which heads are reachable
1398 # from roots.
1399 # Mark this head as having been reached
1400 heads[n] = True
1401 elif ancestors is None:
1402 # Otherwise, we're trying to discover the heads.
1403 # Assume this is a head because if it isn't, the next step
1404 # will eventually remove it.
1405 heads[n] = True
1406 # But, obviously its parents aren't.
1407 for p in self.parents(n):
1408 heads.pop(p, None)
1409 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1410 roots = list(roots)
1411 assert orderedout
1412 assert roots
1413 assert heads
1414 return (orderedout, roots, heads)
1415
1416 def headrevs(self, revs=None):
1417 if revs is None:
1418 try:
1419 return self.index.headrevs()
1420 except AttributeError:
1421 return self._headrevs()
1422 if rustdagop is not None and self.index.rust_ext_compat:
1423 return rustdagop.headrevs(self.index, revs)
1424 return dagop.headrevs(revs, self._uncheckedparentrevs)
1425
1426 def computephases(self, roots):
1427 return self.index.computephasesmapsets(roots)
1428
1429 def _headrevs(self):
1430 count = len(self)
1431 if not count:
1432 return [nullrev]
1433 # we won't iterate over filtered revs, so nobody is a head at start
1434 ishead = [0] * (count + 1)
1435 index = self.index
1436 for r in self:
1437 ishead[r] = 1 # I may be a head
1438 e = index[r]
1439 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1440 return [r for r, val in enumerate(ishead) if val]
1441
1442 def heads(self, start=None, stop=None):
1443 """return the list of all nodes that have no children
1444
1445 if start is specified, only heads that are descendants of
1446 start will be returned
1447 if stop is specified, it will consider all the revs from stop
1448 as if they had no children
1449 """
1450 if start is None and stop is None:
1451 if not len(self):
1452 return [self.nullid]
1453 return [self.node(r) for r in self.headrevs()]
1454
1455 if start is None:
1456 start = nullrev
1457 else:
1458 start = self.rev(start)
1459
1460 stoprevs = {self.rev(n) for n in stop or []}
1461
1462 revs = dagop.headrevssubset(
1463 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1464 )
1465
1466 return [self.node(rev) for rev in revs]
1467
1468 def children(self, node):
1469 """find the children of a given node"""
1470 c = []
1471 p = self.rev(node)
1472 for r in self.revs(start=p + 1):
1473 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1474 if prevs:
1475 for pr in prevs:
1476 if pr == p:
1477 c.append(self.node(r))
1478 elif p == nullrev:
1479 c.append(self.node(r))
1480 return c
1481
1482 def commonancestorsheads(self, a, b):
1483 """calculate all the heads of the common ancestors of nodes a and b"""
1484 a, b = self.rev(a), self.rev(b)
1485 ancs = self._commonancestorsheads(a, b)
1486 return pycompat.maplist(self.node, ancs)
1487
1488 def _commonancestorsheads(self, *revs):
1489 """calculate all the heads of the common ancestors of revs"""
1490 try:
1491 ancs = self.index.commonancestorsheads(*revs)
1492 except (AttributeError, OverflowError): # C implementation failed
1493 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1494 return ancs
1495
1496 def isancestor(self, a, b):
1497 """return True if node a is an ancestor of node b
1498
1499 A revision is considered an ancestor of itself."""
1500 a, b = self.rev(a), self.rev(b)
1501 return self.isancestorrev(a, b)
1502
1503 def isancestorrev(self, a, b):
1504 """return True if revision a is an ancestor of revision b
1505
1506 A revision is considered an ancestor of itself.
1507
1508 The implementation of this is trivial but the use of
1509 reachableroots is not."""
1510 if a == nullrev:
1511 return True
1512 elif a == b:
1513 return True
1514 elif a > b:
1515 return False
1516 return bool(self.reachableroots(a, [b], [a], includepath=False))
1517
1518 def reachableroots(self, minroot, heads, roots, includepath=False):
1519 """return (heads(::(<roots> and <roots>::<heads>)))
1520
1521 If includepath is True, return (<roots>::<heads>)."""
1522 try:
1523 return self.index.reachableroots2(
1524 minroot, heads, roots, includepath
1525 )
1526 except AttributeError:
1527 return dagop._reachablerootspure(
1528 self.parentrevs, minroot, roots, heads, includepath
1529 )
1530
1531 def ancestor(self, a, b):
1532 """calculate the "best" common ancestor of nodes a and b"""
1533
1534 a, b = self.rev(a), self.rev(b)
1535 try:
1536 ancs = self.index.ancestors(a, b)
1537 except (AttributeError, OverflowError):
1538 ancs = ancestor.ancestors(self.parentrevs, a, b)
1539 if ancs:
1540 # choose a consistent winner when there's a tie
1541 return min(map(self.node, ancs))
1542 return self.nullid
1543
1544 def _match(self, id):
1545 if isinstance(id, int):
1546 # rev
1547 return self.node(id)
1548 if len(id) == self.nodeconstants.nodelen:
1549 # possibly a binary node
1550 # odds of a binary node being all hex in ASCII are 1 in 10**25
1551 try:
1552 node = id
1553 self.rev(node) # quick search the index
1554 return node
1555 except error.LookupError:
1556 pass # may be partial hex id
1557 try:
1558 # str(rev)
1559 rev = int(id)
1560 if b"%d" % rev != id:
1561 raise ValueError
1562 if rev < 0:
1563 rev = len(self) + rev
1564 if rev < 0 or rev >= len(self):
1565 raise ValueError
1566 return self.node(rev)
1567 except (ValueError, OverflowError):
1568 pass
1569 if len(id) == 2 * self.nodeconstants.nodelen:
1570 try:
1571 # a full hex nodeid?
1572 node = bin(id)
1573 self.rev(node)
1574 return node
1575 except (TypeError, error.LookupError):
1576 pass
1577
1578 def _partialmatch(self, id):
1579 # we don't care about wdirfilenodeids as they should always be full hashes
1580 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1581 ambiguous = False
1582 try:
1583 partial = self.index.partialmatch(id)
1584 if partial and self.hasnode(partial):
1585 if maybewdir:
1586 # single 'ff...' match in radix tree, ambiguous with wdir
1587 ambiguous = True
1588 else:
1589 return partial
1590 elif maybewdir:
1591 # no 'ff...' match in radix tree, wdir identified
1592 raise error.WdirUnsupported
1593 else:
1594 return None
1595 except error.RevlogError:
1596 # parsers.c radix tree lookup gave multiple matches
1597 # fast path: for unfiltered changelog, radix tree is accurate
1598 if not getattr(self, 'filteredrevs', None):
1599 ambiguous = True
1600 # fall through to slow path that filters hidden revisions
1601 except (AttributeError, ValueError):
1602 # we are pure python, or key was too short to search radix tree
1603 pass
1604 if ambiguous:
1605 raise error.AmbiguousPrefixLookupError(
1606 id, self.display_id, _(b'ambiguous identifier')
1607 )
1608
1609 if id in self._pcache:
1610 return self._pcache[id]
1611
1612 if len(id) <= 40:
1613 try:
1614 # hex(node)[:...]
1615 l = len(id) // 2 # grab an even number of digits
1616 prefix = bin(id[: l * 2])
1617 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1618 nl = [
1619 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1620 ]
1621 if self.nodeconstants.nullhex.startswith(id):
1622 nl.append(self.nullid)
1623 if len(nl) > 0:
1624 if len(nl) == 1 and not maybewdir:
1625 self._pcache[id] = nl[0]
1626 return nl[0]
1627 raise error.AmbiguousPrefixLookupError(
1628 id, self.display_id, _(b'ambiguous identifier')
1629 )
1630 if maybewdir:
1631 raise error.WdirUnsupported
1632 return None
1633 except TypeError:
1634 pass
1635
1636 def lookup(self, id):
1637 """locate a node based on:
1638 - revision number or str(revision number)
1639 - nodeid or subset of hex nodeid
1640 """
1641 n = self._match(id)
1642 if n is not None:
1643 return n
1644 n = self._partialmatch(id)
1645 if n:
1646 return n
1647
1648 raise error.LookupError(id, self.display_id, _(b'no match found'))
1649
1650 def shortest(self, node, minlength=1):
1651 """Find the shortest unambiguous prefix that matches node."""
1652
1653 def isvalid(prefix):
1654 try:
1655 matchednode = self._partialmatch(prefix)
1656 except error.AmbiguousPrefixLookupError:
1657 return False
1658 except error.WdirUnsupported:
1659 # single 'ff...' match
1660 return True
1661 if matchednode is None:
1662 raise error.LookupError(node, self.display_id, _(b'no node'))
1663 return True
1664
1665 def maybewdir(prefix):
1666 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1667
1668 hexnode = hex(node)
1669
1670 def disambiguate(hexnode, minlength):
1671 """Disambiguate against wdirid."""
1672 for length in range(minlength, len(hexnode) + 1):
1673 prefix = hexnode[:length]
1674 if not maybewdir(prefix):
1675 return prefix
1676
1677 if not getattr(self, 'filteredrevs', None):
1678 try:
1679 length = max(self.index.shortest(node), minlength)
1680 return disambiguate(hexnode, length)
1681 except error.RevlogError:
1682 if node != self.nodeconstants.wdirid:
1683 raise error.LookupError(
1684 node, self.display_id, _(b'no node')
1685 )
1686 except AttributeError:
1687 # Fall through to pure code
1688 pass
1689
1690 if node == self.nodeconstants.wdirid:
1691 for length in range(minlength, len(hexnode) + 1):
1692 prefix = hexnode[:length]
1693 if isvalid(prefix):
1694 return prefix
1695
1696 for length in range(minlength, len(hexnode) + 1):
1697 prefix = hexnode[:length]
1698 if isvalid(prefix):
1699 return disambiguate(hexnode, length)
1700
1701 def cmp(self, node, text):
1702 """compare text with a given file revision
1703
1704 returns True if text is different than what is stored.
1705 """
1706 p1, p2 = self.parents(node)
1707 return storageutil.hashrevisionsha1(text, p1, p2) != node
1708
1709 def _cachesegment(self, offset, data):
1710 """Add a segment to the revlog cache.
1711
1712 Accepts an absolute offset and the data that is at that location.
1713 """
1714 o, d = self._chunkcache
1715 # try to add to existing cache
1716 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1717 self._chunkcache = o, d + data
1718 else:
1719 self._chunkcache = offset, data
1720
1721 def _readsegment(self, offset, length, df=None):
1722 """Load a segment of raw data from the revlog.
1723
1724 Accepts an absolute offset, length to read, and an optional existing
1725 file handle to read from.
1726
1727 If an existing file handle is passed, it will be seeked and the
1728 original seek position will NOT be restored.
1729
1730 Returns a str or buffer of raw byte data.
1731
1732 Raises if the requested number of bytes could not be read.
1733 """
1734 # Cache data both forward and backward around the requested
1735 # data, in a fixed size window. This helps speed up operations
1736 # involving reading the revlog backwards.
1737 cachesize = self._chunkcachesize
1738 realoffset = offset & ~(cachesize - 1)
1739 reallength = (
1740 (offset + length + cachesize) & ~(cachesize - 1)
1741 ) - realoffset
1742 with self._datareadfp(df) as df:
1743 df.seek(realoffset)
1744 d = df.read(reallength)
1745
1746 self._cachesegment(realoffset, d)
1747 if offset != realoffset or reallength != length:
1748 startoffset = offset - realoffset
1749 if len(d) - startoffset < length:
1750 filename = self._indexfile if self._inline else self._datafile
1751 got = len(d) - startoffset
1752 m = PARTIAL_READ_MSG % (filename, length, offset, got)
1753 raise error.RevlogError(m)
1754 return util.buffer(d, startoffset, length)
1755
1756 if len(d) < length:
1757 filename = self._indexfile if self._inline else self._datafile
1758 got = len(d) # offset == realoffset in this branch, nothing was skipped
1759 m = PARTIAL_READ_MSG % (filename, length, offset, got)
1760 raise error.RevlogError(m)
1761
1762 return d
1763
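# Worked example (a sketch, not part of the original file): with the default
# 64KiB cache size, a request for offset=70000, length=100 is widened to the
# aligned window that contains it:
#
#   realoffset = 70000 & ~65535                            # = 65536
#   reallength = ((70000 + 100 + 65536) & ~65535) - 65536  # = 65536
#
# i.e. the 64KiB-aligned span [65536, 131072) is read and cached.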
1764 def _getsegment(self, offset, length, df=None):
1765 """Obtain a segment of raw data from the revlog.
1766
1767 Accepts an absolute offset, length of bytes to obtain, and an
1768 optional file handle to the already-opened revlog. If the file
1769 handle is used, its original seek position will not be preserved.
1770
1771 Requests for data may be returned from a cache.
1772
1773 Returns a str or a buffer instance of raw byte data.
1774 """
1775 o, d = self._chunkcache
1776 l = len(d)
1777
1778 # is it in the cache?
1779 cachestart = offset - o
1780 cacheend = cachestart + length
1781 if cachestart >= 0 and cacheend <= l:
1782 if cachestart == 0 and cacheend == l:
1783 return d # avoid a copy
1784 return util.buffer(d, cachestart, cacheend - cachestart)
1785
1786 return self._readsegment(offset, length, df=df)
1787
1788 def _getsegmentforrevs(self, startrev, endrev, df=None):
1789 """Obtain a segment of raw data corresponding to a range of revisions.
1790
1791 Accepts the start and end revisions and an optional already-open
1792 file handle to be used for reading. If the file handle is read, its
1793 seek position will not be preserved.
1794
1795 Requests for data may be satisfied by a cache.
1796
1797 Returns a 2-tuple of (offset, data) for the requested range of
1798 revisions. Offset is the integer offset from the beginning of the
1799 revlog and data is a str or buffer of the raw byte data.
1800
1801 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1802 to determine where each revision's data begins and ends.
1803 """
1804 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1805 # (functions are expensive).
1806 index = self.index
1807 istart = index[startrev]
1808 start = int(istart[0] >> 16)
1809 if startrev == endrev:
1810 end = start + istart[1]
1811 else:
1812 iend = index[endrev]
1813 end = int(iend[0] >> 16) + iend[1]
1814
1815 if self._inline:
1816 start += (startrev + 1) * self.index.entry_size
1817 end += (endrev + 1) * self.index.entry_size
1818 length = end - start
1819
1820 return start, self._getsegment(start, length, df=df)
1821
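# Worked example (a sketch, not part of the original file): in an inline
# revlog, index entries and data chunks share one file, so the data of
# revision N really starts (N + 1) index entries past its nominal offset:
#
#   physical_start = (index[N][0] >> 16) + (N + 1) * index.entry_size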
1822 def _chunk(self, rev, df=None):
1823 """Obtain a single decompressed chunk for a revision.
1824
1825 Accepts an integer revision and an optional already-open file handle
1826 to be used for reading. If used, the seek position of the file will not
1827 be preserved.
1828
1829 Returns a str holding uncompressed data for the requested revision.
1830 """
1831 compression_mode = self.index[rev][10]
1832 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1833 if compression_mode == COMP_MODE_PLAIN:
1834 return data
1835 elif compression_mode == COMP_MODE_DEFAULT:
1836 return self._decompressor(data)
1837 elif compression_mode == COMP_MODE_INLINE:
1838 return self.decompress(data)
1839 else:
1840 msg = 'unknown compression mode %d'
1841 msg %= compression_mode
1842 raise error.RevlogError(msg)
1843
1844 def _chunks(self, revs, df=None, targetsize=None):
1845 """Obtain decompressed chunks for the specified revisions.
1846
1847 Accepts an iterable of numeric revisions that are assumed to be in
1848 ascending order. Also accepts an optional already-open file handle
1849 to be used for reading. If used, the seek position of the file will
1850 not be preserved.
1851
1852 This function is similar to calling ``self._chunk()`` multiple times,
1853 but is faster.
1854
1855 Returns a list with decompressed data for each requested revision.
1856 """
1857 if not revs:
1858 return []
1859 start = self.start
1860 length = self.length
1861 inline = self._inline
1862 iosize = self.index.entry_size
1863 buffer = util.buffer
1864
1865 l = []
1866 ladd = l.append
1867
1868 if not self._withsparseread:
1869 slicedchunks = (revs,)
1870 else:
1871 slicedchunks = deltautil.slicechunk(
1872 self, revs, targetsize=targetsize
1873 )
1874
1875 for revschunk in slicedchunks:
1876 firstrev = revschunk[0]
1877 # Skip trailing revisions with empty diff
1878 for lastrev in revschunk[::-1]:
1879 if length(lastrev) != 0:
1880 break
1881
1882 try:
1883 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1884 except OverflowError:
1885 # issue4215 - we can't cache a run of chunks greater than
1886 # 2G on Windows
1887 return [self._chunk(rev, df=df) for rev in revschunk]
1888
1889 decomp = self.decompress
1890 # self._decompressor might be None, but will not be used in that case
1891 def_decomp = self._decompressor
1892 for rev in revschunk:
1893 chunkstart = start(rev)
1894 if inline:
1895 chunkstart += (rev + 1) * iosize
1896 chunklength = length(rev)
1897 comp_mode = self.index[rev][10]
1898 c = buffer(data, chunkstart - offset, chunklength)
1899 if comp_mode == COMP_MODE_PLAIN:
1900 ladd(c)
1901 elif comp_mode == COMP_MODE_INLINE:
1902 ladd(decomp(c))
1903 elif comp_mode == COMP_MODE_DEFAULT:
1904 ladd(def_decomp(c))
1905 else:
1906 msg = 'unknown compression mode %d'
1907 msg %= comp_mode
1908 raise error.RevlogError(msg)
1909
1910 return l
1911
1912 def _chunkclear(self):
1913 """Clear the raw chunk cache."""
1914 self._chunkcache = (0, b'')
1915
1916 def deltaparent(self, rev):
1917 """return deltaparent of the given revision"""
1918 base = self.index[rev][3]
1919 if base == rev:
1920 return nullrev
1921 elif self._generaldelta:
1922 return base
1923 else:
1924 return rev - 1
1925
1926 def issnapshot(self, rev):
1927 """tells whether rev is a snapshot"""
1928 if not self._sparserevlog:
1929 return self.deltaparent(rev) == nullrev
1930 elif util.safehasattr(self.index, 'issnapshot'):
1931 # directly assign the method to cache the testing and access
1932 self.issnapshot = self.index.issnapshot
1933 return self.issnapshot(rev)
1934 if rev == nullrev:
1935 return True
1936 entry = self.index[rev]
1937 base = entry[3]
1938 if base == rev:
1939 return True
1940 if base == nullrev:
1941 return True
1942 p1 = entry[5]
1943 p2 = entry[6]
1944 if base == p1 or base == p2:
1945 return False
1946 return self.issnapshot(base)
1947
1948 def snapshotdepth(self, rev):
1949 """number of snapshot in the chain before this one"""
1950 if not self.issnapshot(rev):
1951 raise error.ProgrammingError(b'revision %d not a snapshot')
1952 return len(self._deltachain(rev)[0]) - 1
1953
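# Illustration (a sketch, not part of the original file): for a delta chain
# [full snapshot, intermediate snapshot, plain delta], issnapshot() is True
# for the first two revisions, and snapshotdepth() of the intermediate
# snapshot is len(its chain) - 1 = 1.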
1954 def revdiff(self, rev1, rev2):
1955 """return or calculate a delta between two revisions
1956
1957 The delta calculated is in binary form and is intended to be written to
1958 revlog data directly. So this function needs raw revision data.
1959 """
1960 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1961 return bytes(self._chunk(rev2))
1962
1963 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1964
1965 def _processflags(self, text, flags, operation, raw=False):
1966 """deprecated entry point to access flag processors"""
1967 msg = b'_processflag(...) use the specialized variant'
1968 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1969 if raw:
1970 return text, flagutil.processflagsraw(self, text, flags)
1971 elif operation == b'read':
1972 return flagutil.processflagsread(self, text, flags)
1973 else: # write operation
1974 return flagutil.processflagswrite(self, text, flags)
1975
1976 def revision(self, nodeorrev, _df=None, raw=False):
1977 """return an uncompressed revision of a given node or revision
1978 number.
1979
1980 _df - an existing file handle to read from. (internal-only)
1981 raw - an optional argument specifying if the revision data is to be
1982 treated as raw data when applying flag transforms. 'raw' should be set
1983 to True when generating changegroups or in debug commands.
1984 """
1985 if raw:
1986 msg = (
1987 b'revlog.revision(..., raw=True) is deprecated, '
1988 b'use revlog.rawdata(...)'
1989 )
1990 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1991 return self._revisiondata(nodeorrev, _df, raw=raw)
1992
1993 def sidedata(self, nodeorrev, _df=None):
1994 """a map of extra data related to the changeset but not part of the hash
1995
1996 This function currently returns a dictionary. However, a more advanced
1997 mapping object will likely be used in the future for more
1998 efficient/lazy code.
1999 """
2000 # deal with <nodeorrev> argument type
2001 if isinstance(nodeorrev, int):
2002 rev = nodeorrev
2003 else:
2004 rev = self.rev(nodeorrev)
2005 return self._sidedata(rev)
2006
2007 def _revisiondata(self, nodeorrev, _df=None, raw=False):
2008 # deal with <nodeorrev> argument type
2009 if isinstance(nodeorrev, int):
2010 rev = nodeorrev
2011 node = self.node(rev)
2012 else:
2013 node = nodeorrev
2014 rev = None
2015
2016 # fast path the special `nullid` rev
2017 if node == self.nullid:
2018 return b""
2019
2020 # ``rawtext`` is the text as stored inside the revlog. Might be the
2021 # revision or might need to be processed to retrieve the revision.
2022 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
2023
2024 if raw and validated:
2025 # if we don't want to process the raw text and that raw
2026 # text is cached, we can exit early.
2027 return rawtext
2028 if rev is None:
2029 rev = self.rev(node)
2030 # the revlog's flag for this revision
2031 # (usually alter its state or content)
2032 flags = self.flags(rev)
2033
2034 if validated and flags == REVIDX_DEFAULT_FLAGS:
2035 # no extra flags set, no flag processor runs, text = rawtext
2036 return rawtext
2037
2038 if raw:
2039 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2040 text = rawtext
2041 else:
2042 r = flagutil.processflagsread(self, rawtext, flags)
2043 text, validatehash = r
2044 if validatehash:
2045 self.checkhash(text, node, rev=rev)
2046 if not validated:
2047 self._revisioncache = (node, rev, rawtext)
2048
2049 return text
2050
2051 def _rawtext(self, node, rev, _df=None):
2052 """return the possibly unvalidated rawtext for a revision
2053
2054 returns (rev, rawtext, validated)
2055 """
2056
2057 # revision in the cache (could be useful to apply delta)
2058 cachedrev = None
2059 # An intermediate text to apply deltas to
2060 basetext = None
2061
2062 # Check if we have the entry in cache
2063 # The cache entry looks like (node, rev, rawtext)
2064 if self._revisioncache:
2065 if self._revisioncache[0] == node:
2066 return (rev, self._revisioncache[2], True)
2067 cachedrev = self._revisioncache[1]
2068
2069 if rev is None:
2070 rev = self.rev(node)
2071
2072 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2073 if stopped:
2074 basetext = self._revisioncache[2]
2075
2076 # drop cache to save memory, the caller is expected to
2077 # update self._revisioncache after validating the text
2078 self._revisioncache = None
2079
2080 targetsize = None
2081 rawsize = self.index[rev][2]
2082 if 0 <= rawsize:
2083 targetsize = 4 * rawsize
2084
2085 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2086 if basetext is None:
2087 basetext = bytes(bins[0])
2088 bins = bins[1:]
2089
2090 rawtext = mdiff.patches(basetext, bins)
2091 del basetext # let us have a chance to free memory early
2092 return (rev, rawtext, False)
2093
2094 def _sidedata(self, rev):
2095 """Return the sidedata for a given revision number."""
2096 index_entry = self.index[rev]
2097 sidedata_offset = index_entry[8]
2098 sidedata_size = index_entry[9]
2099
2100 if self._inline:
2101 sidedata_offset += self.index.entry_size * (1 + rev)
2102 if sidedata_size == 0:
2103 return {}
2104
2105 # XXX this needs caching, as we do for data
2106 with self._sidedatareadfp() as sdf:
2107 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2108 filename = self._sidedatafile
2109 end = self._docket.sidedata_end
2110 offset = sidedata_offset
2111 length = sidedata_size
2112 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2113 raise error.RevlogError(m)
2114
2115 sdf.seek(sidedata_offset, os.SEEK_SET)
2116 comp_segment = sdf.read(sidedata_size)
2117
2118 if len(comp_segment) < sidedata_size:
2119 filename = self._sidedatafile
2120 length = sidedata_size
2121 offset = sidedata_offset
2122 got = len(comp_segment)
2123 m = PARTIAL_READ_MSG % (filename, length, offset, got)
2124 raise error.RevlogError(m)
2125
2126 comp = self.index[rev][11]
2127 if comp == COMP_MODE_PLAIN:
2128 segment = comp_segment
2129 elif comp == COMP_MODE_DEFAULT:
2130 segment = self._decompressor(comp_segment)
2131 elif comp == COMP_MODE_INLINE:
2132 segment = self.decompress(comp_segment)
2133 else:
2134 msg = 'unknown compression mode %d'
2135 msg %= comp
2136 raise error.RevlogError(msg)
2137
2138 sidedata = sidedatautil.deserialize_sidedata(segment)
2139 return sidedata
2140
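# The guarded read pattern used by _sidedata above, reduced to a
# standalone sketch: seek to a known offset, read a known size, and treat
# a short read as corruption instead of continuing silently.

import io
import os

def read_exact(fh, offset, size):
    fh.seek(offset, os.SEEK_SET)
    data = fh.read(size)
    if len(data) < size:
        raise IOError('partial read: got %d of %d bytes' % (len(data), size))
    return data

assert read_exact(io.BytesIO(b'0123456789'), 2, 4) == b'2345'
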
2141 def rawdata(self, nodeorrev, _df=None):
2142 """return an uncompressed raw data of a given node or revision number.
2143
2144 _df - an existing file handle to read from. (internal-only)
2145 """
2146 return self._revisiondata(nodeorrev, _df, raw=True)
2147
2148 def hash(self, text, p1, p2):
2149 """Compute a node hash.
2150
2151 Available as a function so that subclasses can replace the hash
2152 as needed.
2153 """
2154 return storageutil.hashrevisionsha1(text, p1, p2)
2155
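# A sketch of the SHA-1 node scheme assumed to back
# storageutil.hashrevisionsha1: hash the sorted parents, then the text,
# so the resulting node does not depend on parent order.

import hashlib

def node_hash(text, p1, p2):
    s = hashlib.sha1(min(p1, p2))
    s.update(max(p1, p2))
    s.update(text)
    return s.digest()

n1 = node_hash(b'text', b'\0' * 20, b'\1' * 20)
n2 = node_hash(b'text', b'\1' * 20, b'\0' * 20)
assert n1 == n2  # swapping parents does not change the node
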
2156 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2157 """Check node hash integrity.
2158
2159 Available as a function so that subclasses can extend hash mismatch
2160 behaviors as needed.
2161 """
2162 try:
2163 if p1 is None and p2 is None:
2164 p1, p2 = self.parents(node)
2165 if node != self.hash(text, p1, p2):
2166 # Clear the revision cache on hash failure. The revision cache
2167 # only stores the raw revision and clearing the cache does have
2168 # the side-effect that we won't have a cache hit when the raw
2169 # revision data is accessed. But this case should be rare and
2170 # it is extra work to teach the cache about the hash
2171 # verification state.
2172 if self._revisioncache and self._revisioncache[0] == node:
2173 self._revisioncache = None
2174
2175 revornode = rev
2176 if revornode is None:
2177 revornode = templatefilters.short(hex(node))
2178 raise error.RevlogError(
2179 _(b"integrity check failed on %s:%s")
2180 % (self.display_id, pycompat.bytestr(revornode))
2181 )
2182 except error.RevlogError:
2183 if self._censorable and storageutil.iscensoredtext(text):
2184 raise error.CensoredNodeError(self.display_id, node, text)
2185 raise
2186
2187 def _enforceinlinesize(self, tr):
2188 """Check if the revlog is too big for inline and convert if so.
2189
2190 This should be called after revisions are added to the revlog. If the
2191 revlog has grown too large to be an inline revlog, it will convert it
2192 to use multiple index and data files.
2193 """
2194 tiprev = len(self) - 1
2195 total_size = self.start(tiprev) + self.length(tiprev)
2196 if not self._inline or total_size < _maxinline:
2197 return
2198
2199 troffset = tr.findoffset(self._indexfile)
2200 if troffset is None:
2201 raise error.RevlogError(
2202 _(b"%s not found in the transaction") % self._indexfile
2203 )
2204 trindex = 0
2205 tr.add(self._datafile, 0)
2206
2207 existing_handles = False
2208 if self._writinghandles is not None:
2209 existing_handles = True
2210 fp = self._writinghandles[0]
2211 fp.flush()
2212 fp.close()
2213 # We can't use the cached file handle after close(). So prevent
2214 # its usage.
2215 self._writinghandles = None
2216
2217 new_dfh = self._datafp(b'w+')
2218 new_dfh.truncate(0) # drop any potentially existing data
2219 try:
2220 with self._indexfp() as read_ifh:
2221 for r in self:
2222 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2223 if troffset <= self.start(r) + r * self.index.entry_size:
2224 trindex = r
2225 new_dfh.flush()
2226
2227 with self.__index_new_fp() as fp:
2228 self._format_flags &= ~FLAG_INLINE_DATA
2229 self._inline = False
2230 for i in self:
2231 e = self.index.entry_binary(i)
2232 if i == 0 and self._docket is None:
2233 header = self._format_flags | self._format_version
2234 header = self.index.pack_header(header)
2235 e = header + e
2236 fp.write(e)
2237 if self._docket is not None:
2238 self._docket.index_end = fp.tell()
2239
2240 # There is a small transactional race here. If the rename of
2241 # the index fails, we should remove the datafile. It is more
2242 # important to ensure that the data file is not truncated
2243 # when the index is replaced as otherwise data is lost.
2244 tr.replace(self._datafile, self.start(trindex))
2245
2246 # the temp file replaces the real index when we exit the context
2247 # manager
2248
2249 tr.replace(self._indexfile, trindex * self.index.entry_size)
2250 nodemaputil.setup_persistent_nodemap(tr, self)
2251 self._chunkclear()
2252
2253 if existing_handles:
2254 # switched from inline to conventional; reopen the index
2255 ifh = self.__index_write_fp()
2256 self._writinghandles = (ifh, new_dfh, None)
2257 new_dfh = None
2258 finally:
2259 if new_dfh is not None:
2260 new_dfh.close()
2261
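# The trigger condition checked at the top of _enforceinlinesize, as a
# standalone sketch. The 128 KiB default mirrors what the module's
# _maxinline constant is assumed to be in this version.

def should_split(tip_start, tip_length, inline, maxinline=131072):
    # convert inline -> split storage once total data reaches the limit
    return inline and (tip_start + tip_length) >= maxinline

assert not should_split(0, 1000, True)
assert should_split(131000, 1000, True)
assert not should_split(131000, 1000, False)  # already split
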
2262 def _nodeduplicatecallback(self, transaction, node):
2263 """called when trying to add a node already stored."""
2264
2265 @contextlib.contextmanager
2266 def _writing(self, transaction):
2267 if self._trypending:
2268 msg = b'try to write in a `trypending` revlog: %s'
2269 msg %= self.display_id
2270 raise error.ProgrammingError(msg)
2271 if self._writinghandles is not None:
2272 yield
2273 else:
2274 ifh = dfh = sdfh = None
2275 try:
2276 r = len(self)
2277 # opening the data file.
2278 dsize = 0
2279 if r:
2280 dsize = self.end(r - 1)
2281 dfh = None
2282 if not self._inline:
2283 try:
2284 dfh = self._datafp(b"r+")
2285 if self._docket is None:
2286 dfh.seek(0, os.SEEK_END)
2287 else:
2288 dfh.seek(self._docket.data_end, os.SEEK_SET)
2289 except IOError as inst:
2290 if inst.errno != errno.ENOENT:
2291 raise
2292 dfh = self._datafp(b"w+")
2293 transaction.add(self._datafile, dsize)
2294 if self._sidedatafile is not None:
2295 try:
2296 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2297 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2298 except IOError as inst:
2299 if inst.errno != errno.ENOENT:
2300 raise
2301 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2302 transaction.add(
2303 self._sidedatafile, self._docket.sidedata_end
2304 )
2305
2306 # opening the index file.
2307 isize = r * self.index.entry_size
2308 ifh = self.__index_write_fp()
2309 if self._inline:
2310 transaction.add(self._indexfile, dsize + isize)
2311 else:
2312 transaction.add(self._indexfile, isize)
2313 # exposing all file handle for writing.
2314 self._writinghandles = (ifh, dfh, sdfh)
2315 yield
2316 if self._docket is not None:
2317 self._write_docket(transaction)
2318 finally:
2319 self._writinghandles = None
2320 if dfh is not None:
2321 dfh.close()
2322 if sdfh is not None:
2323 sdfh.close()
2324 # closing the index file last to avoid exposing referent to
2325 # potential unflushed data content.
2326 if ifh is not None:
2327 ifh.close()
2328
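# The re-entrancy idiom used by _writing above, as a standalone sketch:
# when handles are already open the context manager simply yields, so
# nested with-blocks share one set of handles and only the outermost
# level opens and cleans up.

import contextlib

class ToyStore(object):
    def __init__(self):
        self.handles = None

    @contextlib.contextmanager
    def writing(self):
        if self.handles is not None:
            yield  # re-entrant call: reuse the outer handles
        else:
            self.handles = ('ifh', 'dfh', 'sdfh')
            try:
                yield
            finally:
                self.handles = None

store = ToyStore()
with store.writing():
    with store.writing():  # nested: no reopen, no premature close
        assert store.handles is not None
assert store.handles is None
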
2329 def _write_docket(self, transaction):
2330 """write the current docket on disk
2331
2332 Exists as a method to help the changelog implement transaction logic.
2333
2334 We could also imagine using the same transaction logic for all revlogs,
2335 since dockets are cheap."""
2336 self._docket.write(transaction)
2337
2338 def addrevision(
2339 self,
2340 text,
2341 transaction,
2342 link,
2343 p1,
2344 p2,
2345 cachedelta=None,
2346 node=None,
2347 flags=REVIDX_DEFAULT_FLAGS,
2348 deltacomputer=None,
2349 sidedata=None,
2350 ):
2351 """add a revision to the log
2352
2353 text - the revision data to add
2354 transaction - the transaction object used for rollback
2355 link - the linkrev data to add
2356 p1, p2 - the parent nodeids of the revision
2357 cachedelta - an optional precomputed delta
2358 node - nodeid of revision; typically node is not specified, and it is
2359 computed by default as hash(text, p1, p2); however, subclasses might
2360 use a different hashing method (and override checkhash() in that case)
2361 flags - the known flags to set on the revision
2362 deltacomputer - an optional deltacomputer instance shared between
2363 multiple calls
2364 """
2365 if link == nullrev:
2366 raise error.RevlogError(
2367 _(b"attempted to add linkrev -1 to %s") % self.display_id
2368 )
2369
2370 if sidedata is None:
2371 sidedata = {}
2372 elif sidedata and not self.hassidedata:
2373 raise error.ProgrammingError(
2374 _(b"trying to add sidedata to a revlog who don't support them")
2375 )
2376
2377 if flags:
2378 node = node or self.hash(text, p1, p2)
2379
2380 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2381
2382 # If the flag processor modifies the revision data, ignore any provided
2383 # cachedelta.
2384 if rawtext != text:
2385 cachedelta = None
2386
2387 if len(rawtext) > _maxentrysize:
2388 raise error.RevlogError(
2389 _(
2390 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2391 )
2392 % (self.display_id, len(rawtext))
2393 )
2394
2395 node = node or self.hash(rawtext, p1, p2)
2396 rev = self.index.get_rev(node)
2397 if rev is not None:
2398 return rev
2399
2400 if validatehash:
2401 self.checkhash(rawtext, node, p1=p1, p2=p2)
2402
2403 return self.addrawrevision(
2404 rawtext,
2405 transaction,
2406 link,
2407 p1,
2408 p2,
2409 node,
2410 flags,
2411 cachedelta=cachedelta,
2412 deltacomputer=deltacomputer,
2413 sidedata=sidedata,
2414 )
2415
2416 def addrawrevision(
2417 self,
2418 rawtext,
2419 transaction,
2420 link,
2421 p1,
2422 p2,
2423 node,
2424 flags,
2425 cachedelta=None,
2426 deltacomputer=None,
2427 sidedata=None,
2428 ):
2429 """add a raw revision with known flags, node and parents
2430 useful when reusing a revision not stored in this revlog (e.g. received
2431 over the wire, or read from an external bundle).
2432 """
2433 with self._writing(transaction):
2434 return self._addrevision(
2435 node,
2436 rawtext,
2437 transaction,
2438 link,
2439 p1,
2440 p2,
2441 flags,
2442 cachedelta,
2443 deltacomputer=deltacomputer,
2444 sidedata=sidedata,
2445 )
2446
2447 def compress(self, data):
2448 """Generate a possibly-compressed representation of data."""
2449 if not data:
2450 return b'', data
2451
2452 compressed = self._compressor.compress(data)
2453
2454 if compressed:
2455 # The revlog compressor added the header in the returned data.
2456 return b'', compressed
2457
2458 if data[0:1] == b'\0':
2459 return b'', data
2460 return b'u', data
2461
2462 def decompress(self, data):
2463 """Decompress a revlog chunk.
2464
2465 The chunk is expected to begin with a header identifying the
2466 format type so it can be routed to an appropriate decompressor.
2467 """
2468 if not data:
2469 return data
2470
2471 # Revlogs are read much more frequently than they are written and many
2472 # chunks only take microseconds to decompress, so performance is
2473 # important here.
2474 #
2475 # We can make a few assumptions about revlogs:
2476 #
2477 # 1) the majority of chunks will be compressed (as opposed to inline
2478 # raw data).
2479 # 2) decompressing *any* data will likely be at least 10x slower than
2480 # returning raw inline data.
2481 # 3) we want to prioritize common and officially supported compression
2482 # engines
2483 #
2484 # It follows that we want to optimize for the "decompress compressed data
2485 # when encoded with common and officially supported compression engines"
2486 # case over "raw data" and "data encoded by less common or non-official
2487 # compression engines." That is why we have the inline lookup first
2488 # followed by the compengines lookup.
2489 #
2490 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2491 # compressed chunks. And this matters for changelog and manifest reads.
2492 t = data[0:1]
2493
2494 if t == b'x':
2495 try:
2496 return _zlibdecompress(data)
2497 except zlib.error as e:
2498 raise error.RevlogError(
2499 _(b'revlog decompress error: %s')
2500 % stringutil.forcebytestr(e)
2501 )
2502 # '\0' is more common than 'u' so it goes first.
2503 elif t == b'\0':
2504 return data
2505 elif t == b'u':
2506 return util.buffer(data, 1)
2507
2508 compressor = self._get_decompressor(t)
2509
2510 return compressor.decompress(data)
2511
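# An illustration of the one-byte chunk headers routed above:
# b'u' + data  -> data stored verbatim (the marker must be stripped),
# b'\0'...     -> raw data that happens to start with NUL (no marker),
# b'x'...      -> a zlib stream (zlib's own magic byte is the header).

import zlib

raw = b'some revision text, long enough for zlib to shrink it a little bit'
comp = zlib.compress(raw)
assert comp[0:1] == b'x'  # the default zlib header byte doubles as marker
assert zlib.decompress(comp) == raw
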
2512 def _addrevision(
2513 self,
2514 node,
2515 rawtext,
2516 transaction,
2517 link,
2518 p1,
2519 p2,
2520 flags,
2521 cachedelta,
2522 alwayscache=False,
2523 deltacomputer=None,
2524 sidedata=None,
2525 ):
2526 """internal function to add revisions to the log
2527
2528 see addrevision for argument descriptions.
2529
2530 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2531
2532 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2533 be used.
2534
2535 invariants:
2536 - rawtext is optional (can be None); if not set, cachedelta must be set.
2537 if both are set, they must correspond to each other.
2538 """
2539 if node == self.nullid:
2540 raise error.RevlogError(
2541 _(b"%s: attempt to add null revision") % self.display_id
2542 )
2543 if (
2544 node == self.nodeconstants.wdirid
2545 or node in self.nodeconstants.wdirfilenodeids
2546 ):
2547 raise error.RevlogError(
2548 _(b"%s: attempt to add wdir revision") % self.display_id
2549 )
2550 if self._writinghandles is None:
2551 msg = b'adding revision outside `revlog._writing` context'
2552 raise error.ProgrammingError(msg)
2553
2554 if self._inline:
2555 fh = self._writinghandles[0]
2556 else:
2557 fh = self._writinghandles[1]
2558
2559 btext = [rawtext]
2560
2561 curr = len(self)
2562 prev = curr - 1
2563
2564 offset = self._get_data_offset(prev)
2565
2566 if self._concurrencychecker:
2567 ifh, dfh, sdfh = self._writinghandles
2568 # XXX no checking for the sidedata file
2569 if self._inline:
2570 # offset is "as if" it were in the .d file, so we need to add on
2571 # the size of the entry metadata.
2572 self._concurrencychecker(
2573 ifh, self._indexfile, offset + curr * self.index.entry_size
2574 )
2575 else:
2576 # Entries in the .i are a consistent size.
2577 self._concurrencychecker(
2578 ifh, self._indexfile, curr * self.index.entry_size
2579 )
2580 self._concurrencychecker(dfh, self._datafile, offset)
2581
2582 p1r, p2r = self.rev(p1), self.rev(p2)
2583
2584 # full versions are inserted when the needed deltas
2585 # become comparable to the uncompressed text
2586 if rawtext is None:
2587 # need rawtext size, before changed by flag processors, which is
2588 # the non-raw size. use revlog explicitly to avoid filelog's extra
2589 # logic that might remove metadata size.
2590 textlen = mdiff.patchedsize(
2591 revlog.size(self, cachedelta[0]), cachedelta[1]
2592 )
2593 else:
2594 textlen = len(rawtext)
2595
2596 if deltacomputer is None:
2597 deltacomputer = deltautil.deltacomputer(self)
2598
2599 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2600
2601 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2602
2603 compression_mode = COMP_MODE_INLINE
2604 if self._docket is not None:
2605 h, d = deltainfo.data
2606 if not h and not d:
2607 # no data to store at all... declare them uncompressed
2608 compression_mode = COMP_MODE_PLAIN
2609 elif not h:
2610 t = d[0:1]
2611 if t == b'\0':
2612 compression_mode = COMP_MODE_PLAIN
2613 elif t == self._docket.default_compression_header:
2614 compression_mode = COMP_MODE_DEFAULT
2615 elif h == b'u':
2616 # we have a more efficient way to declare uncompressed
2617 h = b''
2618 compression_mode = COMP_MODE_PLAIN
2619 deltainfo = deltautil.drop_u_compression(deltainfo)
2620
2621 sidedata_compression_mode = COMP_MODE_INLINE
2622 if sidedata and self.hassidedata:
2623 sidedata_compression_mode = COMP_MODE_PLAIN
2624 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2625 sidedata_offset = self._docket.sidedata_end
2626 h, comp_sidedata = self.compress(serialized_sidedata)
2627 if (
2628 h != b'u'
2629 and comp_sidedata[0:1] != b'\0'
2630 and len(comp_sidedata) < len(serialized_sidedata)
2631 ):
2632 assert not h
2633 if (
2634 comp_sidedata[0:1]
2635 == self._docket.default_compression_header
2636 ):
2637 sidedata_compression_mode = COMP_MODE_DEFAULT
2638 serialized_sidedata = comp_sidedata
2639 else:
2640 sidedata_compression_mode = COMP_MODE_INLINE
2641 serialized_sidedata = comp_sidedata
2642 else:
2643 serialized_sidedata = b""
2644 # Don't store the offset if the sidedata is empty; that way
2645 # we can easily detect empty sidedata, and it will be no different
2646 # from sidedata we add manually.
2647 sidedata_offset = 0
2648
2649 e = (
2650 offset_type(offset, flags),
2651 deltainfo.deltalen,
2652 textlen,
2653 deltainfo.base,
2654 link,
2655 p1r,
2656 p2r,
2657 node,
2658 sidedata_offset,
2659 len(serialized_sidedata),
2660 compression_mode,
2661 sidedata_compression_mode,
2662 )
2663
2664 self.index.append(e)
2665 entry = self.index.entry_binary(curr)
2666 if curr == 0 and self._docket is None:
2667 header = self._format_flags | self._format_version
2668 header = self.index.pack_header(header)
2669 entry = header + entry
2670 self._writeentry(
2671 transaction,
2672 entry,
2673 deltainfo.data,
2674 link,
2675 offset,
2676 serialized_sidedata,
2677 sidedata_offset,
2678 )
2679
2680 rawtext = btext[0]
2681
2682 if alwayscache and rawtext is None:
2683 rawtext = deltacomputer.buildtext(revinfo, fh)
2684
2685 if type(rawtext) == bytes: # only accept immutable objects
2686 self._revisioncache = (node, curr, rawtext)
2687 self._chainbasecache[curr] = deltainfo.chainbase
2688 return curr
2689
2690 def _get_data_offset(self, prev):
2691 """Returns the current offset in the (in-transaction) data file.
2692 Versions < 2 of the revlog can get this in O(1), revlog v2 needs a docket
2693 file to store that information: since sidedata can be rewritten to the
2694 end of the data file within a transaction, you can have cases where, for
2695 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2696 to `n - 1`'s sidedata being written after `n`'s data.
2697
2698 TODO cache this in a docket file before getting out of experimental."""
2699 if self._docket is None:
2700 return self.end(prev)
2701 else:
2702 return self._docket.data_end
2703
2704 def _writeentry(
2705 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2706 ):
2707 # Files opened in a+ mode have inconsistent behavior on various
2708 # platforms. Windows requires that a file positioning call be made
2709 # when the file handle transitions between reads and writes. See
2710 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2711 # platforms, Python or the platform itself can be buggy. Some versions
2712 # of Solaris have been observed to not append at the end of the file
2713 # if the file was seeked to before the end. See issue4943 for more.
2714 #
2715 # We work around this issue by inserting a seek() before writing.
2716 # Note: This is likely not necessary on Python 3. However, because
2717 # the file handle is reused for reads and may be seeked there, we need
2718 # to be careful before changing this.
2719 if self._writinghandles is None:
2720 msg = b'adding revision outside `revlog._writing` context'
2721 raise error.ProgrammingError(msg)
2722 ifh, dfh, sdfh = self._writinghandles
2723 if self._docket is None:
2724 ifh.seek(0, os.SEEK_END)
2725 else:
2726 ifh.seek(self._docket.index_end, os.SEEK_SET)
2727 if dfh:
2728 if self._docket is None:
2729 dfh.seek(0, os.SEEK_END)
2730 else:
2731 dfh.seek(self._docket.data_end, os.SEEK_SET)
2732 if sdfh:
2733 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2734
2735 curr = len(self) - 1
2736 if not self._inline:
2737 transaction.add(self._datafile, offset)
2738 if self._sidedatafile:
2739 transaction.add(self._sidedatafile, sidedata_offset)
2740 transaction.add(self._indexfile, curr * len(entry))
2741 if data[0]:
2742 dfh.write(data[0])
2743 dfh.write(data[1])
2744 if sidedata:
2745 sdfh.write(sidedata)
2746 ifh.write(entry)
2747 else:
2748 offset += curr * self.index.entry_size
2749 transaction.add(self._indexfile, offset)
2750 ifh.write(entry)
2751 ifh.write(data[0])
2752 ifh.write(data[1])
2753 assert not sidedata
2754 self._enforceinlinesize(transaction)
2755 if self._docket is not None:
2756 self._docket.index_end = self._writinghandles[0].tell()
2757 self._docket.data_end = self._writinghandles[1].tell()
2758 self._docket.sidedata_end = self._writinghandles[2].tell()
2759
2760 nodemaputil.setup_persistent_nodemap(transaction, self)
2761
2762 def addgroup(
2763 self,
2764 deltas,
2765 linkmapper,
2766 transaction,
2767 alwayscache=False,
2768 addrevisioncb=None,
2769 duplicaterevisioncb=None,
2770 ):
2771 """
2772 add a delta group
2773
2774 Given a set of deltas, add them to the revision log. The
2775 first delta is against its parent, which should be in our
2776 log; the rest are against the previous delta.
2777
2778 If ``addrevisioncb`` is defined, it will be called with arguments of
2779 this revlog and the node that was added.
2780 """
2781
2782 if self._adding_group:
2783 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2784
2785 self._adding_group = True
2786 empty = True
2787 try:
2788 with self._writing(transaction):
2789 deltacomputer = deltautil.deltacomputer(self)
2790 # loop through our set of deltas
2791 for data in deltas:
2792 (
2793 node,
2794 p1,
2795 p2,
2796 linknode,
2797 deltabase,
2798 delta,
2799 flags,
2800 sidedata,
2801 ) = data
2802 link = linkmapper(linknode)
2803 flags = flags or REVIDX_DEFAULT_FLAGS
2804
2805 rev = self.index.get_rev(node)
2806 if rev is not None:
2807 # this can happen if two branches make the same change
2808 self._nodeduplicatecallback(transaction, rev)
2809 if duplicaterevisioncb:
2810 duplicaterevisioncb(self, rev)
2811 empty = False
2812 continue
2813
2814 for p in (p1, p2):
2815 if not self.index.has_node(p):
2816 raise error.LookupError(
2817 p, self.radix, _(b'unknown parent')
2818 )
2819
2820 if not self.index.has_node(deltabase):
2821 raise error.LookupError(
2822 deltabase, self.display_id, _(b'unknown delta base')
2823 )
2824
2825 baserev = self.rev(deltabase)
2826
2827 if baserev != nullrev and self.iscensored(baserev):
2828 # if base is censored, delta must be full replacement in a
2829 # single patch operation
2830 hlen = struct.calcsize(b">lll")
2831 oldlen = self.rawsize(baserev)
2832 newlen = len(delta) - hlen
2833 if delta[:hlen] != mdiff.replacediffheader(
2834 oldlen, newlen
2835 ):
2836 raise error.CensoredBaseError(
2837 self.display_id, self.node(baserev)
2838 )
2839
2840 if not flags and self._peek_iscensored(baserev, delta):
2841 flags |= REVIDX_ISCENSORED
2842
2843 # We assume consumers of addrevisioncb will want to retrieve
2844 # the added revision, which will require a call to
2845 # revision(). revision() will fast path if there is a cache
2846 # hit. So, we tell _addrevision() to always cache in this case.
2847 # We're only using addgroup() in the context of changegroup
2848 # generation so the revision data can always be handled as raw
2849 # by the flagprocessor.
2850 rev = self._addrevision(
2851 node,
2852 None,
2853 transaction,
2854 link,
2855 p1,
2856 p2,
2857 flags,
2858 (baserev, delta),
2859 alwayscache=alwayscache,
2860 deltacomputer=deltacomputer,
2861 sidedata=sidedata,
2862 )
2863
2864 if addrevisioncb:
2865 addrevisioncb(self, rev)
2866 empty = False
2867 finally:
2868 self._adding_group = False
2869 return not empty
2870
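# The censored-base guard inside addgroup above, as a standalone sketch:
# a delta applied on top of a censored base must be a single hunk that
# replaces the entire old text, i.e. it must start with the same 12-byte
# header that mdiff.replacediffheader(oldlen, newlen) produces.

import struct

def is_full_replacement(delta, oldlen):
    hlen = struct.calcsize(b">lll")
    newlen = len(delta) - hlen
    return delta[:hlen] == struct.pack(b">lll", 0, oldlen, newlen)

patch = struct.pack(b">lll", 0, 5, 3) + b'new'
assert is_full_replacement(patch, 5)
assert not is_full_replacement(patch, 7)
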
2871 def iscensored(self, rev):
2872 """Check if a file revision is censored."""
2873 if not self._censorable:
2874 return False
2875
2876 return self.flags(rev) & REVIDX_ISCENSORED
2877
2878 def _peek_iscensored(self, baserev, delta):
2879 """Quickly check if a delta produces a censored revision."""
2880 if not self._censorable:
2881 return False
2882
2883 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2884
2885 def getstrippoint(self, minlink):
2886 """find the minimum rev that must be stripped to strip the linkrev
2887
2888 Returns a tuple containing the minimum rev and a set of all revs that
2889 have linkrevs that will be broken by this strip.
2890 """
2891 return storageutil.resolvestripinfo(
2892 minlink,
2893 len(self) - 1,
2894 self.headrevs(),
2895 self.linkrev,
2896 self.parentrevs,
2897 )
2898
2899 def strip(self, minlink, transaction):
2900 """truncate the revlog on the first revision with a linkrev >= minlink
2901
2902 This function is called when we're stripping revision minlink and
2903 its descendants from the repository.
2904
2905 We have to remove all revisions with linkrev >= minlink, because
2906 the equivalent changelog revisions will be renumbered after the
2907 strip.
2908
2909 So we truncate the revlog on the first of these revisions, and
2910 trust that the caller has saved the revisions that shouldn't be
2911 removed and that it'll re-add them after this truncation.
2912 """
2913 if len(self) == 0:
2914 return
2915
2916 rev, _ = self.getstrippoint(minlink)
2917 if rev == len(self):
2918 return
2919
2920 # first truncate the files on disk
2921 data_end = self.start(rev)
2922 if not self._inline:
2923 transaction.add(self._datafile, data_end)
2924 end = rev * self.index.entry_size
2925 else:
2926 end = data_end + (rev * self.index.entry_size)
2927
2928 if self._sidedatafile:
2929 sidedata_end = self.sidedata_cut_off(rev)
2930 transaction.add(self._sidedatafile, sidedata_end)
2931
2932 transaction.add(self._indexfile, end)
2933 if self._docket is not None:
2934 # XXX we could leverage the docket while stripping. However it is
2935 # not powerful enough at the time of this comment
2936 self._docket.index_end = end
2937 self._docket.data_end = data_end
2938 self._docket.sidedata_end = sidedata_end
2939 self._docket.write(transaction, stripping=True)
2940
2941 # then reset internal state in memory to forget those revisions
2942 self._revisioncache = None
2943 self._chaininfocache = util.lrucachedict(500)
2944 self._chunkclear()
2945
2946 del self.index[rev:-1]
2947
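# The truncation offsets computed by strip above, as a standalone sketch:
# a split revlog cuts the data file where the first stripped revision
# starts and the index at rev * entry_size; an inline revlog keeps both
# in one file, so the offsets are combined.

def truncation_points(data_start_of_rev, rev, entry_size, inline):
    if inline:
        # a single .i file holds interleaved entries and data
        return (None, data_start_of_rev + rev * entry_size)
    return (data_start_of_rev, rev * entry_size)  # (.d cut, .i cut)

assert truncation_points(1024, 4, 64, False) == (1024, 256)
assert truncation_points(1024, 4, 64, True) == (None, 1280)
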
2948 def checksize(self):
2949 """Check size of index and data files
2950
2951 return a (dd, di) tuple.
2952 - dd: extra bytes for the "data" file
2953 - di: extra bytes for the "index" file
2954
2955 A healthy revlog will return (0, 0).
2956 """
2957 expected = 0
2958 if len(self):
2959 expected = max(0, self.end(len(self) - 1))
2960
2961 try:
2962 with self._datafp() as f:
2963 f.seek(0, io.SEEK_END)
2964 actual = f.tell()
2965 dd = actual - expected
2966 except IOError as inst:
2967 if inst.errno != errno.ENOENT:
2968 raise
2969 dd = 0
2970
2971 try:
2972 f = self.opener(self._indexfile)
2973 f.seek(0, io.SEEK_END)
2974 actual = f.tell()
2975 f.close()
2976 s = self.index.entry_size
2977 i = max(0, actual // s)
2978 di = actual - (i * s)
2979 if self._inline:
2980 databytes = 0
2981 for r in self:
2982 databytes += max(0, self.length(r))
2983 dd = 0
2984 di = actual - len(self) * s - databytes
2985 except IOError as inst:
2986 if inst.errno != errno.ENOENT:
2987 raise
2988 di = 0
2989
2990 return (dd, di)
2991
2992 def files(self):
2993 res = [self._indexfile]
2994 if not self._inline:
2995 res.append(self._datafile)
2996 return res
2997
2998 def emitrevisions(
2999 self,
3000 nodes,
3001 nodesorder=None,
3002 revisiondata=False,
3003 assumehaveparentrevisions=False,
3004 deltamode=repository.CG_DELTAMODE_STD,
3005 sidedata_helpers=None,
3006 ):
3007 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3008 raise error.ProgrammingError(
3009 b'unhandled value for nodesorder: %s' % nodesorder
3010 )
3011
3012 if nodesorder is None and not self._generaldelta:
3013 nodesorder = b'storage'
3014
3015 if (
3016 not self._storedeltachains
3017 and deltamode != repository.CG_DELTAMODE_PREV
3018 ):
3019 deltamode = repository.CG_DELTAMODE_FULL
3020
3021 return storageutil.emitrevisions(
3022 self,
3023 nodes,
3024 nodesorder,
3025 revlogrevisiondelta,
3026 deltaparentfn=self.deltaparent,
3027 candeltafn=self.candelta,
3028 rawsizefn=self.rawsize,
3029 revdifffn=self.revdiff,
3030 flagsfn=self.flags,
3031 deltamode=deltamode,
3032 revisiondata=revisiondata,
3033 assumehaveparentrevisions=assumehaveparentrevisions,
3034 sidedata_helpers=sidedata_helpers,
3035 )
3036
3037 DELTAREUSEALWAYS = b'always'
3038 DELTAREUSESAMEREVS = b'samerevs'
3039 DELTAREUSENEVER = b'never'
3040
3041 DELTAREUSEFULLADD = b'fulladd'
3042
3043 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3044
3045 def clone(
3046 self,
3047 tr,
3048 destrevlog,
3049 addrevisioncb=None,
3050 deltareuse=DELTAREUSESAMEREVS,
3051 forcedeltabothparents=None,
3052 sidedata_helpers=None,
3053 ):
3054 """Copy this revlog to another, possibly with format changes.
3055
3056 The destination revlog will contain the same revisions and nodes.
3057 However, it may not be bit-for-bit identical due to e.g. delta encoding
3058 differences.
3059
3060 The ``deltareuse`` argument controls how deltas from the existing revlog
3061 are preserved in the destination revlog. The argument can have the
3062 following values:
3063
3064 DELTAREUSEALWAYS
3065 Deltas will always be reused (if possible), even if the destination
3066 revlog would not select the same revisions for the delta. This is the
3067 fastest mode of operation.
3068 DELTAREUSESAMEREVS
3069 Deltas will be reused if the destination revlog would pick the same
3070 revisions for the delta. This mode strikes a balance between speed
3071 and optimization.
3072 DELTAREUSENEVER
3073 Deltas will never be reused. This is the slowest mode of execution.
3074 This mode can be used to recompute deltas (e.g. if the diff/delta
3075 algorithm changes).
3076 DELTAREUSEFULLADD
3077 Revisions will be re-added as if they were new content. This is
3078 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3079 e.g. large file detection and handling.
3080
3081 Delta computation can be slow, so the choice of delta reuse policy can
3082 significantly affect run time.
3083
3084 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3085 two extremes. Deltas will be reused when appropriate. But if the
3086 destination could pick a better delta base, it will do so. This means if you
3087 are converting a non-generaldelta revlog to a generaldelta revlog,
3088 deltas will be recomputed if the delta's parent isn't a parent of the
3089 revision.
3090
3091 In addition to the delta policy, the ``forcedeltabothparents``
3092 argument controls whether to force compute deltas against both parents
3093 for merges. If unset, the destination revlog's current setting is kept.
3094
3095 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3096 `sidedata_helpers`.
3097 """
3098 if deltareuse not in self.DELTAREUSEALL:
3099 raise ValueError(
3100 _(b'value for deltareuse invalid: %s') % deltareuse
3101 )
3102
3103 if len(destrevlog):
3104 raise ValueError(_(b'destination revlog is not empty'))
3105
3106 if getattr(self, 'filteredrevs', None):
3107 raise ValueError(_(b'source revlog has filtered revisions'))
3108 if getattr(destrevlog, 'filteredrevs', None):
3109 raise ValueError(_(b'destination revlog has filtered revisions'))
3110
3111 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3112 # if possible.
3113 oldlazydelta = destrevlog._lazydelta
3114 oldlazydeltabase = destrevlog._lazydeltabase
3115 oldamd = destrevlog._deltabothparents
3116
3117 try:
3118 if deltareuse == self.DELTAREUSEALWAYS:
3119 destrevlog._lazydeltabase = True
3120 destrevlog._lazydelta = True
3121 elif deltareuse == self.DELTAREUSESAMEREVS:
3122 destrevlog._lazydeltabase = False
3123 destrevlog._lazydelta = True
3124 elif deltareuse == self.DELTAREUSENEVER:
3125 destrevlog._lazydeltabase = False
3126 destrevlog._lazydelta = False
3127
3128 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3129
3130 self._clone(
3131 tr,
3132 destrevlog,
3133 addrevisioncb,
3134 deltareuse,
3135 forcedeltabothparents,
3136 sidedata_helpers,
3137 )
3138
3139 finally:
3140 destrevlog._lazydelta = oldlazydelta
3141 destrevlog._lazydeltabase = oldlazydeltabase
3142 destrevlog._deltabothparents = oldamd
3143
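# The knob settings applied by clone above, summarized as data for quick
# reference (this mirrors the if/elif chain; DELTAREUSEFULLADD leaves
# both knobs untouched and takes the addrevision path in _clone instead).

POLICY_KNOBS = {
    b'always': (True, True),  # (_lazydeltabase, _lazydelta)
    b'samerevs': (False, True),
    b'never': (False, False),
}

assert POLICY_KNOBS[b'samerevs'] == (False, True)
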
3144 def _clone(
3145 self,
3146 tr,
3147 destrevlog,
3148 addrevisioncb,
3149 deltareuse,
3150 forcedeltabothparents,
3151 sidedata_helpers,
3152 ):
3153 """perform the core duty of `revlog.clone` after parameter processing"""
3154 deltacomputer = deltautil.deltacomputer(destrevlog)
3155 index = self.index
3156 for rev in self:
3157 entry = index[rev]
3158
3159 # Some classes override linkrev to take filtered revs into
3160 # account. Use raw entry from index.
3161 flags = entry[0] & 0xFFFF
3162 linkrev = entry[4]
3163 p1 = index[entry[5]][7]
3164 p2 = index[entry[6]][7]
3165 node = entry[7]
3166
3167 # (Possibly) reuse the delta from the revlog if allowed and
3168 # the revlog chunk is a delta.
3169 cachedelta = None
3170 rawtext = None
3171 if deltareuse == self.DELTAREUSEFULLADD:
3172 text = self._revisiondata(rev)
3173 sidedata = self.sidedata(rev)
3174
3175 if sidedata_helpers is not None:
3176 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3177 self, sidedata_helpers, sidedata, rev
3178 )
3179 flags = flags | new_flags[0] & ~new_flags[1]
3180
3181 destrevlog.addrevision(
3182 text,
3183 tr,
3184 linkrev,
3185 p1,
3186 p2,
3187 cachedelta=cachedelta,
3188 node=node,
3189 flags=flags,
3190 deltacomputer=deltacomputer,
3191 sidedata=sidedata,
3192 )
3193 else:
3194 if destrevlog._lazydelta:
3195 dp = self.deltaparent(rev)
3196 if dp != nullrev:
3197 cachedelta = (dp, bytes(self._chunk(rev)))
3198
3199 sidedata = None
3200 if not cachedelta:
3201 rawtext = self._revisiondata(rev)
3202 sidedata = self.sidedata(rev)
3203 if sidedata is None:
3204 sidedata = self.sidedata(rev)
3205
3206 if sidedata_helpers is not None:
3207 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3208 self, sidedata_helpers, sidedata, rev
3209 )
3210 flags = flags | new_flags[0] & ~new_flags[1]
3211
3212 with destrevlog._writing(tr):
3213 destrevlog._addrevision(
3214 node,
3215 rawtext,
3216 tr,
3217 linkrev,
3218 p1,
3219 p2,
3220 flags,
3221 cachedelta,
3222 deltacomputer=deltacomputer,
3223 sidedata=sidedata,
3224 )
3225
3226 if addrevisioncb:
3227 addrevisioncb(self, rev, node)
3228
3229 def censorrevision(self, tr, censornode, tombstone=b''):
3230 if self._format_version == REVLOGV0:
3231 raise error.RevlogError(
3232 _(b'cannot censor with version %d revlogs')
3233 % self._format_version
3234 )
3235
3236 censorrev = self.rev(censornode)
3237 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
3238
3239 if len(tombstone) > self.rawsize(censorrev):
3240 raise error.Abort(
3241 _(b'censor tombstone must be no longer than censored data')
3242 )
3243
3244 # Rewriting the revlog in place is hard. Our strategy for censoring is
3245 # to create a new revlog, copy all revisions to it, then replace the
3246 # revlogs on transaction close.
3247 #
3248 # This is a bit dangerous. We could easily have a mismatch of state.
3249 newrl = revlog(
3250 self.opener,
3251 target=self.target,
3252 radix=self.radix,
3253 postfix=b'tmpcensored',
3254 censorable=True,
3255 )
3256 newrl._format_version = self._format_version
3257 newrl._format_flags = self._format_flags
3258 newrl._generaldelta = self._generaldelta
3259 newrl._parse_index = self._parse_index
3260
3261 for rev in self.revs():
3262 node = self.node(rev)
3263 p1, p2 = self.parents(node)
3264
3265 if rev == censorrev:
3266 newrl.addrawrevision(
3267 tombstone,
3268 tr,
3269 self.linkrev(censorrev),
3270 p1,
3271 p2,
3272 censornode,
3273 REVIDX_ISCENSORED,
3274 )
3275
3276 if newrl.deltaparent(rev) != nullrev:
3277 raise error.Abort(
3278 _(
3279 b'censored revision stored as delta; '
3280 b'cannot censor'
3281 ),
3282 hint=_(
3283 b'censoring of revlogs is not '
3284 b'fully implemented; please report '
3285 b'this bug'
3286 ),
3287 )
3288 continue
3289
3290 if self.iscensored(rev):
3291 if self.deltaparent(rev) != nullrev:
3292 raise error.Abort(
3293 _(
3294 b'cannot censor due to censored '
3295 b'revision having delta stored'
3296 )
3297 )
3298 rawtext = self._chunk(rev)
3299 else:
3300 rawtext = self.rawdata(rev)
3301
3302 newrl.addrawrevision(
3303 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3304 )
3305
3306 tr.addbackup(self._indexfile, location=b'store')
3307 if not self._inline:
3308 tr.addbackup(self._datafile, location=b'store')
3309
3310 self.opener.rename(newrl._indexfile, self._indexfile)
3311 if not self._inline:
3312 self.opener.rename(newrl._datafile, self._datafile)
3313
3314 self.clearcaches()
3315 self._loadindex()
3316
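# A sketch of the tombstone framing assumed above: packmeta wraps the
# metadata dict in the b'\x01\n' delimiters used for filelog metadata, so
# a censored revision's rawtext carries the comment instead of the data.
# `pack_tombstone` is a hypothetical mirror of
# storageutil.packmeta({b'censored': comment}, b'').

def pack_tombstone(comment):
    return b'\x01\ncensored: %s\n\x01\n' % comment

assert pack_tombstone(b'oops') == b'\x01\ncensored: oops\n\x01\n'
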
3317 def verifyintegrity(self, state):
3318 """Verifies the integrity of the revlog.
3319
3320 Yields ``revlogproblem`` instances describing problems that are
3321 found.
3322 """
3323 dd, di = self.checksize()
3324 if dd:
3325 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3326 if di:
3327 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3328
3329 version = self._format_version
3330
3331 # The verifier tells us what version revlog we should be.
3332 if version != state[b'expectedversion']:
3333 yield revlogproblem(
3334 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3335 % (self.display_id, version, state[b'expectedversion'])
3336 )
3337
3338 state[b'skipread'] = set()
3339 state[b'safe_renamed'] = set()
3340
3341 for rev in self:
3342 node = self.node(rev)
3343
3344 # Verify contents. 4 cases to care about:
3345 #
3346 # common: the most common case
3347 # rename: with a rename
3348 # meta: file content starts with b'\1\n', the metadata
3349 # header defined in filelog.py, but without a rename
3350 # ext: content stored externally
3351 #
3352 # More formally, their differences are shown below:
3353 #
3354 # | common | rename | meta | ext
3355 # -------------------------------------------------------
3356 # flags() | 0 | 0 | 0 | not 0
3357 # renamed() | False | True | False | ?
3358 # rawtext[0:2]=='\1\n'| False | True | True | ?
3359 #
3360 # "rawtext" means the raw text stored in revlog data, which
3361 # could be retrieved by "rawdata(rev)". "text"
3362 # mentioned below is "revision(rev)".
3363 #
3364 # There are 3 different lengths stored physically:
3365 # 1. L1: rawsize, stored in revlog index
3366 # 2. L2: len(rawtext), stored in revlog data
3367 # 3. L3: len(text), stored in revlog data if flags==0, or
3368 # possibly somewhere else if flags!=0
3369 #
3370 # L1 should be equal to L2. L3 could be different from them.
3371 # "text" may or may not affect commit hash depending on flag
3372 # processors (see flagutil.addflagprocessor).
3373 #
3374 # | common | rename | meta | ext
3375 # -------------------------------------------------
3376 # rawsize() | L1 | L1 | L1 | L1
3377 # size() | L1 | L2-LM | L1(*) | L1 (?)
3378 # len(rawtext) | L2 | L2 | L2 | L2
3379 # len(text) | L2 | L2 | L2 | L3
3380 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3381 #
3382 # LM: length of metadata, depending on rawtext
3383 # (*): not ideal, see comment in filelog.size
3384 # (?): could be "- len(meta)" if the resolved content has
3385 # rename metadata
3386 #
3387 # Checks needed to be done:
3388 # 1. length check: L1 == L2, in all cases.
3389 # 2. hash check: depending on flag processor, we may need to
3390 # use either "text" (external), or "rawtext" (in revlog).
3391
3392 try:
3393 skipflags = state.get(b'skipflags', 0)
3394 if skipflags:
3395 skipflags &= self.flags(rev)
3396
3397 _verify_revision(self, skipflags, state, node)
3398
3399 l1 = self.rawsize(rev)
3400 l2 = len(self.rawdata(node))
3401
3402 if l1 != l2:
3403 yield revlogproblem(
3404 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3405 node=node,
3406 )
3407
3408 except error.CensoredNodeError:
3409 if state[b'erroroncensored']:
3410 yield revlogproblem(
3411 error=_(b'censored file data'), node=node
3412 )
3413 state[b'skipread'].add(node)
3414 except Exception as e:
3415 yield revlogproblem(
3416 error=_(b'unpacking %s: %s')
3417 % (short(node), stringutil.forcebytestr(e)),
3418 node=node,
3419 )
3420 state[b'skipread'].add(node)
3421
3422 def storageinfo(
3423 self,
3424 exclusivefiles=False,
3425 sharedfiles=False,
3426 revisionscount=False,
3427 trackedsize=False,
3428 storedsize=False,
3429 ):
3430 d = {}
3431
3432 if exclusivefiles:
3433 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3434 if not self._inline:
3435 d[b'exclusivefiles'].append((self.opener, self._datafile))
3436
3437 if sharedfiles:
3438 d[b'sharedfiles'] = []
3439
3440 if revisionscount:
3441 d[b'revisionscount'] = len(self)
3442
3443 if trackedsize:
3444 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3445
3446 if storedsize:
3447 d[b'storedsize'] = sum(
3448 self.opener.stat(path).st_size for path in self.files()
3449 )
3450
3451 return d
3452
3453 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3454 if not self.hassidedata:
3455 return
3456 # revlog formats with sidedata support do not support inline
3457 assert not self._inline
3458 if not helpers[1] and not helpers[2]:
3459 # Nothing to generate or remove
3460 return
3461
3462 new_entries = []
3463 # append the new sidedata
3464 with self._writing(transaction):
3465 ifh, dfh, sdfh = self._writinghandles
3466 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3467
3468 current_offset = sdfh.tell()
3469 for rev in range(startrev, endrev + 1):
3470 entry = self.index[rev]
3471 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3472 store=self,
3473 sidedata_helpers=helpers,
3474 sidedata={},
3475 rev=rev,
3476 )
3477
3478 serialized_sidedata = sidedatautil.serialize_sidedata(
3479 new_sidedata
3480 )
3481
3482 sidedata_compression_mode = COMP_MODE_INLINE
3483 if serialized_sidedata and self.hassidedata:
3484 sidedata_compression_mode = COMP_MODE_PLAIN
3485 h, comp_sidedata = self.compress(serialized_sidedata)
3486 if (
3487 h != b'u'
3488 and comp_sidedata[0:1] != b'\0'
3489 and len(comp_sidedata) < len(serialized_sidedata)
3490 ):
3491 assert not h
3492 if (
3493 comp_sidedata[0:1]
3494 == self._docket.default_compression_header
3495 ):
3496 sidedata_compression_mode = COMP_MODE_DEFAULT
3497 serialized_sidedata = comp_sidedata
3498 else:
3499 sidedata_compression_mode = COMP_MODE_INLINE
3500 serialized_sidedata = comp_sidedata
3501 if entry[8] != 0 or entry[9] != 0:
3502 # rewriting entries that already have sidedata is not
3503 # supported yet, because it introduces garbage data in the
3504 # revlog.
3505 msg = b"rewriting existing sidedata is not supported yet"
3506 raise error.Abort(msg)
3507
3508 # Apply (potential) flags to add and to remove after running
3509 # the sidedata helpers
3510 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3511 entry_update = (
3512 current_offset,
3513 len(serialized_sidedata),
3514 new_offset_flags,
3515 sidedata_compression_mode,
3516 )
3517
3518 # the sidedata computation might have moved the file cursors around
3519 sdfh.seek(current_offset, os.SEEK_SET)
3520 sdfh.write(serialized_sidedata)
3521 new_entries.append(entry_update)
3522 current_offset += len(serialized_sidedata)
3523 self._docket.sidedata_end = sdfh.tell()
3524
3525 # rewrite the new index entries
3526 ifh.seek(startrev * self.index.entry_size)
3527 for i, e in enumerate(new_entries):
3528 rev = startrev + i
3529 self.index.replace_sidedata_info(rev, *e)
3530 packed = self.index.entry_binary(rev)
3531 if rev == 0 and self._docket is None:
3532 header = self._format_flags | self._format_version
3533 header = self.index.pack_header(header)
3534 packed = header + packed
3535 ifh.write(packed)