revlog: move censoring code in a dedicated module...
marmoute
r48183:33d62691 default
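In short: revlog.censorrevision() keeps its format dispatch, but the version-1 rewrite logic now lives in the new mercurial/revlogutils/censor.py module. A minimal sketch of the new call path; `rl` (a revlog) and `tr` (an open transaction) are assumed to already exist, and the wrapper function name is illustrative:

    from mercurial.revlogutils import censor

    def censor_one_revision(rl, tr, censornode):
        # After this changeset, the v1 copy-and-swap censoring logic is
        # reached through revlogutils.censor rather than inline code in
        # revlog.censorrevision().
        censor.v1_censor(rl, tr, censornode, tombstone=b'redacted')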
mercurial/revlog.py
@@ -80,6 +80,7 @@ from .interfaces import (
     util as interfaceutil,
 )
 from .revlogutils import (
+    censor,
     deltas as deltautil,
     docket as docketutil,
     flagutil,
@@ -3232,88 +3233,15 @@ class revlog(object):
                 _(b'cannot censor with version %d revlogs')
                 % self._format_version
             )
-
-        censorrev = self.rev(censornode)
-        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
-
-        if len(tombstone) > self.rawsize(censorrev):
-            raise error.Abort(
-                _(b'censor tombstone must be no longer than censored data')
-            )
+        elif self._format_version == REVLOGV1:
+            censor.v1_censor(self, tr, censornode, tombstone)
+        else:
+            # revlog v2
+            raise error.RevlogError(
+                _(b'cannot censor with version %d revlogs')
+                % self._format_version
+            )
 
-        # Rewriting the revlog in place is hard. Our strategy for censoring is
-        # to create a new revlog, copy all revisions to it, then replace the
-        # revlogs on transaction close.
-        #
-        # This is a bit dangerous. We could easily have a mismatch of state.
-        newrl = revlog(
-            self.opener,
-            target=self.target,
-            radix=self.radix,
-            postfix=b'tmpcensored',
-            censorable=True,
-        )
-        newrl._format_version = self._format_version
-        newrl._format_flags = self._format_flags
-        newrl._generaldelta = self._generaldelta
-        newrl._parse_index = self._parse_index
-
-        for rev in self.revs():
-            node = self.node(rev)
-            p1, p2 = self.parents(node)
-
-            if rev == censorrev:
-                newrl.addrawrevision(
-                    tombstone,
-                    tr,
-                    self.linkrev(censorrev),
-                    p1,
-                    p2,
-                    censornode,
-                    REVIDX_ISCENSORED,
-                )
-
-                if newrl.deltaparent(rev) != nullrev:
-                    raise error.Abort(
-                        _(
-                            b'censored revision stored as delta; '
-                            b'cannot censor'
-                        ),
-                        hint=_(
-                            b'censoring of revlogs is not '
-                            b'fully implemented; please report '
-                            b'this bug'
-                        ),
-                    )
-                continue
-
-            if self.iscensored(rev):
-                if self.deltaparent(rev) != nullrev:
-                    raise error.Abort(
-                        _(
-                            b'cannot censor due to censored '
-                            b'revision having delta stored'
-                        )
-                    )
-                rawtext = self._chunk(rev)
-            else:
-                rawtext = self.rawdata(rev)
-
-            newrl.addrawrevision(
-                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
-            )
-
-        tr.addbackup(self._indexfile, location=b'store')
-        if not self._inline:
-            tr.addbackup(self._datafile, location=b'store')
-
-        self.opener.rename(newrl._indexfile, self._indexfile)
-        if not self._inline:
-            self.opener.rename(newrl._datafile, self._datafile)
-
-        self.clearcaches()
-        self._loadindex()
-
     def verifyintegrity(self, state):
         """Verifies the integrity of the revlog.
 
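Both before and after the move, the tombstone is wrapped in filelog metadata (storageutil.packmeta) before its length is checked against the censored revision's raw size. A simplified sketch of that framing, assuming the same behavior as the real helper in mercurial/utils/storageutil.py:

    def packmeta_sketch(meta, text):
        # Filelog metadata framing: "key: value" lines enclosed by a pair
        # of \x01\n markers, followed by the payload.
        metatext = b''.join(
            b'%s: %s\n' % (k, v) for k, v in sorted(meta.items())
        )
        return b'\x01\n%s\x01\n%s' % (metatext, text)

    # packmeta_sketch({b'censored': b'reason'}, b'') builds the tombstone
    # text that replaces the censored revision's data; as the abort above
    # enforces, it must be no longer than the data it replaces.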
mercurial/revlogutils/censor.py (new file; the viewer renders it as a copy of mercurial/revlog.py): this diff has been collapsed as it changes many lines (3583 lines changed).
@@ -1,3535 +1,102 b''
1 # revlog.py - storage back-end for mercurial
1 # censor code related to censoring revision
2 # coding: utf8
3 #
2 #
4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
4 # Copyright 2015 Google, Inc <martinvonz@google.com>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 """Storage back-end for Mercurial.
9 from ..node import (
10
11 This provides efficient delta storage with O(1) retrieve and append
12 and O(changes) merge between branches.
13 """
14
15 from __future__ import absolute_import
16
17 import binascii
18 import collections
19 import contextlib
20 import errno
21 import io
22 import os
23 import struct
24 import zlib
25
26 # import stuff from node for others to import from revlog
27 from .node import (
28 bin,
29 hex,
30 nullrev,
10 nullrev,
31 sha1nodeconstants,
32 short,
33 wdirrev,
34 )
35 from .i18n import _
36 from .pycompat import getattr
37 from .revlogutils.constants import (
38 ALL_KINDS,
39 CHANGELOGV2,
40 COMP_MODE_DEFAULT,
41 COMP_MODE_INLINE,
42 COMP_MODE_PLAIN,
43 FEATURES_BY_VERSION,
44 FLAG_GENERALDELTA,
45 FLAG_INLINE_DATA,
46 INDEX_HEADER,
47 KIND_CHANGELOG,
48 REVLOGV0,
49 REVLOGV1,
50 REVLOGV1_FLAGS,
51 REVLOGV2,
52 REVLOGV2_FLAGS,
53 REVLOG_DEFAULT_FLAGS,
54 REVLOG_DEFAULT_FORMAT,
55 REVLOG_DEFAULT_VERSION,
56 SUPPORTED_FLAGS,
57 )
58 from .revlogutils.flagutil import (
59 REVIDX_DEFAULT_FLAGS,
60 REVIDX_ELLIPSIS,
61 REVIDX_EXTSTORED,
62 REVIDX_FLAGS_ORDER,
63 REVIDX_HASCOPIESINFO,
64 REVIDX_ISCENSORED,
65 REVIDX_RAWTEXT_CHANGING_FLAGS,
66 )
67 from .thirdparty import attr
68 from . import (
69 ancestor,
70 dagop,
71 error,
72 mdiff,
73 policy,
74 pycompat,
75 templatefilters,
76 util,
77 )
78 from .interfaces import (
79 repository,
80 util as interfaceutil,
81 )
82 from .revlogutils import (
83 deltas as deltautil,
84 docket as docketutil,
85 flagutil,
86 nodemap as nodemaputil,
87 revlogv0,
88 sidedata as sidedatautil,
89 )
90 from .utils import (
91 storageutil,
92 stringutil,
93 )
94
95 # blanked usage of all the name to prevent pyflakes constraints
96 # We need these name available in the module for extensions.
97
98 REVLOGV0
99 REVLOGV1
100 REVLOGV2
101 FLAG_INLINE_DATA
102 FLAG_GENERALDELTA
103 REVLOG_DEFAULT_FLAGS
104 REVLOG_DEFAULT_FORMAT
105 REVLOG_DEFAULT_VERSION
106 REVLOGV1_FLAGS
107 REVLOGV2_FLAGS
108 REVIDX_ISCENSORED
109 REVIDX_ELLIPSIS
110 REVIDX_HASCOPIESINFO
111 REVIDX_EXTSTORED
112 REVIDX_DEFAULT_FLAGS
113 REVIDX_FLAGS_ORDER
114 REVIDX_RAWTEXT_CHANGING_FLAGS
115
116 parsers = policy.importmod('parsers')
117 rustancestor = policy.importrust('ancestor')
118 rustdagop = policy.importrust('dagop')
119 rustrevlog = policy.importrust('revlog')
120
121 # Aliased for performance.
122 _zlibdecompress = zlib.decompress
123
124 # max size of revlog with inline data
125 _maxinline = 131072
126 _chunksize = 1048576
127
128 # Flag processors for REVIDX_ELLIPSIS.
129 def ellipsisreadprocessor(rl, text):
130 return text, False
131
132
133 def ellipsiswriteprocessor(rl, text):
134 return text, False
135
136
137 def ellipsisrawprocessor(rl, text):
138 return False
139
140
141 ellipsisprocessor = (
142 ellipsisreadprocessor,
143 ellipsiswriteprocessor,
144 ellipsisrawprocessor,
145 )
11 )
146
12 from ..i18n import _
147
13 from .. import (
148 def offset_type(offset, type):
14 error,
149 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
150 raise ValueError(b'unknown revlog index flags')
151 return int(int(offset) << 16 | type)
152
153
154 def _verify_revision(rl, skipflags, state, node):
155 """Verify the integrity of the given revlog ``node`` while providing a hook
156 point for extensions to influence the operation."""
157 if skipflags:
158 state[b'skipread'].add(node)
159 else:
160 # Side-effect: read content and verify hash.
161 rl.revision(node)
162
163
164 # True if a fast implementation for persistent-nodemap is available
165 #
166 # We also consider we have a "fast" implementation in "pure" python because
167 # people using pure don't really have performance consideration (and a
168 # wheelbarrow of other slowness source)
169 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
170 parsers, 'BaseIndexObject'
171 )
15 )
172
16 from ..utils import (
173
17 storageutil,
174 @attr.s(slots=True, frozen=True)
175 class _revisioninfo(object):
176 """Information about a revision that allows building its fulltext
177 node: expected hash of the revision
178 p1, p2: parent revs of the revision
179 btext: built text cache consisting of a one-element list
180 cachedelta: (baserev, uncompressed_delta) or None
181 flags: flags associated to the revision storage
182
183 One of btext[0] or cachedelta must be set.
184 """
185
186 node = attr.ib()
187 p1 = attr.ib()
188 p2 = attr.ib()
189 btext = attr.ib()
190 textlen = attr.ib()
191 cachedelta = attr.ib()
192 flags = attr.ib()
193
194
195 @interfaceutil.implementer(repository.irevisiondelta)
196 @attr.s(slots=True)
197 class revlogrevisiondelta(object):
198 node = attr.ib()
199 p1node = attr.ib()
200 p2node = attr.ib()
201 basenode = attr.ib()
202 flags = attr.ib()
203 baserevisionsize = attr.ib()
204 revision = attr.ib()
205 delta = attr.ib()
206 sidedata = attr.ib()
207 protocol_flags = attr.ib()
208 linknode = attr.ib(default=None)
209
210
211 @interfaceutil.implementer(repository.iverifyproblem)
212 @attr.s(frozen=True)
213 class revlogproblem(object):
214 warning = attr.ib(default=None)
215 error = attr.ib(default=None)
216 node = attr.ib(default=None)
217
218
219 def parse_index_v1(data, inline):
220 # call the C implementation to parse the index data
221 index, cache = parsers.parse_index2(data, inline)
222 return index, cache
223
224
225 def parse_index_v2(data, inline):
226 # call the C implementation to parse the index data
227 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
228 return index, cache
229
230
231 def parse_index_cl_v2(data, inline):
232 # call the C implementation to parse the index data
233 assert not inline
234 from .pure.parsers import parse_index_cl_v2
235
236 index, cache = parse_index_cl_v2(data)
237 return index, cache
238
239
240 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
241
242 def parse_index_v1_nodemap(data, inline):
243 index, cache = parsers.parse_index_devel_nodemap(data, inline)
244 return index, cache
245
246
247 else:
248 parse_index_v1_nodemap = None
249
250
251 def parse_index_v1_mixed(data, inline):
252 index, cache = parse_index_v1(data, inline)
253 return rustrevlog.MixedIndex(index), cache
254
255
256 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
257 # signed integer)
258 _maxentrysize = 0x7FFFFFFF
259
260 PARTIAL_READ_MSG = _(
261 b'partial read of revlog %s; expected %d bytes from offset %d, got %d'
262 )
18 )
263
19 from . import constants
264 FILE_TOO_SHORT_MSG = _(
265 b'cannot read from revlog %s;'
266 b' expected %d bytes from offset %d, data size is %d'
267 )
268
269
270 class revlog(object):
271 """
272 the underlying revision storage object
273
274 A revlog consists of two parts, an index and the revision data.
275
276 The index is a file with a fixed record size containing
277 information on each revision, including its nodeid (hash), the
278 nodeids of its parents, the position and offset of its data within
279 the data file, and the revision it's based on. Finally, each entry
280 contains a linkrev entry that can serve as a pointer to external
281 data.
282
283 The revision data itself is a linear collection of data chunks.
284 Each chunk represents a revision and is usually represented as a
285 delta against the previous chunk. To bound lookup time, runs of
286 deltas are limited to about 2 times the length of the original
287 version data. This makes retrieval of a version proportional to
288 its size, or O(1) relative to the number of revisions.
289
290 Both pieces of the revlog are written to in an append-only
291 fashion, which means we never need to rewrite a file to insert or
292 remove data, and can use some simple techniques to avoid the need
293 for locking while reading.
294
295 If checkambig, indexfile is opened with checkambig=True at
296 writing, to avoid file stat ambiguity.
297
298 If mmaplargeindex is True, and an mmapindexthreshold is set, the
299 index will be mmapped rather than read if it is larger than the
300 configured threshold.
301
302 If censorable is True, the revlog can have censored revisions.
303
304 If `upperboundcomp` is not None, this is the expected maximal gain from
305 compression for the data content.
306
307 `concurrencychecker` is an optional function that receives 3 arguments: a
308 file handle, a filename, and an expected position. It should check whether
309 the current position in the file handle is valid, and log/warn/fail (by
310 raising).
311
20
312
21
313 Internal details
22 def v1_censor(rl, tr, censornode, tombstone=b''):
314 ----------------
23 """censors a revision in a "version 1" revlog"""
315
24 assert rl._format_version == constants.REVLOGV1, rl._format_version
316 A large part of the revlog logic deals with revisions' "index entries", tuple
317 objects that contains the same "items" whatever the revlog version.
318 Different versions will have different ways of storing these items (sometimes
319 not having them at all), but the tuple will always be the same. New fields
320 are usually added at the end to avoid breaking existing code that relies
321 on the existing order. The field are defined as follows:
322
323 [0] offset:
324 The byte index of the start of revision data chunk.
325 That value is shifted up by 16 bits. use "offset = field >> 16" to
326 retrieve it.
327
328 flags:
329 A flag field that carries special information or changes the behavior
330 of the revision. (see `REVIDX_*` constants for details)
331 The flag field only occupies the first 16 bits of this field,
332 use "flags = field & 0xFFFF" to retrieve the value.
333
334 [1] compressed length:
335 The size, in bytes, of the chunk on disk
336
337 [2] uncompressed length:
338 The size, in bytes, of the full revision once reconstructed.
339
340 [3] base rev:
341 Either the base of the revision delta chain (without general
342 delta), or the base of the delta (stored in the data chunk)
343 with general delta.
344
345 [4] link rev:
346 Changelog revision number of the changeset introducing this
347 revision.
348
349 [5] parent 1 rev:
350 Revision number of the first parent
351
352 [6] parent 2 rev:
353 Revision number of the second parent
354
355 [7] node id:
356 The node id of the current revision
357
358 [8] sidedata offset:
359 The byte index of the start of the revision's side-data chunk.
360
361 [9] sidedata chunk length:
362 The size, in bytes, of the revision's side-data chunk.
363
364 [10] data compression mode:
365 two bits that detail the way the data chunk is compressed on disk.
366 (see "COMP_MODE_*" constants for details). For revlog version 0 and
367 1 this will always be COMP_MODE_INLINE.
368
369 [11] side-data compression mode:
370 two bits that detail the way the sidedata chunk is compressed on disk.
371 (see "COMP_MODE_*" constants for details)
372 """
373
374 _flagserrorclass = error.RevlogError
375
376 def __init__(
377 self,
378 opener,
379 target,
380 radix,
381 postfix=None, # only exist for `tmpcensored` now
382 checkambig=False,
383 mmaplargeindex=False,
384 censorable=False,
385 upperboundcomp=None,
386 persistentnodemap=False,
387 concurrencychecker=None,
388 trypending=False,
389 ):
390 """
391 create a revlog object
392
393 opener is a function that abstracts the file opening operation
394 and can be used to implement COW semantics or the like.
395
396 `target`: a (KIND, ID) tuple that identify the content stored in
397 this revlog. It help the rest of the code to understand what the revlog
398 is about without having to resort to heuristic and index filename
399 analysis. Note: that this must be reliably be set by normal code, but
400 that test, debug, or performance measurement code might not set this to
401 accurate value.
402 """
403 self.upperboundcomp = upperboundcomp
404
405 self.radix = radix
406
407 self._docket_file = None
408 self._indexfile = None
409 self._datafile = None
410 self._sidedatafile = None
411 self._nodemap_file = None
412 self.postfix = postfix
413 self._trypending = trypending
414 self.opener = opener
415 if persistentnodemap:
416 self._nodemap_file = nodemaputil.get_nodemap_file(self)
417
25
418 assert target[0] in ALL_KINDS
26 # avoid cycle
419 assert len(target) == 2
27 from .. import revlog
420 self.target = target
421 # When True, indexfile is opened with checkambig=True at writing, to
422 # avoid file stat ambiguity.
423 self._checkambig = checkambig
424 self._mmaplargeindex = mmaplargeindex
425 self._censorable = censorable
426 # 3-tuple of (node, rev, text) for a raw revision.
427 self._revisioncache = None
428 # Maps rev to chain base rev.
429 self._chainbasecache = util.lrucachedict(100)
430 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
431 self._chunkcache = (0, b'')
432 # How much data to read and cache into the raw revlog data cache.
433 self._chunkcachesize = 65536
434 self._maxchainlen = None
435 self._deltabothparents = True
436 self.index = None
437 self._docket = None
438 self._nodemap_docket = None
439 # Mapping of partial identifiers to full nodes.
440 self._pcache = {}
441 # Mapping of revision integer to full node.
442 self._compengine = b'zlib'
443 self._compengineopts = {}
444 self._maxdeltachainspan = -1
445 self._withsparseread = False
446 self._sparserevlog = False
447 self.hassidedata = False
448 self._srdensitythreshold = 0.50
449 self._srmingapsize = 262144
450
451 # Make copy of flag processors so each revlog instance can support
452 # custom flags.
453 self._flagprocessors = dict(flagutil.flagprocessors)
454
455 # 3-tuple of file handles being used for active writing.
456 self._writinghandles = None
457 # prevent nesting of addgroup
458 self._adding_group = None
459
460 self._loadindex()
461
462 self._concurrencychecker = concurrencychecker
463
464 def _init_opts(self):
465 """process options (from above/config) to setup associated default revlog mode
466
467 These values might be affected when actually reading on disk information.
468
469 The relevant values are returned for use in _loadindex().
470
471 * newversionflags:
472 version header to use if we need to create a new revlog
473
474 * mmapindexthreshold:
475 minimal index size for start to use mmap
476
477 * force_nodemap:
478 force the usage of a "development" version of the nodemap code
479 """
480 mmapindexthreshold = None
481 opts = self.opener.options
482
483 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
484 new_header = CHANGELOGV2
485 elif b'revlogv2' in opts:
486 new_header = REVLOGV2
487 elif b'revlogv1' in opts:
488 new_header = REVLOGV1 | FLAG_INLINE_DATA
489 if b'generaldelta' in opts:
490 new_header |= FLAG_GENERALDELTA
491 elif b'revlogv0' in self.opener.options:
492 new_header = REVLOGV0
493 else:
494 new_header = REVLOG_DEFAULT_VERSION
495
496 if b'chunkcachesize' in opts:
497 self._chunkcachesize = opts[b'chunkcachesize']
498 if b'maxchainlen' in opts:
499 self._maxchainlen = opts[b'maxchainlen']
500 if b'deltabothparents' in opts:
501 self._deltabothparents = opts[b'deltabothparents']
502 self._lazydelta = bool(opts.get(b'lazydelta', True))
503 self._lazydeltabase = False
504 if self._lazydelta:
505 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
506 if b'compengine' in opts:
507 self._compengine = opts[b'compengine']
508 if b'zlib.level' in opts:
509 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
510 if b'zstd.level' in opts:
511 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
512 if b'maxdeltachainspan' in opts:
513 self._maxdeltachainspan = opts[b'maxdeltachainspan']
514 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
515 mmapindexthreshold = opts[b'mmapindexthreshold']
516 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
517 withsparseread = bool(opts.get(b'with-sparse-read', False))
518 # sparse-revlog forces sparse-read
519 self._withsparseread = self._sparserevlog or withsparseread
520 if b'sparse-read-density-threshold' in opts:
521 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
522 if b'sparse-read-min-gap-size' in opts:
523 self._srmingapsize = opts[b'sparse-read-min-gap-size']
524 if opts.get(b'enableellipsis'):
525 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
526
527 # revlog v0 doesn't have flag processors
528 for flag, processor in pycompat.iteritems(
529 opts.get(b'flagprocessors', {})
530 ):
531 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
532
533 if self._chunkcachesize <= 0:
534 raise error.RevlogError(
535 _(b'revlog chunk cache size %r is not greater than 0')
536 % self._chunkcachesize
537 )
538 elif self._chunkcachesize & (self._chunkcachesize - 1):
539 raise error.RevlogError(
540 _(b'revlog chunk cache size %r is not a power of 2')
541 % self._chunkcachesize
542 )
543 force_nodemap = opts.get(b'devel-force-nodemap', False)
544 return new_header, mmapindexthreshold, force_nodemap
545
28
546 def _get_data(self, filepath, mmap_threshold, size=None):
29 censorrev = rl.rev(censornode)
547 """return a file content with or without mmap
30 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
548
549 If the file is missing return the empty string"""
550 try:
551 with self.opener(filepath) as fp:
552 if mmap_threshold is not None:
553 file_size = self.opener.fstat(fp).st_size
554 if file_size >= mmap_threshold:
555 if size is not None:
556 # avoid potentiel mmap crash
557 size = min(file_size, size)
558 # TODO: should .close() to release resources without
559 # relying on Python GC
560 if size is None:
561 return util.buffer(util.mmapread(fp))
562 else:
563 return util.buffer(util.mmapread(fp, size))
564 if size is None:
565 return fp.read()
566 else:
567 return fp.read(size)
568 except IOError as inst:
569 if inst.errno != errno.ENOENT:
570 raise
571 return b''
572
573 def _loadindex(self):
574
575 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
576
577 if self.postfix is not None:
578 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
579 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
580 entry_point = b'%s.i.a' % self.radix
581 else:
582 entry_point = b'%s.i' % self.radix
583
584 entry_data = b''
585 self._initempty = True
586 entry_data = self._get_data(entry_point, mmapindexthreshold)
587 if len(entry_data) > 0:
588 header = INDEX_HEADER.unpack(entry_data[:4])[0]
589 self._initempty = False
590 else:
591 header = new_header
592
593 self._format_flags = header & ~0xFFFF
594 self._format_version = header & 0xFFFF
595
596 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
597 if supported_flags is None:
598 msg = _(b'unknown version (%d) in revlog %s')
599 msg %= (self._format_version, self.display_id)
600 raise error.RevlogError(msg)
601 elif self._format_flags & ~supported_flags:
602 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
603 display_flag = self._format_flags >> 16
604 msg %= (display_flag, self._format_version, self.display_id)
605 raise error.RevlogError(msg)
606
607 features = FEATURES_BY_VERSION[self._format_version]
608 self._inline = features[b'inline'](self._format_flags)
609 self._generaldelta = features[b'generaldelta'](self._format_flags)
610 self.hassidedata = features[b'sidedata']
611
612 if not features[b'docket']:
613 self._indexfile = entry_point
614 index_data = entry_data
615 else:
616 self._docket_file = entry_point
617 if self._initempty:
618 self._docket = docketutil.default_docket(self, header)
619 else:
620 self._docket = docketutil.parse_docket(
621 self, entry_data, use_pending=self._trypending
622 )
623 self._indexfile = self._docket.index_filepath()
624 index_data = b''
625 index_size = self._docket.index_end
626 if index_size > 0:
627 index_data = self._get_data(
628 self._indexfile, mmapindexthreshold, size=index_size
629 )
630 if len(index_data) < index_size:
631 msg = _(b'too few index data for %s: got %d, expected %d')
632 msg %= (self.display_id, len(index_data), index_size)
633 raise error.RevlogError(msg)
634
635 self._inline = False
636 # generaldelta implied by version 2 revlogs.
637 self._generaldelta = True
638 # the logic for persistent nodemap will be dealt with within the
639 # main docket, so disable it for now.
640 self._nodemap_file = None
641
642 if self._docket is not None:
643 self._datafile = self._docket.data_filepath()
644 self._sidedatafile = self._docket.sidedata_filepath()
645 elif self.postfix is None:
646 self._datafile = b'%s.d' % self.radix
647 else:
648 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
649
650 self.nodeconstants = sha1nodeconstants
651 self.nullid = self.nodeconstants.nullid
652
653 # sparse-revlog can't be on without general-delta (issue6056)
654 if not self._generaldelta:
655 self._sparserevlog = False
656
657 self._storedeltachains = True
658
31
659 devel_nodemap = (
32 if len(tombstone) > rl.rawsize(censorrev):
660 self._nodemap_file
33 raise error.Abort(
661 and force_nodemap
34 _(b'censor tombstone must be no longer than censored data')
662 and parse_index_v1_nodemap is not None
663 )
664
665 use_rust_index = False
666 if rustrevlog is not None:
667 if self._nodemap_file is not None:
668 use_rust_index = True
669 else:
670 use_rust_index = self.opener.options.get(b'rust.index')
671
672 self._parse_index = parse_index_v1
673 if self._format_version == REVLOGV0:
674 self._parse_index = revlogv0.parse_index_v0
675 elif self._format_version == REVLOGV2:
676 self._parse_index = parse_index_v2
677 elif self._format_version == CHANGELOGV2:
678 self._parse_index = parse_index_cl_v2
679 elif devel_nodemap:
680 self._parse_index = parse_index_v1_nodemap
681 elif use_rust_index:
682 self._parse_index = parse_index_v1_mixed
683 try:
684 d = self._parse_index(index_data, self._inline)
685 index, _chunkcache = d
686 use_nodemap = (
687 not self._inline
688 and self._nodemap_file is not None
689 and util.safehasattr(index, 'update_nodemap_data')
690 )
691 if use_nodemap:
692 nodemap_data = nodemaputil.persisted_data(self)
693 if nodemap_data is not None:
694 docket = nodemap_data[0]
695 if (
696 len(d[0]) > docket.tip_rev
697 and d[0][docket.tip_rev][7] == docket.tip_node
698 ):
699 # no changelog tampering
700 self._nodemap_docket = docket
701 index.update_nodemap_data(*nodemap_data)
702 except (ValueError, IndexError):
703 raise error.RevlogError(
704 _(b"index %s is corrupted") % self.display_id
705 )
706 self.index, self._chunkcache = d
707 if not self._chunkcache:
708 self._chunkclear()
709 # revnum -> (chain-length, sum-delta-length)
710 self._chaininfocache = util.lrucachedict(500)
711 # revlog header -> revlog compressor
712 self._decompressors = {}
713
714 @util.propertycache
715 def revlog_kind(self):
716 return self.target[0]
717
718 @util.propertycache
719 def display_id(self):
720 """The public facing "ID" of the revlog that we use in message"""
721 # Maybe we should build a user facing representation of
722 # revlog.target instead of using `self.radix`
723 return self.radix
724
725 def _get_decompressor(self, t):
726 try:
727 compressor = self._decompressors[t]
728 except KeyError:
729 try:
730 engine = util.compengines.forrevlogheader(t)
731 compressor = engine.revlogcompressor(self._compengineopts)
732 self._decompressors[t] = compressor
733 except KeyError:
734 raise error.RevlogError(
735 _(b'unknown compression type %s') % binascii.hexlify(t)
736 )
737 return compressor
738
739 @util.propertycache
740 def _compressor(self):
741 engine = util.compengines[self._compengine]
742 return engine.revlogcompressor(self._compengineopts)
743
744 @util.propertycache
745 def _decompressor(self):
746 """the default decompressor"""
747 if self._docket is None:
748 return None
749 t = self._docket.default_compression_header
750 c = self._get_decompressor(t)
751 return c.decompress
752
753 def _indexfp(self):
754 """file object for the revlog's index file"""
755 return self.opener(self._indexfile, mode=b"r")
756
757 def __index_write_fp(self):
758 # You should not use this directly and use `_writing` instead
759 try:
760 f = self.opener(
761 self._indexfile, mode=b"r+", checkambig=self._checkambig
762 )
763 if self._docket is None:
764 f.seek(0, os.SEEK_END)
765 else:
766 f.seek(self._docket.index_end, os.SEEK_SET)
767 return f
768 except IOError as inst:
769 if inst.errno != errno.ENOENT:
770 raise
771 return self.opener(
772 self._indexfile, mode=b"w+", checkambig=self._checkambig
773 )
774
775 def __index_new_fp(self):
776 # You should not use this unless you are upgrading from inline revlog
777 return self.opener(
778 self._indexfile,
779 mode=b"w",
780 checkambig=self._checkambig,
781 atomictemp=True,
782 )
35 )
783
36
784 def _datafp(self, mode=b'r'):
37 # Rewriting the revlog in place is hard. Our strategy for censoring is
785 """file object for the revlog's data file"""
38 # to create a new revlog, copy all revisions to it, then replace the
786 return self.opener(self._datafile, mode=mode)
39 # revlogs on transaction close.
787
40 #
788 @contextlib.contextmanager
41 # This is a bit dangerous. We could easily have a mismatch of state.
789 def _datareadfp(self, existingfp=None):
42 newrl = revlog.revlog(
790 """file object suitable to read data"""
43 rl.opener,
791 # Use explicit file handle, if given.
44 target=rl.target,
792 if existingfp is not None:
45 radix=rl.radix,
793 yield existingfp
46 postfix=b'tmpcensored',
794
47 censorable=True,
795 # Use a file handle being actively used for writes, if available.
48 )
796 # There is some danger to doing this because reads will seek the
49 newrl._format_version = rl._format_version
797 # file. However, _writeentry() performs a SEEK_END before all writes,
50 newrl._format_flags = rl._format_flags
798 # so we should be safe.
51 newrl._generaldelta = rl._generaldelta
799 elif self._writinghandles:
52 newrl._parse_index = rl._parse_index
800 if self._inline:
801 yield self._writinghandles[0]
802 else:
803 yield self._writinghandles[1]
804
805 # Otherwise open a new file handle.
806 else:
807 if self._inline:
808 func = self._indexfp
809 else:
810 func = self._datafp
811 with func() as fp:
812 yield fp
813
814 @contextlib.contextmanager
815 def _sidedatareadfp(self):
816 """file object suitable to read sidedata"""
817 if self._writinghandles:
818 yield self._writinghandles[2]
819 else:
820 with self.opener(self._sidedatafile) as fp:
821 yield fp
822
823 def tiprev(self):
824 return len(self.index) - 1
825
826 def tip(self):
827 return self.node(self.tiprev())
828
829 def __contains__(self, rev):
830 return 0 <= rev < len(self)
831
832 def __len__(self):
833 return len(self.index)
834
835 def __iter__(self):
836 return iter(pycompat.xrange(len(self)))
837
838 def revs(self, start=0, stop=None):
839 """iterate over all rev in this revlog (from start to stop)"""
840 return storageutil.iterrevs(len(self), start=start, stop=stop)
841
842 @property
843 def nodemap(self):
844 msg = (
845 b"revlog.nodemap is deprecated, "
846 b"use revlog.index.[has_node|rev|get_rev]"
847 )
848 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
849 return self.index.nodemap
850
851 @property
852 def _nodecache(self):
853 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
854 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
855 return self.index.nodemap
856
857 def hasnode(self, node):
858 try:
859 self.rev(node)
860 return True
861 except KeyError:
862 return False
863
864 def candelta(self, baserev, rev):
865 """whether two revisions (baserev, rev) can be delta-ed or not"""
866 # Disable delta if either rev requires a content-changing flag
867 # processor (ex. LFS). This is because such flag processor can alter
868 # the rawtext content that the delta will be based on, and two clients
869 # could have a same revlog node with different flags (i.e. different
870 # rawtext contents) and the delta could be incompatible.
871 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
872 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
873 ):
874 return False
875 return True
876
877 def update_caches(self, transaction):
878 if self._nodemap_file is not None:
879 if transaction is None:
880 nodemaputil.update_persistent_nodemap(self)
881 else:
882 nodemaputil.setup_persistent_nodemap(transaction, self)
883
884 def clearcaches(self):
885 self._revisioncache = None
886 self._chainbasecache.clear()
887 self._chunkcache = (0, b'')
888 self._pcache = {}
889 self._nodemap_docket = None
890 self.index.clearcaches()
891 # The python code is the one responsible for validating the docket, we
892 # end up having to refresh it here.
893 use_nodemap = (
894 not self._inline
895 and self._nodemap_file is not None
896 and util.safehasattr(self.index, 'update_nodemap_data')
897 )
898 if use_nodemap:
899 nodemap_data = nodemaputil.persisted_data(self)
900 if nodemap_data is not None:
901 self._nodemap_docket = nodemap_data[0]
902 self.index.update_nodemap_data(*nodemap_data)
903
904 def rev(self, node):
905 try:
906 return self.index.rev(node)
907 except TypeError:
908 raise
909 except error.RevlogError:
910 # parsers.c radix tree lookup failed
911 if (
912 node == self.nodeconstants.wdirid
913 or node in self.nodeconstants.wdirfilenodeids
914 ):
915 raise error.WdirUnsupported
916 raise error.LookupError(node, self.display_id, _(b'no node'))
917
918 # Accessors for index entries.
919
920 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
921 # are flags.
922 def start(self, rev):
923 return int(self.index[rev][0] >> 16)
924
925 def sidedata_cut_off(self, rev):
926 sd_cut_off = self.index[rev][8]
927 if sd_cut_off != 0:
928 return sd_cut_off
929 # This is some annoying dance, because entries without sidedata
930 # currently use 0 as their ofsset. (instead of previous-offset +
931 # previous-size)
932 #
933 # We should reconsider this sidedata β†’ 0 sidata_offset policy.
934 # In the meantime, we need this.
935 while 0 <= rev:
936 e = self.index[rev]
937 if e[9] != 0:
938 return e[8] + e[9]
939 rev -= 1
940 return 0
941
942 def flags(self, rev):
943 return self.index[rev][0] & 0xFFFF
944
945 def length(self, rev):
946 return self.index[rev][1]
947
948 def sidedata_length(self, rev):
949 if not self.hassidedata:
950 return 0
951 return self.index[rev][9]
952
953 def rawsize(self, rev):
954 """return the length of the uncompressed text for a given revision"""
955 l = self.index[rev][2]
956 if l >= 0:
957 return l
958
959 t = self.rawdata(rev)
960 return len(t)
961
962 def size(self, rev):
963 """length of non-raw text (processed by a "read" flag processor)"""
964 # fast path: if no "read" flag processor could change the content,
965 # size is rawsize. note: ELLIPSIS is known to not change the content.
966 flags = self.flags(rev)
967 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
968 return self.rawsize(rev)
969
970 return len(self.revision(rev, raw=False))
971
972 def chainbase(self, rev):
973 base = self._chainbasecache.get(rev)
974 if base is not None:
975 return base
976
977 index = self.index
978 iterrev = rev
979 base = index[iterrev][3]
980 while base != iterrev:
981 iterrev = base
982 base = index[iterrev][3]
983
984 self._chainbasecache[rev] = base
985 return base
986
987 def linkrev(self, rev):
988 return self.index[rev][4]
989
990 def parentrevs(self, rev):
991 try:
992 entry = self.index[rev]
993 except IndexError:
994 if rev == wdirrev:
995 raise error.WdirUnsupported
996 raise
997 if entry[5] == nullrev:
998 return entry[6], entry[5]
999 else:
1000 return entry[5], entry[6]
1001
1002 # fast parentrevs(rev) where rev isn't filtered
1003 _uncheckedparentrevs = parentrevs
1004
1005 def node(self, rev):
1006 try:
1007 return self.index[rev][7]
1008 except IndexError:
1009 if rev == wdirrev:
1010 raise error.WdirUnsupported
1011 raise
1012
1013 # Derived from index values.
1014
1015 def end(self, rev):
1016 return self.start(rev) + self.length(rev)
1017
1018 def parents(self, node):
1019 i = self.index
1020 d = i[self.rev(node)]
1021 # inline node() to avoid function call overhead
1022 if d[5] == self.nullid:
1023 return i[d[6]][7], i[d[5]][7]
1024 else:
1025 return i[d[5]][7], i[d[6]][7]
1026
1027 def chainlen(self, rev):
1028 return self._chaininfo(rev)[0]
1029
1030 def _chaininfo(self, rev):
1031 chaininfocache = self._chaininfocache
1032 if rev in chaininfocache:
1033 return chaininfocache[rev]
1034 index = self.index
1035 generaldelta = self._generaldelta
1036 iterrev = rev
1037 e = index[iterrev]
1038 clen = 0
1039 compresseddeltalen = 0
1040 while iterrev != e[3]:
1041 clen += 1
1042 compresseddeltalen += e[1]
1043 if generaldelta:
1044 iterrev = e[3]
1045 else:
1046 iterrev -= 1
1047 if iterrev in chaininfocache:
1048 t = chaininfocache[iterrev]
1049 clen += t[0]
1050 compresseddeltalen += t[1]
1051 break
1052 e = index[iterrev]
1053 else:
1054 # Add text length of base since decompressing that also takes
1055 # work. For cache hits the length is already included.
1056 compresseddeltalen += e[1]
1057 r = (clen, compresseddeltalen)
1058 chaininfocache[rev] = r
1059 return r
1060
1061 def _deltachain(self, rev, stoprev=None):
1062 """Obtain the delta chain for a revision.
1063
1064 ``stoprev`` specifies a revision to stop at. If not specified, we
1065 stop at the base of the chain.
1066
1067 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
1068 revs in ascending order and ``stopped`` is a bool indicating whether
1069 ``stoprev`` was hit.
1070 """
1071 # Try C implementation.
1072 try:
1073 return self.index.deltachain(rev, stoprev, self._generaldelta)
1074 except AttributeError:
1075 pass
1076
1077 chain = []
1078
1079 # Alias to prevent attribute lookup in tight loop.
1080 index = self.index
1081 generaldelta = self._generaldelta
1082
1083 iterrev = rev
1084 e = index[iterrev]
1085 while iterrev != e[3] and iterrev != stoprev:
1086 chain.append(iterrev)
1087 if generaldelta:
1088 iterrev = e[3]
1089 else:
1090 iterrev -= 1
1091 e = index[iterrev]
1092
1093 if iterrev == stoprev:
1094 stopped = True
1095 else:
1096 chain.append(iterrev)
1097 stopped = False
1098
1099 chain.reverse()
1100 return chain, stopped
1101
1102 def ancestors(self, revs, stoprev=0, inclusive=False):
1103 """Generate the ancestors of 'revs' in reverse revision order.
1104 Does not generate revs lower than stoprev.
1105
1106 See the documentation for ancestor.lazyancestors for more details."""
1107
1108 # first, make sure start revisions aren't filtered
1109 revs = list(revs)
1110 checkrev = self.node
1111 for r in revs:
1112 checkrev(r)
1113 # and we're sure ancestors aren't filtered as well
1114
1115 if rustancestor is not None and self.index.rust_ext_compat:
1116 lazyancestors = rustancestor.LazyAncestors
1117 arg = self.index
1118 else:
1119 lazyancestors = ancestor.lazyancestors
1120 arg = self._uncheckedparentrevs
1121 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1122
1123 def descendants(self, revs):
1124 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1125
1126 def findcommonmissing(self, common=None, heads=None):
1127 """Return a tuple of the ancestors of common and the ancestors of heads
1128 that are not ancestors of common. In revset terminology, we return the
1129 tuple:
1130
1131 ::common, (::heads) - (::common)
1132
1133 The list is sorted by revision number, meaning it is
1134 topologically sorted.
1135
1136 'heads' and 'common' are both lists of node IDs. If heads is
1137 not supplied, uses all of the revlog's heads. If common is not
1138 supplied, uses nullid."""
1139 if common is None:
1140 common = [self.nullid]
1141 if heads is None:
1142 heads = self.heads()
1143
1144 common = [self.rev(n) for n in common]
1145 heads = [self.rev(n) for n in heads]
1146
1147 # we want the ancestors, but inclusive
1148 class lazyset(object):
1149 def __init__(self, lazyvalues):
1150 self.addedvalues = set()
1151 self.lazyvalues = lazyvalues
1152
1153 def __contains__(self, value):
1154 return value in self.addedvalues or value in self.lazyvalues
1155
1156 def __iter__(self):
1157 added = self.addedvalues
1158 for r in added:
1159 yield r
1160 for r in self.lazyvalues:
1161 if not r in added:
1162 yield r
1163
1164 def add(self, value):
1165 self.addedvalues.add(value)
1166
1167 def update(self, values):
1168 self.addedvalues.update(values)
1169
1170 has = lazyset(self.ancestors(common))
1171 has.add(nullrev)
1172 has.update(common)
1173
1174 # take all ancestors from heads that aren't in has
1175 missing = set()
1176 visit = collections.deque(r for r in heads if r not in has)
1177 while visit:
1178 r = visit.popleft()
1179 if r in missing:
1180 continue
1181 else:
1182 missing.add(r)
1183 for p in self.parentrevs(r):
1184 if p not in has:
1185 visit.append(p)
1186 missing = list(missing)
1187 missing.sort()
1188 return has, [self.node(miss) for miss in missing]
1189
1190 def incrementalmissingrevs(self, common=None):
1191 """Return an object that can be used to incrementally compute the
1192 revision numbers of the ancestors of arbitrary sets that are not
1193 ancestors of common. This is an ancestor.incrementalmissingancestors
1194 object.
1195
53
1196 'common' is a list of revision numbers. If common is not supplied, uses
54 for rev in rl.revs():
1197 nullrev.
55 node = rl.node(rev)
1198 """
56 p1, p2 = rl.parents(node)
1199 if common is None:
1200 common = [nullrev]
1201
1202 if rustancestor is not None and self.index.rust_ext_compat:
1203 return rustancestor.MissingAncestors(self.index, common)
1204 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1205
1206 def findmissingrevs(self, common=None, heads=None):
1207 """Return the revision numbers of the ancestors of heads that
1208 are not ancestors of common.
1209
1210 More specifically, return a list of revision numbers corresponding to
1211 nodes N such that every N satisfies the following constraints:
1212
1213 1. N is an ancestor of some node in 'heads'
1214 2. N is not an ancestor of any node in 'common'
1215
1216 The list is sorted by revision number, meaning it is
1217 topologically sorted.
1218
1219 'heads' and 'common' are both lists of revision numbers. If heads is
1220 not supplied, uses all of the revlog's heads. If common is not
1221 supplied, uses nullid."""
1222 if common is None:
1223 common = [nullrev]
1224 if heads is None:
1225 heads = self.headrevs()
1226
1227 inc = self.incrementalmissingrevs(common=common)
1228 return inc.missingancestors(heads)
1229
1230 def findmissing(self, common=None, heads=None):
1231 """Return the ancestors of heads that are not ancestors of common.
1232
1233 More specifically, return a list of nodes N such that every N
1234 satisfies the following constraints:
1235
1236 1. N is an ancestor of some node in 'heads'
1237 2. N is not an ancestor of any node in 'common'
1238
1239 The list is sorted by revision number, meaning it is
1240 topologically sorted.
1241
1242 'heads' and 'common' are both lists of node IDs. If heads is
1243 not supplied, uses all of the revlog's heads. If common is not
1244 supplied, uses nullid."""
1245 if common is None:
1246 common = [self.nullid]
1247 if heads is None:
1248 heads = self.heads()
1249
1250 common = [self.rev(n) for n in common]
1251 heads = [self.rev(n) for n in heads]
1252
1253 inc = self.incrementalmissingrevs(common=common)
1254 return [self.node(r) for r in inc.missingancestors(heads)]
1255
1256 def nodesbetween(self, roots=None, heads=None):
1257 """Return a topological path from 'roots' to 'heads'.
1258
1259 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1260 topologically sorted list of all nodes N that satisfy both of
1261 these constraints:
1262
1263 1. N is a descendant of some node in 'roots'
1264 2. N is an ancestor of some node in 'heads'
1265
1266 Every node is considered to be both a descendant and an ancestor
1267 of itself, so every reachable node in 'roots' and 'heads' will be
1268 included in 'nodes'.
1269
1270 'outroots' is the list of reachable nodes in 'roots', i.e., the
1271 subset of 'roots' that is returned in 'nodes'. Likewise,
1272 'outheads' is the subset of 'heads' that is also in 'nodes'.
1273
1274 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1275 unspecified, uses nullid as the only root. If 'heads' is
1276 unspecified, uses list of all of the revlog's heads."""
1277 nonodes = ([], [], [])
1278 if roots is not None:
1279 roots = list(roots)
1280 if not roots:
1281 return nonodes
1282 lowestrev = min([self.rev(n) for n in roots])
1283 else:
1284 roots = [self.nullid] # Everybody's a descendant of nullid
1285 lowestrev = nullrev
1286 if (lowestrev == nullrev) and (heads is None):
1287 # We want _all_ the nodes!
1288 return (
1289 [self.node(r) for r in self],
1290 [self.nullid],
1291 list(self.heads()),
1292 )
1293 if heads is None:
1294 # All nodes are ancestors, so the latest ancestor is the last
1295 # node.
1296 highestrev = len(self) - 1
1297 # Set ancestors to None to signal that every node is an ancestor.
1298 ancestors = None
1299 # Set heads to an empty dictionary for later discovery of heads
1300 heads = {}
1301 else:
1302 heads = list(heads)
1303 if not heads:
1304 return nonodes
1305 ancestors = set()
1306 # Turn heads into a dictionary so we can remove 'fake' heads.
1307 # Also, later we will be using it to filter out the heads we can't
1308 # find from roots.
1309 heads = dict.fromkeys(heads, False)
1310 # Start at the top and keep marking parents until we're done.
1311 nodestotag = set(heads)
1312 # Remember where the top was so we can use it as a limit later.
1313 highestrev = max([self.rev(n) for n in nodestotag])
1314 while nodestotag:
1315 # grab a node to tag
1316 n = nodestotag.pop()
1317 # Never tag nullid
1318 if n == self.nullid:
1319 continue
1320 # A node's revision number represents its place in a
1321 # topologically sorted list of nodes.
1322 r = self.rev(n)
1323 if r >= lowestrev:
1324 if n not in ancestors:
1325 # If we are possibly a descendant of one of the roots
1326 # and we haven't already been marked as an ancestor
1327 ancestors.add(n) # Mark as ancestor
1328 # Add non-nullid parents to list of nodes to tag.
1329 nodestotag.update(
1330 [p for p in self.parents(n) if p != self.nullid]
1331 )
1332 elif n in heads: # We've seen it before, is it a fake head?
1333 # So it is, real heads should not be the ancestors of
1334 # any other heads.
1335 heads.pop(n)
1336 if not ancestors:
1337 return nonodes
1338 # Now that we have our set of ancestors, we want to remove any
1339 # roots that are not ancestors.
1340
1341 # If one of the roots was nullid, everything is included anyway.
1342 if lowestrev > nullrev:
1343 # But, since we weren't, let's recompute the lowest rev to not
1344 # include roots that aren't ancestors.
1345
57
1346 # Filter out roots that aren't ancestors of heads
58 if rev == censorrev:
1347 roots = [root for root in roots if root in ancestors]
59 newrl.addrawrevision(
1348 # Recompute the lowest revision
60 tombstone,
1349 if roots:
61 tr,
1350 lowestrev = min([self.rev(root) for root in roots])
62 rl.linkrev(censorrev),
1351 else:
63 p1,
1352 # No more roots? Return empty list
64 p2,
1353 return nonodes
65 censornode,
1354 else:
66 constants.REVIDX_ISCENSORED,
1355 # We are descending from nullid, and don't need to care about
1356 # any other roots.
1357 lowestrev = nullrev
1358 roots = [self.nullid]
1359 # Transform our roots list into a set.
1360 descendants = set(roots)
1361 # Also, keep the original roots so we can filter out roots that aren't
1362 # 'real' roots (i.e. are descended from other roots).
1363 roots = descendants.copy()
1364 # Our topologically sorted list of output nodes.
1365 orderedout = []
1366 # Don't start at nullid since we don't want nullid in our output list,
1367 # and if nullid shows up in descendants, empty parents will look like
1368 # they're descendants.
1369 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1370 n = self.node(r)
1371 isdescendant = False
1372 if lowestrev == nullrev: # Everybody is a descendant of nullid
1373 isdescendant = True
1374 elif n in descendants:
1375 # n is already a descendant
1376 isdescendant = True
1377 # This check only needs to be done here because all the roots
1378 # will start being marked is descendants before the loop.
1379 if n in roots:
1380 # If n was a root, check if it's a 'real' root.
1381 p = tuple(self.parents(n))
1382 # If any of its parents are descendants, it's not a root.
1383 if (p[0] in descendants) or (p[1] in descendants):
1384 roots.remove(n)
1385 else:
1386 p = tuple(self.parents(n))
1387 # A node is a descendant if either of its parents are
1388 # descendants. (We seeded the dependents list with the roots
1389 # up there, remember?)
1390 if (p[0] in descendants) or (p[1] in descendants):
1391 descendants.add(n)
1392 isdescendant = True
1393 if isdescendant and ((ancestors is None) or (n in ancestors)):
1394 # Only include nodes that are both descendants and ancestors.
1395 orderedout.append(n)
1396 if (ancestors is not None) and (n in heads):
1397 # We're trying to figure out which heads are reachable
1398 # from roots.
1399 # Mark this head as having been reached
1400 heads[n] = True
1401 elif ancestors is None:
1402 # Otherwise, we're trying to discover the heads.
1403 # Assume this is a head because if it isn't, the next step
1404 # will eventually remove it.
1405 heads[n] = True
1406 # But, obviously its parents aren't.
1407 for p in self.parents(n):
1408 heads.pop(p, None)
1409 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1410 roots = list(roots)
1411 assert orderedout
1412 assert roots
1413 assert heads
1414 return (orderedout, roots, heads)
1415
1416 def headrevs(self, revs=None):
1417 if revs is None:
1418 try:
1419 return self.index.headrevs()
1420 except AttributeError:
1421 return self._headrevs()
1422 if rustdagop is not None and self.index.rust_ext_compat:
1423 return rustdagop.headrevs(self.index, revs)
1424 return dagop.headrevs(revs, self._uncheckedparentrevs)
1425
1426 def computephases(self, roots):
1427 return self.index.computephasesmapsets(roots)
1428
1429 def _headrevs(self):
1430 count = len(self)
1431 if not count:
1432 return [nullrev]
1433 # we won't iter over filtered rev so nobody is a head at start
1434 ishead = [0] * (count + 1)
1435 index = self.index
1436 for r in self:
1437 ishead[r] = 1 # I may be an head
1438 e = index[r]
1439 ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
1440 return [r for r, val in enumerate(ishead) if val]
1441
1442 def heads(self, start=None, stop=None):
1443 """return the list of all nodes that have no children
1444
1445 if start is specified, only heads that are descendants of
1446 start will be returned
1447 if stop is specified, it will consider all the revs from stop
1448 as if they had no children
1449 """
1450 if start is None and stop is None:
1451 if not len(self):
1452 return [self.nullid]
1453 return [self.node(r) for r in self.headrevs()]
1454
1455 if start is None:
1456 start = nullrev
1457 else:
1458 start = self.rev(start)
1459
1460 stoprevs = {self.rev(n) for n in stop or []}
1461
1462 revs = dagop.headrevssubset(
1463 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1464 )
1465
1466 return [self.node(rev) for rev in revs]
1467
1468 def children(self, node):
1469 """find the children of a given node"""
1470 c = []
1471 p = self.rev(node)
1472 for r in self.revs(start=p + 1):
1473 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1474 if prevs:
1475 for pr in prevs:
1476 if pr == p:
1477 c.append(self.node(r))
1478 elif p == nullrev:
1479 c.append(self.node(r))
1480 return c
1481
1482 def commonancestorsheads(self, a, b):
1483 """calculate all the heads of the common ancestors of nodes a and b"""
1484 a, b = self.rev(a), self.rev(b)
1485 ancs = self._commonancestorsheads(a, b)
1486 return pycompat.maplist(self.node, ancs)
1487
1488 def _commonancestorsheads(self, *revs):
1489 """calculate all the heads of the common ancestors of revs"""
1490 try:
1491 ancs = self.index.commonancestorsheads(*revs)
1492 except (AttributeError, OverflowError): # C implementation failed
1493 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1494 return ancs
1495
1496 def isancestor(self, a, b):
1497 """return True if node a is an ancestor of node b
1498
1499 A revision is considered an ancestor of itself."""
1500 a, b = self.rev(a), self.rev(b)
1501 return self.isancestorrev(a, b)
1502
1503 def isancestorrev(self, a, b):
1504 """return True if revision a is an ancestor of revision b
1505
1506 A revision is considered an ancestor of itself.
1507
1508 The implementation of this is trivial but the use of
1509 reachableroots is not."""
1510 if a == nullrev:
1511 return True
1512 elif a == b:
1513 return True
1514 elif a > b:
1515 return False
1516 return bool(self.reachableroots(a, [b], [a], includepath=False))
1517
1518 def reachableroots(self, minroot, heads, roots, includepath=False):
1519 """return (heads(::(<roots> and <roots>::<heads>)))
1520
1521 If includepath is True, return (<roots>::<heads>)."""
1522 try:
1523 return self.index.reachableroots2(
1524 minroot, heads, roots, includepath
1525 )
1526 except AttributeError:
1527 return dagop._reachablerootspure(
1528 self.parentrevs, minroot, roots, heads, includepath
1529 )
1530
1531 def ancestor(self, a, b):
1532 """calculate the "best" common ancestor of nodes a and b"""
1533
1534 a, b = self.rev(a), self.rev(b)
1535 try:
1536 ancs = self.index.ancestors(a, b)
1537 except (AttributeError, OverflowError):
1538 ancs = ancestor.ancestors(self.parentrevs, a, b)
1539 if ancs:
1540 # choose a consistent winner when there's a tie
1541 return min(map(self.node, ancs))
1542 return self.nullid
1543
1544 def _match(self, id):
1545 if isinstance(id, int):
1546 # rev
1547 return self.node(id)
1548 if len(id) == self.nodeconstants.nodelen:
1549 # possibly a binary node
1550 # odds of a binary node being all hex in ASCII are 1 in 10**25
1551 try:
1552 node = id
1553 self.rev(node) # quick search the index
1554 return node
1555 except error.LookupError:
1556 pass # may be partial hex id
1557 try:
1558 # str(rev)
1559 rev = int(id)
1560 if b"%d" % rev != id:
1561 raise ValueError
1562 if rev < 0:
1563 rev = len(self) + rev
1564 if rev < 0 or rev >= len(self):
1565 raise ValueError
1566 return self.node(rev)
1567 except (ValueError, OverflowError):
1568 pass
1569 if len(id) == 2 * self.nodeconstants.nodelen:
1570 try:
1571 # a full hex nodeid?
1572 node = bin(id)
1573 self.rev(node)
1574 return node
1575 except (TypeError, error.LookupError):
1576 pass
1577
1578 def _partialmatch(self, id):
1579 # we don't care wdirfilenodeids as they should be always full hash
1580 maybewdir = self.nodeconstants.wdirhex.startswith(id)
1581 ambiguous = False
1582 try:
1583 partial = self.index.partialmatch(id)
1584 if partial and self.hasnode(partial):
1585 if maybewdir:
1586 # single 'ff...' match in radix tree, ambiguous with wdir
1587 ambiguous = True
1588 else:
1589 return partial
1590 elif maybewdir:
1591 # no 'ff...' match in radix tree, wdir identified
1592 raise error.WdirUnsupported
1593 else:
1594 return None
1595 except error.RevlogError:
1596 # parsers.c radix tree lookup gave multiple matches
1597 # fast path: for unfiltered changelog, radix tree is accurate
1598 if not getattr(self, 'filteredrevs', None):
1599 ambiguous = True
1600 # fall through to slow path that filters hidden revisions
1601 except (AttributeError, ValueError):
1602 # we are pure python, or key was too short to search radix tree
1603 pass
1604 if ambiguous:
1605 raise error.AmbiguousPrefixLookupError(
1606 id, self.display_id, _(b'ambiguous identifier')
1607 )
67 )
1608
68
1609 if id in self._pcache:
69 if newrl.deltaparent(rev) != nullrev:
1610 return self._pcache[id]
70 m = _(b'censored revision stored as delta; cannot censor')
1611
71 h = _(
1612 if len(id) <= 40:
72 b'censoring of revlogs is not fully implemented;'
1613 try:
73 b' please report this bug'
1614 # hex(node)[:...]
74 )
1615 l = len(id) // 2 # grab an even number of digits
75 raise error.Abort(m, hint=h)
1616 prefix = bin(id[: l * 2])
76 continue
1617 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1618 nl = [
1619 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1620 ]
1621 if self.nodeconstants.nullhex.startswith(id):
1622 nl.append(self.nullid)
1623 if len(nl) > 0:
1624 if len(nl) == 1 and not maybewdir:
1625 self._pcache[id] = nl[0]
1626 return nl[0]
1627 raise error.AmbiguousPrefixLookupError(
1628 id, self.display_id, _(b'ambiguous identifier')
1629 )
1630 if maybewdir:
1631 raise error.WdirUnsupported
1632 return None
1633 except TypeError:
1634 pass
1635
1636 def lookup(self, id):
1637 """locate a node based on:
1638 - revision number or str(revision number)
1639 - nodeid or subset of hex nodeid
1640 """
1641 n = self._match(id)
1642 if n is not None:
1643 return n
1644 n = self._partialmatch(id)
1645 if n:
1646 return n
1647
1648 raise error.LookupError(id, self.display_id, _(b'no match found'))
1649
1650 def shortest(self, node, minlength=1):
1651 """Find the shortest unambiguous prefix that matches node."""
1652
1653 def isvalid(prefix):
1654 try:
1655 matchednode = self._partialmatch(prefix)
1656 except error.AmbiguousPrefixLookupError:
1657 return False
1658 except error.WdirUnsupported:
1659 # single 'ff...' match
1660 return True
1661 if matchednode is None:
1662 raise error.LookupError(node, self.display_id, _(b'no node'))
1663 return True
1664
1665 def maybewdir(prefix):
1666 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1667
1668 hexnode = hex(node)
1669
1670 def disambiguate(hexnode, minlength):
1671 """Disambiguate against wdirid."""
1672 for length in range(minlength, len(hexnode) + 1):
1673 prefix = hexnode[:length]
1674 if not maybewdir(prefix):
1675 return prefix
1676
1677 if not getattr(self, 'filteredrevs', None):
1678 try:
1679 length = max(self.index.shortest(node), minlength)
1680 return disambiguate(hexnode, length)
1681 except error.RevlogError:
1682 if node != self.nodeconstants.wdirid:
1683 raise error.LookupError(
1684 node, self.display_id, _(b'no node')
1685 )
1686 except AttributeError:
1687 # Fall through to pure code
1688 pass
1689
1690 if node == self.nodeconstants.wdirid:
1691 for length in range(minlength, len(hexnode) + 1):
1692 prefix = hexnode[:length]
1693 if isvalid(prefix):
1694 return prefix
1695
1696 for length in range(minlength, len(hexnode) + 1):
1697 prefix = hexnode[:length]
1698 if isvalid(prefix):
1699 return disambiguate(hexnode, length)
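# --- illustrative sketch, not part of revlog.py -----------------------
# A self-contained toy version of the prefix search in shortest():
# grow the hex prefix until it matches exactly one known node.
def toy_shortest(hexnodes, hexnode, minlength=1):
    for length in range(minlength, len(hexnode) + 1):
        prefix = hexnode[:length]
        if sum(1 for n in hexnodes if n.startswith(prefix)) == 1:
            return prefix
    return hexnode

# toy_shortest(['ab12', 'ab34', 'cd56'], 'ab12') == 'ab1'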
1700
1701 def cmp(self, node, text):
1702 """compare text with a given file revision
1703
1704 returns True if text is different than what is stored.
1705 """
1706 p1, p2 = self.parents(node)
1707 return storageutil.hashrevisionsha1(text, p1, p2) != node
1708
1709 def _cachesegment(self, offset, data):
1710 """Add a segment to the revlog cache.
1711
1712 Accepts an absolute offset and the data that is at that location.
1713 """
1714 o, d = self._chunkcache
1715 # try to add to existing cache
1716 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1717 self._chunkcache = o, d + data
1718 else:
1719 self._chunkcache = offset, data
1720
1721 def _readsegment(self, offset, length, df=None):
1722 """Load a segment of raw data from the revlog.
1723
1724 Accepts an absolute offset, length to read, and an optional existing
1725 file handle to read from.
1726
1727 If an existing file handle is passed, it will be seeked and the
1728 original seek position will NOT be restored.
1729
1730 Returns a str or buffer of raw byte data.
1731
1732 Raises if the requested number of bytes could not be read.
1733 """
1734 # Cache data both forward and backward around the requested
1735 # data, in a fixed size window. This helps speed up operations
1736 # involving reading the revlog backwards.
1737 cachesize = self._chunkcachesize
1738 realoffset = offset & ~(cachesize - 1)
1739 reallength = (
1740 (offset + length + cachesize) & ~(cachesize - 1)
1741 ) - realoffset
1742 with self._datareadfp(df) as df:
1743 df.seek(realoffset)
1744 d = df.read(reallength)
1745
1746 self._cachesegment(realoffset, d)
1747 if offset != realoffset or reallength != length:
1748 startoffset = offset - realoffset
1749 if len(d) - startoffset < length:
1750 filename = self._indexfile if self._inline else self._datafile
1751 got = len(d) - startoffset
1752 m = PARTIAL_READ_MSG % (filename, length, offset, got)
1753 raise error.RevlogError(m)
1754 return util.buffer(d, startoffset, length)
1755
1756 if len(d) < length:
1757 filename = self._indexfile if self._inline else self._datafile
1758 got = len(d) - startoffset
1759 m = PARTIAL_READ_MSG % (filename, length, offset, got)
1760 raise error.RevlogError(m)
1761
1762 return d
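# --- illustrative sketch, not part of revlog.py -----------------------
# The window arithmetic above, in isolation: round the requested range
# out to multiples of the cache size (assumed to be a power of two).
def aligned_window(offset, length, cachesize=65536):
    realoffset = offset & ~(cachesize - 1)
    reallength = ((offset + length + cachesize) & ~(cachesize - 1)) - realoffset
    return realoffset, reallength

# aligned_window(70000, 100) == (65536, 65536): the 100 requested bytes
# are then sliced back out of the window at offset 70000 - 65536.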
1763
1764 def _getsegment(self, offset, length, df=None):
1765 """Obtain a segment of raw data from the revlog.
1766
1767 Accepts an absolute offset, length of bytes to obtain, and an
1768 optional file handle to the already-opened revlog. If the file
1769 handle is used, its original seek position will not be preserved.
1770
1771 Requests for data may be returned from a cache.
1772
1773 Returns a str or a buffer instance of raw byte data.
1774 """
1775 o, d = self._chunkcache
1776 l = len(d)
1777
1778 # is it in the cache?
1779 cachestart = offset - o
1780 cacheend = cachestart + length
1781 if cachestart >= 0 and cacheend <= l:
1782 if cachestart == 0 and cacheend == l:
1783 return d # avoid a copy
1784 return util.buffer(d, cachestart, cacheend - cachestart)
1785
1786 return self._readsegment(offset, length, df=df)
1787
1788 def _getsegmentforrevs(self, startrev, endrev, df=None):
1789 """Obtain a segment of raw data corresponding to a range of revisions.
1790
1791 Accepts the start and end revisions and an optional already-open
1792 file handle to be used for reading. If the file handle is read, its
1793 seek position will not be preserved.
1794
1795 Requests for data may be satisfied by a cache.
1796
1797 Returns a 2-tuple of (offset, data) for the requested range of
1798 revisions. Offset is the integer offset from the beginning of the
1799 revlog and data is a str or buffer of the raw byte data.
1800
1801 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1802 to determine where each revision's data begins and ends.
1803 """
1804 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1805 # (functions are expensive).
1806 index = self.index
1807 istart = index[startrev]
1808 start = int(istart[0] >> 16)
1809 if startrev == endrev:
1810 end = start + istart[1]
1811 else:
1812 iend = index[endrev]
1813 end = int(iend[0] >> 16) + iend[1]
1814
1815 if self._inline:
1816 start += (startrev + 1) * self.index.entry_size
1817 end += (endrev + 1) * self.index.entry_size
1818 length = end - start
1819
1820 return start, self._getsegment(start, length, df=df)
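# --- illustrative sketch, not part of revlog.py -----------------------
# The inline adjustment above, in isolation: in an inline revlog each
# revision's data is preceded by its index entry, so a data offset must
# be shifted by one entry per revision seen so far. The 64-byte entry
# size here is an assumption for the example (the v1 index entry size).
def inline_start(data_start, rev, entry_size=64):
    return data_start + (rev + 1) * entry_size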
1821
1822 def _chunk(self, rev, df=None):
1823 """Obtain a single decompressed chunk for a revision.
1824
1825 Accepts an integer revision and an optional already-open file handle
1826 to be used for reading. If used, the seek position of the file will not
1827 be preserved.
1828
1829 Returns a str holding uncompressed data for the requested revision.
1830 """
1831 compression_mode = self.index[rev][10]
1832 data = self._getsegmentforrevs(rev, rev, df=df)[1]
1833 if compression_mode == COMP_MODE_PLAIN:
1834 return data
1835 elif compression_mode == COMP_MODE_DEFAULT:
1836 return self._decompressor(data)
1837 elif compression_mode == COMP_MODE_INLINE:
1838 return self.decompress(data)
1839 else:
1840 msg = 'unknown compression mode %d'
1841 msg %= compression_mode
1842 raise error.RevlogError(msg)
1843
1844 def _chunks(self, revs, df=None, targetsize=None):
1845 """Obtain decompressed chunks for the specified revisions.
1846
1847 Accepts an iterable of numeric revisions that are assumed to be in
1848 ascending order. Also accepts an optional already-open file handle
1849 to be used for reading. If used, the seek position of the file will
1850 not be preserved.
1851
1852 This function is similar to calling ``self._chunk()`` multiple times,
1853 but is faster.
1854
1855 Returns a list with decompressed data for each requested revision.
1856 """
1857 if not revs:
1858 return []
1859 start = self.start
1860 length = self.length
1861 inline = self._inline
1862 iosize = self.index.entry_size
1863 buffer = util.buffer
1864
1865 l = []
1866 ladd = l.append
1867
1868 if not self._withsparseread:
1869 slicedchunks = (revs,)
1870 else:
1871 slicedchunks = deltautil.slicechunk(
1872 self, revs, targetsize=targetsize
1873 )
1874
1875 for revschunk in slicedchunks:
1876 firstrev = revschunk[0]
1877 # Skip trailing revisions with empty diff
1878 for lastrev in revschunk[::-1]:
1879 if length(lastrev) != 0:
1880 break
1881
1882 try:
1883 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1884 except OverflowError:
1885 # issue4215 - we can't cache a run of chunks greater than
1886 # 2G on Windows
1887 return [self._chunk(rev, df=df) for rev in revschunk]
1888
1889 decomp = self.decompress
1890 # self._decompressor might be None, but will not be used in that case
1891 def_decomp = self._decompressor
1892 for rev in revschunk:
1893 chunkstart = start(rev)
1894 if inline:
1895 chunkstart += (rev + 1) * iosize
1896 chunklength = length(rev)
1897 comp_mode = self.index[rev][10]
1898 c = buffer(data, chunkstart - offset, chunklength)
1899 if comp_mode == COMP_MODE_PLAIN:
1900 ladd(c)
1901 elif comp_mode == COMP_MODE_INLINE:
1902 ladd(decomp(c))
1903 elif comp_mode == COMP_MODE_DEFAULT:
1904 ladd(def_decomp(c))
1905 else:
1906 msg = 'unknown compression mode %d'
1907 msg %= comp_mode
1908 raise error.RevlogError(msg)
1909
1910 return l
1911
1912 def _chunkclear(self):
1913 """Clear the raw chunk cache."""
1914 self._chunkcache = (0, b'')
1915
1916 def deltaparent(self, rev):
1917 """return deltaparent of the given revision"""
1918 base = self.index[rev][3]
1919 if base == rev:
1920 return nullrev
1921 elif self._generaldelta:
1922 return base
1923 else:
1924 return rev - 1
1925
1926 def issnapshot(self, rev):
1927 """tells whether rev is a snapshot"""
1928 if not self._sparserevlog:
1929 return self.deltaparent(rev) == nullrev
1930 elif util.safehasattr(self.index, b'issnapshot'):
1931 # directly assign the method to cache the testing and access
1932 self.issnapshot = self.index.issnapshot
1933 return self.issnapshot(rev)
1934 if rev == nullrev:
1935 return True
1936 entry = self.index[rev]
1937 base = entry[3]
1938 if base == rev:
1939 return True
1940 if base == nullrev:
1941 return True
1942 p1 = entry[5]
1943 p2 = entry[6]
1944 if base == p1 or base == p2:
1945 return False
1946 return self.issnapshot(base)
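# --- illustrative sketch, not part of revlog.py -----------------------
# The pure-python fallback above, over a toy index: base[r] is the
# delta base of r (base[r] == r meaning "full text stored"), p1/p2 map
# each revision to its parent revisions, and -1 stands for nullrev.
def toy_issnapshot(base, p1, p2, rev):
    if rev == -1 or base[rev] == rev or base[rev] == -1:
        return True                      # full text, or delta against null
    if base[rev] in (p1[rev], p2[rev]):
        return False                     # plain delta against a parent
    return toy_issnapshot(base, p1, p2, base[rev])  # intermediate snapshot?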
1947
1948 def snapshotdepth(self, rev):
1949 """number of snapshot in the chain before this one"""
1950 if not self.issnapshot(rev):
1951 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1952 return len(self._deltachain(rev)[0]) - 1
1953
1954 def revdiff(self, rev1, rev2):
1955 """return or calculate a delta between two revisions
1956
1957 The delta calculated is in binary form and is intended to be written to
1958 revlog data directly. So this function needs raw revision data.
1959 """
1960 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1961 return bytes(self._chunk(rev2))
1962
1963 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1964
1965 def _processflags(self, text, flags, operation, raw=False):
1966 """deprecated entry point to access flag processors"""
1967 msg = b'_processflag(...) use the specialized variant'
1968 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1969 if raw:
1970 return text, flagutil.processflagsraw(self, text, flags)
1971 elif operation == b'read':
1972 return flagutil.processflagsread(self, text, flags)
1973 else: # write operation
1974 return flagutil.processflagswrite(self, text, flags)
1975
1976 def revision(self, nodeorrev, _df=None, raw=False):
1977 """return an uncompressed revision of a given node or revision
1978 number.
1979
1980 _df - an existing file handle to read from. (internal-only)
1981 raw - an optional argument specifying if the revision data is to be
1982 treated as raw data when applying flag transforms. 'raw' should be set
1983 to True when generating changegroups or in debug commands.
1984 """
1985 if raw:
1986 msg = (
1987 b'revlog.revision(..., raw=True) is deprecated, '
1988 b'use revlog.rawdata(...)'
1989 )
1990 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1991 return self._revisiondata(nodeorrev, _df, raw=raw)
1992
1993 def sidedata(self, nodeorrev, _df=None):
1994 """a map of extra data related to the changeset but not part of the hash
1995
1996 This function currently returns a dictionary. However, a more advanced
1997 mapping object will likely be used in the future for more
1998 efficient/lazy code.
1999 """
2000 # deal with <nodeorrev> argument type
2001 if isinstance(nodeorrev, int):
2002 rev = nodeorrev
2003 else:
2004 rev = self.rev(nodeorrev)
2005 return self._sidedata(rev)
2006
77
78 if rl.iscensored(rev):
79 if rl.deltaparent(rev) != nullrev:
80 m = _(
81 b'cannot censor due to censored '
82 b'revision having delta stored'
83 )
84 raise error.Abort(m)
85 rawtext = rl._chunk(rev)
86 else:
87 rawtext = rl.rawdata(rev)
2007 def _revisiondata(self, nodeorrev, _df=None, raw=False):
2008 # deal with <nodeorrev> argument type
2009 if isinstance(nodeorrev, int):
2010 rev = nodeorrev
2011 node = self.node(rev)
2012 else:
2013 node = nodeorrev
2014 rev = None
2015
2016 # fast path the special `nullid` rev
2017 if node == self.nullid:
2018 return b""
2019
2020 # ``rawtext`` is the text as stored inside the revlog. Might be the
2021 # revision or might need to be processed to retrieve the revision.
2022 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
2023
2024 if raw and validated:
2025 # if we don't want to process the raw text and that raw
2026 # text is cached, we can exit early.
2027 return rawtext
2028 if rev is None:
2029 rev = self.rev(node)
2030 # the revlog's flag for this revision
2031 # (usually alter its state or content)
2032 flags = self.flags(rev)
2033
2034 if validated and flags == REVIDX_DEFAULT_FLAGS:
2035 # no extra flags set, no flag processor runs, text = rawtext
2036 return rawtext
2037
2038 if raw:
2039 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2040 text = rawtext
2041 else:
2042 r = flagutil.processflagsread(self, rawtext, flags)
2043 text, validatehash = r
2044 if validatehash:
2045 self.checkhash(text, node, rev=rev)
2046 if not validated:
2047 self._revisioncache = (node, rev, rawtext)
2048
2049 return text
2050
2051 def _rawtext(self, node, rev, _df=None):
2052 """return the possibly unvalidated rawtext for a revision
2053
2054 returns (rev, rawtext, validated)
2055 """
2056
2057 # revision in the cache (could be useful to apply delta)
2058 cachedrev = None
2059 # An intermediate text to apply deltas to
2060 basetext = None
2061
2062 # Check if we have the entry in cache
2063 # The cache entry looks like (node, rev, rawtext)
2064 if self._revisioncache:
2065 if self._revisioncache[0] == node:
2066 return (rev, self._revisioncache[2], True)
2067 cachedrev = self._revisioncache[1]
2068
2069 if rev is None:
2070 rev = self.rev(node)
2071
2072 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
2073 if stopped:
2074 basetext = self._revisioncache[2]
2075
2076 # drop cache to save memory, the caller is expected to
2077 # update self._revisioncache after validating the text
2078 self._revisioncache = None
2079
2080 targetsize = None
2081 rawsize = self.index[rev][2]
2082 if 0 <= rawsize:
2083 targetsize = 4 * rawsize
2084
2085 bins = self._chunks(chain, df=_df, targetsize=targetsize)
2086 if basetext is None:
2087 basetext = bytes(bins[0])
2088 bins = bins[1:]
2089
2090 rawtext = mdiff.patches(basetext, bins)
2091 del basetext # let us have a chance to free memory early
2092 return (rev, rawtext, False)
2093
2094 def _sidedata(self, rev):
2095 """Return the sidedata for a given revision number."""
2096 index_entry = self.index[rev]
2097 sidedata_offset = index_entry[8]
2098 sidedata_size = index_entry[9]
2099
2100 if self._inline:
2101 sidedata_offset += self.index.entry_size * (1 + rev)
2102 if sidedata_size == 0:
2103 return {}
2104
2105 # XXX this needs caching, as we do for data
2106 with self._sidedatareadfp() as sdf:
2107 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2108 filename = self._sidedatafile
2109 end = self._docket.sidedata_end
2110 offset = sidedata_offset
2111 length = sidedata_size
2112 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2113 raise error.RevlogError(m)
2114
2115 sdf.seek(sidedata_offset, os.SEEK_SET)
2116 comp_segment = sdf.read(sidedata_size)
2117
2118 if len(comp_segment) < sidedata_size:
2119 filename = self._sidedatafile
2120 length = sidedata_size
2121 offset = sidedata_offset
2122 got = len(comp_segment)
2123 m = PARTIAL_READ_MSG % (filename, length, offset, got)
2124 raise error.RevlogError(m)
2125
2126 comp = self.index[rev][11]
2127 if comp == COMP_MODE_PLAIN:
2128 segment = comp_segment
2129 elif comp == COMP_MODE_DEFAULT:
2130 segment = self._decompressor(comp_segment)
2131 elif comp == COMP_MODE_INLINE:
2132 segment = self.decompress(comp_segment)
2133 else:
2134 msg = 'unknown compression mode %d'
2135 msg %= comp
2136 raise error.RevlogError(msg)
2137
2138 sidedata = sidedatautil.deserialize_sidedata(segment)
2139 return sidedata
2140
2141 def rawdata(self, nodeorrev, _df=None):
2142 """return an uncompressed raw data of a given node or revision number.
2143
2144 _df - an existing file handle to read from. (internal-only)
2145 """
2146 return self._revisiondata(nodeorrev, _df, raw=True)
2147
2148 def hash(self, text, p1, p2):
2149 """Compute a node hash.
2150
2151 Available as a function so that subclasses can replace the hash
2152 as needed.
2153 """
2154 return storageutil.hashrevisionsha1(text, p1, p2)
2155
2156 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2157 """Check node hash integrity.
2158
2159 Available as a function so that subclasses can extend hash mismatch
2160 behaviors as needed.
2161 """
2162 try:
2163 if p1 is None and p2 is None:
2164 p1, p2 = self.parents(node)
2165 if node != self.hash(text, p1, p2):
2166 # Clear the revision cache on hash failure. The revision cache
2167 # only stores the raw revision and clearing the cache does have
2168 # the side-effect that we won't have a cache hit when the raw
2169 # revision data is accessed. But this case should be rare and
2170 # it is extra work to teach the cache about the hash
2171 # verification state.
2172 if self._revisioncache and self._revisioncache[0] == node:
2173 self._revisioncache = None
2174
2175 revornode = rev
2176 if revornode is None:
2177 revornode = templatefilters.short(hex(node))
2178 raise error.RevlogError(
2179 _(b"integrity check failed on %s:%s")
2180 % (self.display_id, pycompat.bytestr(revornode))
2181 )
2182 except error.RevlogError:
2183 if self._censorable and storageutil.iscensoredtext(text):
2184 raise error.CensoredNodeError(self.display_id, node, text)
2185 raise
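# --- illustrative sketch, not part of revlog.py -----------------------
# The hash being checked is storageutil.hashrevisionsha1: SHA-1 over
# the two parent nodes in sorted order, then the text. A minimal
# reimplementation for illustration:
import hashlib

def toy_node_hash(text, p1, p2):
    s = hashlib.sha1(min(p1, p2))
    s.update(max(p1, p2))
    s.update(text)
    return s.digest()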
2186
2187 def _enforceinlinesize(self, tr):
2188 """Check if the revlog is too big for inline and convert if so.
2189
2190 This should be called after revisions are added to the revlog. If the
2191 revlog has grown too large to be an inline revlog, it will convert it
2192 to use multiple index and data files.
2193 """
2194 tiprev = len(self) - 1
2195 total_size = self.start(tiprev) + self.length(tiprev)
2196 if not self._inline or total_size < _maxinline:
2197 return
2198
2199 troffset = tr.findoffset(self._indexfile)
2200 if troffset is None:
2201 raise error.RevlogError(
2202 _(b"%s not found in the transaction") % self._indexfile
2203 )
2204 trindex = 0
2205 tr.add(self._datafile, 0)
2206
88
89 newrl.addrawrevision(
90 rawtext, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev)
2207 existing_handles = False
2208 if self._writinghandles is not None:
2209 existing_handles = True
2210 fp = self._writinghandles[0]
2211 fp.flush()
2212 fp.close()
2213 # We can't use the cached file handle after close(). So prevent
2214 # its usage.
2215 self._writinghandles = None
2216
2217 new_dfh = self._datafp(b'w+')
2218 new_dfh.truncate(0) # drop any potentially existing data
2219 try:
2220 with self._indexfp() as read_ifh:
2221 for r in self:
2222 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2223 if troffset <= self.start(r) + r * self.index.entry_size:
2224 trindex = r
2225 new_dfh.flush()
2226
2227 with self.__index_new_fp() as fp:
2228 self._format_flags &= ~FLAG_INLINE_DATA
2229 self._inline = False
2230 for i in self:
2231 e = self.index.entry_binary(i)
2232 if i == 0 and self._docket is None:
2233 header = self._format_flags | self._format_version
2234 header = self.index.pack_header(header)
2235 e = header + e
2236 fp.write(e)
2237 if self._docket is not None:
2238 self._docket.index_end = fp.tell()
2239
2240 # There is a small transactional race here. If the rename of
2241 # the index fails, we should remove the datafile. It is more
2242 # important to ensure that the data file is not truncated
2243 # when the index is replaced as otherwise data is lost.
2244 tr.replace(self._datafile, self.start(trindex))
2245
2246 # the temp file replaces the real index when we exit the context
2247 # manager
2248
2249 tr.replace(self._indexfile, trindex * self.index.entry_size)
2250 nodemaputil.setup_persistent_nodemap(tr, self)
2251 self._chunkclear()
2252
2253 if existing_handles:
2254 # switched from inline to conventional reopen the index
2255 ifh = self.__index_write_fp()
2256 self._writinghandles = (ifh, new_dfh, None)
2257 new_dfh = None
2258 finally:
2259 if new_dfh is not None:
2260 new_dfh.close()
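# --- illustrative sketch, not part of revlog.py -----------------------
# The trigger condition above, in isolation: an inline revlog is split
# into separate index/data files once the data it stores crosses
# _maxinline bytes (the limit value below is an assumption for the
# example).
def toy_should_split(inline, start_of_tip, length_of_tip, maxinline=131072):
    return inline and start_of_tip + length_of_tip >= maxinline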
2261
2262 def _nodeduplicatecallback(self, transaction, node):
2263 """called when trying to add a node already stored."""
2264
2265 @contextlib.contextmanager
2266 def _writing(self, transaction):
2267 if self._trypending:
2268 msg = b'try to write in a `trypending` revlog: %s'
2269 msg %= self.display_id
2270 raise error.ProgrammingError(msg)
2271 if self._writinghandles is not None:
2272 yield
2273 else:
2274 ifh = dfh = sdfh = None
2275 try:
2276 r = len(self)
2277 # opening the data file.
2278 dsize = 0
2279 if r:
2280 dsize = self.end(r - 1)
2281 dfh = None
2282 if not self._inline:
2283 try:
2284 dfh = self._datafp(b"r+")
2285 if self._docket is None:
2286 dfh.seek(0, os.SEEK_END)
2287 else:
2288 dfh.seek(self._docket.data_end, os.SEEK_SET)
2289 except IOError as inst:
2290 if inst.errno != errno.ENOENT:
2291 raise
2292 dfh = self._datafp(b"w+")
2293 transaction.add(self._datafile, dsize)
2294 if self._sidedatafile is not None:
2295 try:
2296 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2297 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2298 except IOError as inst:
2299 if inst.errno != errno.ENOENT:
2300 raise
2301 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2302 transaction.add(
2303 self._sidedatafile, self._docket.sidedata_end
2304 )
2305
2306 # opening the index file.
2307 isize = r * self.index.entry_size
2308 ifh = self.__index_write_fp()
2309 if self._inline:
2310 transaction.add(self._indexfile, dsize + isize)
2311 else:
2312 transaction.add(self._indexfile, isize)
2313 # exposing all file handle for writing.
2314 self._writinghandles = (ifh, dfh, sdfh)
2315 yield
2316 if self._docket is not None:
2317 self._write_docket(transaction)
2318 finally:
2319 self._writinghandles = None
2320 if dfh is not None:
2321 dfh.close()
2322 if sdfh is not None:
2323 sdfh.close()
2324 # closing the index file last to avoid exposing references to
2325 # potentially unflushed data content.
2326 if ifh is not None:
2327 ifh.close()
2328
2329 def _write_docket(self, transaction):
2330 """write the current docket on disk
2331
2332 Exists as a method to help the changelog implement transaction logic
2333
2334 We could also imagine using the same transaction logic for all revlogs
2335 since dockets are cheap."""
2336 self._docket.write(transaction)
2337
2338 def addrevision(
2339 self,
2340 text,
2341 transaction,
2342 link,
2343 p1,
2344 p2,
2345 cachedelta=None,
2346 node=None,
2347 flags=REVIDX_DEFAULT_FLAGS,
2348 deltacomputer=None,
2349 sidedata=None,
2350 ):
2351 """add a revision to the log
2352
2353 text - the revision data to add
2354 transaction - the transaction object used for rollback
2355 link - the linkrev data to add
2356 p1, p2 - the parent nodeids of the revision
2357 cachedelta - an optional precomputed delta
2358 node - nodeid of revision; typically node is not specified, and it is
2359 computed by default as hash(text, p1, p2), however subclasses might
2360 use different hashing method (and override checkhash() in such case)
2361 flags - the known flags to set on the revision
2362 deltacomputer - an optional deltacomputer instance shared between
2363 multiple calls
2364 """
2365 if link == nullrev:
2366 raise error.RevlogError(
2367 _(b"attempted to add linkrev -1 to %s") % self.display_id
2368 )
2369
2370 if sidedata is None:
2371 sidedata = {}
2372 elif sidedata and not self.hassidedata:
2373 raise error.ProgrammingError(
2374 _(b"trying to add sidedata to a revlog who don't support them")
2375 )
2376
2377 if flags:
2378 node = node or self.hash(text, p1, p2)
2379
2380 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2381
2382 # If the flag processor modifies the revision data, ignore any provided
2383 # cachedelta.
2384 if rawtext != text:
2385 cachedelta = None
2386
2387 if len(rawtext) > _maxentrysize:
2388 raise error.RevlogError(
2389 _(
2390 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2391 )
2392 % (self.display_id, len(rawtext))
2393 )
2394
2395 node = node or self.hash(rawtext, p1, p2)
2396 rev = self.index.get_rev(node)
2397 if rev is not None:
2398 return rev
2399
2400 if validatehash:
2401 self.checkhash(rawtext, node, p1=p1, p2=p2)
2402
2403 return self.addrawrevision(
2404 rawtext,
2405 transaction,
2406 link,
2407 p1,
2408 p2,
2409 node,
2410 flags,
2411 cachedelta=cachedelta,
2412 deltacomputer=deltacomputer,
2413 sidedata=sidedata,
2414 )
91 )
92
93 tr.addbackup(rl._indexfile, location=b'store')
94 if not rl._inline:
95 tr.addbackup(rl._datafile, location=b'store')
2415
2416 def addrawrevision(
2417 self,
2418 rawtext,
2419 transaction,
2420 link,
2421 p1,
2422 p2,
2423 node,
2424 flags,
2425 cachedelta=None,
2426 deltacomputer=None,
2427 sidedata=None,
2428 ):
2429 """add a raw revision with known flags, node and parents
2430 useful when reusing a revision not stored in this revlog (ex: received
2431 over wire, or read from an external bundle).
2432 """
2433 with self._writing(transaction):
2434 return self._addrevision(
2435 node,
2436 rawtext,
2437 transaction,
2438 link,
2439 p1,
2440 p2,
2441 flags,
2442 cachedelta,
2443 deltacomputer=deltacomputer,
2444 sidedata=sidedata,
2445 )
2446
2447 def compress(self, data):
2448 """Generate a possibly-compressed representation of data."""
2449 if not data:
2450 return b'', data
2451
2452 compressed = self._compressor.compress(data)
2453
2454 if compressed:
2455 # The revlog compressor added the header in the returned data.
2456 return b'', compressed
2457
2458 if data[0:1] == b'\0':
2459 return b'', data
2460 return b'u', data
2461
2462 def decompress(self, data):
2463 """Decompress a revlog chunk.
2464
2465 The chunk is expected to begin with a header identifying the
2466 format type so it can be routed to an appropriate decompressor.
2467 """
2468 if not data:
2469 return data
2470
2471 # Revlogs are read much more frequently than they are written and many
2472 # chunks only take microseconds to decompress, so performance is
2473 # important here.
2474 #
2475 # We can make a few assumptions about revlogs:
2476 #
2477 # 1) the majority of chunks will be compressed (as opposed to inline
2478 # raw data).
2479 # 2) decompressing *any* data will likely be at least 10x slower than
2480 # returning raw inline data.
2481 # 3) we want to prioritize common and officially supported compression
2482 # engines
2483 #
2484 # It follows that we want to optimize for "decompress compressed data
2485 # when encoded with common and officially supported compression engines"
2486 # case over "raw data" and "data encoded by less common or non-official
2487 # compression engines." That is why we have the inline lookup first
2488 # followed by the compengines lookup.
2489 #
2490 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2491 # compressed chunks. And this matters for changelog and manifest reads.
2492 t = data[0:1]
2493
2494 if t == b'x':
2495 try:
2496 return _zlibdecompress(data)
2497 except zlib.error as e:
2498 raise error.RevlogError(
2499 _(b'revlog decompress error: %s')
2500 % stringutil.forcebytestr(e)
2501 )
2502 # '\0' is more common than 'u' so it goes first.
2503 elif t == b'\0':
2504 return data
2505 elif t == b'u':
2506 return util.buffer(data, 1)
2507
2508 compressor = self._get_decompressor(t)
2509
2510 return compressor.decompress(data)
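# --- illustrative sketch, not part of revlog.py -----------------------
# The one-byte header dispatch above, restricted to the three inline
# cases ('x' = zlib, '\0' = stored raw, 'u' = explicit uncompressed);
# pluggable compression engines are left out.
import zlib

def toy_decompress(data):
    if not data:
        return data
    t = data[0:1]
    if t == b'x':
        return zlib.decompress(data)  # zlib streams begin with 0x78 ('x')
    if t == b'\0':
        return data                   # raw chunk that starts with NUL
    if t == b'u':
        return data[1:]               # 'u' marks stored-uncompressed data
    raise ValueError('unknown chunk header %r' % t)

assert toy_decompress(zlib.compress(b'data')) == b'data'
assert toy_decompress(b'udata') == b'data'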
2511
2512 def _addrevision(
2513 self,
2514 node,
2515 rawtext,
2516 transaction,
2517 link,
2518 p1,
2519 p2,
2520 flags,
2521 cachedelta,
2522 alwayscache=False,
2523 deltacomputer=None,
2524 sidedata=None,
2525 ):
2526 """internal function to add revisions to the log
2527
2528 see addrevision for argument descriptions.
2529
2530 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2531
2532 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2533 be used.
2534
2535 invariants:
2536 - rawtext is optional (can be None); if not set, cachedelta must be set.
2537 if both are set, they must correspond to each other.
2538 """
2539 if node == self.nullid:
2540 raise error.RevlogError(
2541 _(b"%s: attempt to add null revision") % self.display_id
2542 )
2543 if (
2544 node == self.nodeconstants.wdirid
2545 or node in self.nodeconstants.wdirfilenodeids
2546 ):
2547 raise error.RevlogError(
2548 _(b"%s: attempt to add wdir revision") % self.display_id
2549 )
2550 if self._writinghandles is None:
2551 msg = b'adding revision outside `revlog._writing` context'
2552 raise error.ProgrammingError(msg)
2553
2554 if self._inline:
2555 fh = self._writinghandles[0]
2556 else:
2557 fh = self._writinghandles[1]
2558
2559 btext = [rawtext]
2560
2561 curr = len(self)
2562 prev = curr - 1
2563
2564 offset = self._get_data_offset(prev)
2565
2566 if self._concurrencychecker:
2567 ifh, dfh, sdfh = self._writinghandles
2568 # XXX no checking for the sidedata file
2569 if self._inline:
2570 # offset is "as if" it were in the .d file, so we need to add on
2571 # the size of the entry metadata.
2572 self._concurrencychecker(
2573 ifh, self._indexfile, offset + curr * self.index.entry_size
2574 )
2575 else:
2576 # Entries in the .i are a consistent size.
2577 self._concurrencychecker(
2578 ifh, self._indexfile, curr * self.index.entry_size
2579 )
2580 self._concurrencychecker(dfh, self._datafile, offset)
2581
2582 p1r, p2r = self.rev(p1), self.rev(p2)
2583
2584 # full versions are inserted when the needed deltas
2585 # become comparable to the uncompressed text
2586 if rawtext is None:
2587 # need rawtext size, before changed by flag processors, which is
2588 # the non-raw size. use revlog explicitly to avoid filelog's extra
2589 # logic that might remove metadata size.
2590 textlen = mdiff.patchedsize(
2591 revlog.size(self, cachedelta[0]), cachedelta[1]
2592 )
2593 else:
2594 textlen = len(rawtext)
2595
2596 if deltacomputer is None:
2597 deltacomputer = deltautil.deltacomputer(self)
2598
2599 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2600
2601 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2602
2603 compression_mode = COMP_MODE_INLINE
2604 if self._docket is not None:
2605 h, d = deltainfo.data
2606 if not h and not d:
2607 # not data to store at all... declare them uncompressed
2608 compression_mode = COMP_MODE_PLAIN
2609 elif not h:
2610 t = d[0:1]
2611 if t == b'\0':
2612 compression_mode = COMP_MODE_PLAIN
2613 elif t == self._docket.default_compression_header:
2614 compression_mode = COMP_MODE_DEFAULT
2615 elif h == b'u':
2616 # we have a more efficient way to declare uncompressed
2617 h = b''
2618 compression_mode = COMP_MODE_PLAIN
2619 deltainfo = deltautil.drop_u_compression(deltainfo)
2620
2621 sidedata_compression_mode = COMP_MODE_INLINE
2622 if sidedata and self.hassidedata:
2623 sidedata_compression_mode = COMP_MODE_PLAIN
2624 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2625 sidedata_offset = self._docket.sidedata_end
2626 h, comp_sidedata = self.compress(serialized_sidedata)
2627 if (
2628 h != b'u'
2629 and comp_sidedata[0:1] != b'\0'
2630 and len(comp_sidedata) < len(serialized_sidedata)
2631 ):
2632 assert not h
2633 if (
2634 comp_sidedata[0:1]
2635 == self._docket.default_compression_header
2636 ):
2637 sidedata_compression_mode = COMP_MODE_DEFAULT
2638 serialized_sidedata = comp_sidedata
2639 else:
2640 sidedata_compression_mode = COMP_MODE_INLINE
2641 serialized_sidedata = comp_sidedata
2642 else:
2643 serialized_sidedata = b""
2644 # Don't store the offset if the sidedata is empty, that way
2645 # we can easily detect empty sidedata and they will be no different
2646 # than ones we manually add.
2647 sidedata_offset = 0
2648
2649 e = (
2650 offset_type(offset, flags),
2651 deltainfo.deltalen,
2652 textlen,
2653 deltainfo.base,
2654 link,
2655 p1r,
2656 p2r,
2657 node,
2658 sidedata_offset,
2659 len(serialized_sidedata),
2660 compression_mode,
2661 sidedata_compression_mode,
2662 )
2663
2664 self.index.append(e)
2665 entry = self.index.entry_binary(curr)
2666 if curr == 0 and self._docket is None:
2667 header = self._format_flags | self._format_version
2668 header = self.index.pack_header(header)
2669 entry = header + entry
2670 self._writeentry(
2671 transaction,
2672 entry,
2673 deltainfo.data,
2674 link,
2675 offset,
2676 serialized_sidedata,
2677 sidedata_offset,
2678 )
2679
2680 rawtext = btext[0]
2681
2682 if alwayscache and rawtext is None:
2683 rawtext = deltacomputer.buildtext(revinfo, fh)
2684
2685 if type(rawtext) == bytes: # only accept immutable objects
2686 self._revisioncache = (node, curr, rawtext)
2687 self._chainbasecache[curr] = deltainfo.chainbase
2688 return curr
2689
2690 def _get_data_offset(self, prev):
2691 """Returns the current offset in the (in-transaction) data file.
2692 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2693 file to store that information: since sidedata can be rewritten to the
2694 end of the data file within a transaction, you can have cases where, for
2695 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2696 to `n - 1`'s sidedata being written after `n`'s data.
2697
2698 TODO cache this in a docket file before getting out of experimental."""
2699 if self._docket is None:
2700 return self.end(prev)
2701 else:
2702 return self._docket.data_end
2703
2704 def _writeentry(
2705 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2706 ):
2707 # Files opened in a+ mode have inconsistent behavior on various
2708 # platforms. Windows requires that a file positioning call be made
2709 # when the file handle transitions between reads and writes. See
2710 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2711 # platforms, Python or the platform itself can be buggy. Some versions
2712 # of Solaris have been observed to not append at the end of the file
2713 # if the file was seeked to before the end. See issue4943 for more.
2714 #
2715 # We work around this issue by inserting a seek() before writing.
2716 # Note: This is likely not necessary on Python 3. However, because
2717 # the file handle is reused for reads and may be seeked there, we need
2718 # to be careful before changing this.
2719 if self._writinghandles is None:
2720 msg = b'adding revision outside `revlog._writing` context'
2721 raise error.ProgrammingError(msg)
2722 ifh, dfh, sdfh = self._writinghandles
2723 if self._docket is None:
2724 ifh.seek(0, os.SEEK_END)
2725 else:
2726 ifh.seek(self._docket.index_end, os.SEEK_SET)
2727 if dfh:
2728 if self._docket is None:
2729 dfh.seek(0, os.SEEK_END)
2730 else:
2731 dfh.seek(self._docket.data_end, os.SEEK_SET)
2732 if sdfh:
2733 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2734
2735 curr = len(self) - 1
2736 if not self._inline:
2737 transaction.add(self._datafile, offset)
2738 if self._sidedatafile:
2739 transaction.add(self._sidedatafile, sidedata_offset)
2740 transaction.add(self._indexfile, curr * len(entry))
2741 if data[0]:
2742 dfh.write(data[0])
2743 dfh.write(data[1])
2744 if sidedata:
2745 sdfh.write(sidedata)
2746 ifh.write(entry)
2747 else:
2748 offset += curr * self.index.entry_size
2749 transaction.add(self._indexfile, offset)
2750 ifh.write(entry)
2751 ifh.write(data[0])
2752 ifh.write(data[1])
2753 assert not sidedata
2754 self._enforceinlinesize(transaction)
2755 if self._docket is not None:
2756 self._docket.index_end = self._writinghandles[0].tell()
2757 self._docket.data_end = self._writinghandles[1].tell()
2758 self._docket.sidedata_end = self._writinghandles[2].tell()
2759
2760 nodemaputil.setup_persistent_nodemap(transaction, self)
2761
2762 def addgroup(
2763 self,
2764 deltas,
2765 linkmapper,
2766 transaction,
2767 alwayscache=False,
2768 addrevisioncb=None,
2769 duplicaterevisioncb=None,
2770 ):
2771 """
2772 add a delta group
2773
2774 given a set of deltas, add them to the revision log. the
2775 first delta is against its parent, which should be in our
2776 log, the rest are against the previous delta.
2777
2778 If ``addrevisioncb`` is defined, it will be called with arguments of
2779 this revlog and the node that was added.
2780 """
2781
2782 if self._adding_group:
2783 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2784
2785 self._adding_group = True
2786 empty = True
2787 try:
2788 with self._writing(transaction):
2789 deltacomputer = deltautil.deltacomputer(self)
2790 # loop through our set of deltas
2791 for data in deltas:
2792 (
2793 node,
2794 p1,
2795 p2,
2796 linknode,
2797 deltabase,
2798 delta,
2799 flags,
2800 sidedata,
2801 ) = data
2802 link = linkmapper(linknode)
2803 flags = flags or REVIDX_DEFAULT_FLAGS
2804
2805 rev = self.index.get_rev(node)
2806 if rev is not None:
2807 # this can happen if two branches make the same change
2808 self._nodeduplicatecallback(transaction, rev)
2809 if duplicaterevisioncb:
2810 duplicaterevisioncb(self, rev)
2811 empty = False
2812 continue
2813
2814 for p in (p1, p2):
2815 if not self.index.has_node(p):
2816 raise error.LookupError(
2817 p, self.radix, _(b'unknown parent')
2818 )
2819
2820 if not self.index.has_node(deltabase):
2821 raise error.LookupError(
2822 deltabase, self.display_id, _(b'unknown delta base')
2823 )
2824
2825 baserev = self.rev(deltabase)
2826
2827 if baserev != nullrev and self.iscensored(baserev):
2828 # if base is censored, delta must be full replacement in a
2829 # single patch operation
2830 hlen = struct.calcsize(b">lll")
2831 oldlen = self.rawsize(baserev)
2832 newlen = len(delta) - hlen
2833 if delta[:hlen] != mdiff.replacediffheader(
2834 oldlen, newlen
2835 ):
2836 raise error.CensoredBaseError(
2837 self.display_id, self.node(baserev)
2838 )
2839
2840 if not flags and self._peek_iscensored(baserev, delta):
2841 flags |= REVIDX_ISCENSORED
2842
2843 # We assume consumers of addrevisioncb will want to retrieve
2844 # the added revision, which will require a call to
2845 # revision(). revision() will fast path if there is a cache
2846 # hit. So, we tell _addrevision() to always cache in this case.
2847 # We're only using addgroup() in the context of changegroup
2848 # generation so the revision data can always be handled as raw
2849 # by the flagprocessor.
2850 rev = self._addrevision(
2851 node,
2852 None,
2853 transaction,
2854 link,
2855 p1,
2856 p2,
2857 flags,
2858 (baserev, delta),
2859 alwayscache=alwayscache,
2860 deltacomputer=deltacomputer,
2861 sidedata=sidedata,
2862 )
2863
2864 if addrevisioncb:
2865 addrevisioncb(self, rev)
2866 empty = False
2867 finally:
2868 self._adding_group = False
2869 return not empty
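# --- illustrative sketch, not part of revlog.py -----------------------
# The censored-base check above, in isolation: a revlog delta is a
# sequence of ">lll" (start, end, newlength) headers, each followed by
# replacement bytes, so a delta over a censored base must be a single
# op replacing the whole old text (mdiff.replacediffheader semantics).
import struct

def toy_is_full_replacement(delta, oldlen):
    hlen = struct.calcsize(b">lll")
    if len(delta) < hlen:
        return False
    start, end, newlen = struct.unpack(b">lll", delta[:hlen])
    return (start, end, newlen) == (0, oldlen, len(delta) - hlen)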
2870
2871 def iscensored(self, rev):
2872 """Check if a file revision is censored."""
2873 if not self._censorable:
2874 return False
2875
2876 return self.flags(rev) & REVIDX_ISCENSORED
2877
2878 def _peek_iscensored(self, baserev, delta):
2879 """Quickly check if a delta produces a censored revision."""
2880 if not self._censorable:
2881 return False
2882
2883 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2884
2885 def getstrippoint(self, minlink):
2886 """find the minimum rev that must be stripped to strip the linkrev
2887
2888 Returns a tuple containing the minimum rev and a set of all revs that
2889 have linkrevs that will be broken by this strip.
2890 """
2891 return storageutil.resolvestripinfo(
2892 minlink,
2893 len(self) - 1,
2894 self.headrevs(),
2895 self.linkrev,
2896 self.parentrevs,
2897 )
2898
2899 def strip(self, minlink, transaction):
2900 """truncate the revlog on the first revision with a linkrev >= minlink
2901
2902 This function is called when we're stripping revision minlink and
2903 its descendants from the repository.
2904
2905 We have to remove all revisions with linkrev >= minlink, because
2906 the equivalent changelog revisions will be renumbered after the
2907 strip.
2908
2909 So we truncate the revlog on the first of these revisions, and
2910 trust that the caller has saved the revisions that shouldn't be
2911 removed and that it'll re-add them after this truncation.
2912 """
2913 if len(self) == 0:
2914 return
2915
2916 rev, _ = self.getstrippoint(minlink)
2917 if rev == len(self):
2918 return
2919
2920 # first truncate the files on disk
2921 data_end = self.start(rev)
2922 if not self._inline:
2923 transaction.add(self._datafile, data_end)
2924 end = rev * self.index.entry_size
2925 else:
2926 end = data_end + (rev * self.index.entry_size)
2927
2928 if self._sidedatafile:
2929 sidedata_end = self.sidedata_cut_off(rev)
2930 transaction.add(self._sidedatafile, sidedata_end)
2931
2932 transaction.add(self._indexfile, end)
2933 if self._docket is not None:
2934 # XXX we could leverage the docket while stripping. However it is
2935 # not powerful enough at the time of this comment
2936 self._docket.index_end = end
2937 self._docket.data_end = data_end
2938 self._docket.sidedata_end = sidedata_end
2939 self._docket.write(transaction, stripping=True)
2940
2941 # then reset internal state in memory to forget those revisions
2942 self._revisioncache = None
2943 self._chaininfocache = util.lrucachedict(500)
2944 self._chunkclear()
2945
2946 del self.index[rev:-1]
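# --- illustrative sketch, not part of revlog.py -----------------------
# The truncation offsets computed above, in isolation (the 64-byte
# entry size is an assumption for the example, matching the v1 index):
# a split revlog cuts the index at rev * entry_size and the data file
# at start(rev); an inline revlog keeps both in one file.
def toy_truncation_offsets(rev, data_end, inline, entry_size=64):
    if inline:
        return {'index': data_end + rev * entry_size}
    return {'index': rev * entry_size, 'data': data_end}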
2947
2948 def checksize(self):
2949 """Check size of index and data files
2950
2951 return a (dd, di) tuple.
2952 - dd: extra bytes for the "data" file
2953 - di: extra bytes for the "index" file
2954
2955 A healthy revlog will return (0, 0).
2956 """
2957 expected = 0
2958 if len(self):
2959 expected = max(0, self.end(len(self) - 1))
2960
2961 try:
2962 with self._datafp() as f:
2963 f.seek(0, io.SEEK_END)
2964 actual = f.tell()
2965 dd = actual - expected
2966 except IOError as inst:
2967 if inst.errno != errno.ENOENT:
2968 raise
2969 dd = 0
2970
96
97 rl.opener.rename(newrl._indexfile, rl._indexfile)
98 if not rl._inline:
99 rl.opener.rename(newrl._datafile, rl._datafile)
2971 try:
2972 f = self.opener(self._indexfile)
2973 f.seek(0, io.SEEK_END)
2974 actual = f.tell()
2975 f.close()
2976 s = self.index.entry_size
2977 i = max(0, actual // s)
2978 di = actual - (i * s)
2979 if self._inline:
2980 databytes = 0
2981 for r in self:
2982 databytes += max(0, self.length(r))
2983 dd = 0
2984 di = actual - len(self) * s - databytes
2985 except IOError as inst:
2986 if inst.errno != errno.ENOENT:
2987 raise
2988 di = 0
2989
2990 return (dd, di)
2991
2992 def files(self):
2993 res = [self._indexfile]
2994 if not self._inline:
2995 res.append(self._datafile)
2996 return res
2997
2998 def emitrevisions(
2999 self,
3000 nodes,
3001 nodesorder=None,
3002 revisiondata=False,
3003 assumehaveparentrevisions=False,
3004 deltamode=repository.CG_DELTAMODE_STD,
3005 sidedata_helpers=None,
3006 ):
3007 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3008 raise error.ProgrammingError(
3009 b'unhandled value for nodesorder: %s' % nodesorder
3010 )
3011
3012 if nodesorder is None and not self._generaldelta:
3013 nodesorder = b'storage'
3014
3015 if (
3016 not self._storedeltachains
3017 and deltamode != repository.CG_DELTAMODE_PREV
3018 ):
3019 deltamode = repository.CG_DELTAMODE_FULL
3020
3021 return storageutil.emitrevisions(
3022 self,
3023 nodes,
3024 nodesorder,
3025 revlogrevisiondelta,
3026 deltaparentfn=self.deltaparent,
3027 candeltafn=self.candelta,
3028 rawsizefn=self.rawsize,
3029 revdifffn=self.revdiff,
3030 flagsfn=self.flags,
3031 deltamode=deltamode,
3032 revisiondata=revisiondata,
3033 assumehaveparentrevisions=assumehaveparentrevisions,
3034 sidedata_helpers=sidedata_helpers,
3035 )
3036
3037 DELTAREUSEALWAYS = b'always'
3038 DELTAREUSESAMEREVS = b'samerevs'
3039 DELTAREUSENEVER = b'never'
3040
3041 DELTAREUSEFULLADD = b'fulladd'
3042
3043 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3044
3045 def clone(
3046 self,
3047 tr,
3048 destrevlog,
3049 addrevisioncb=None,
3050 deltareuse=DELTAREUSESAMEREVS,
3051 forcedeltabothparents=None,
3052 sidedata_helpers=None,
3053 ):
3054 """Copy this revlog to another, possibly with format changes.
3055
3056 The destination revlog will contain the same revisions and nodes.
3057 However, it may not be bit-for-bit identical due to e.g. delta encoding
3058 differences.
3059
3060 The ``deltareuse`` argument controls how deltas from the existing revlog
3061 are preserved in the destination revlog. The argument can have the
3062 following values:
3063
3064 DELTAREUSEALWAYS
3065 Deltas will always be reused (if possible), even if the destination
3066 revlog would not select the same revisions for the delta. This is the
3067 fastest mode of operation.
3068 DELTAREUSESAMEREVS
3069 Deltas will be reused if the destination revlog would pick the same
3070 revisions for the delta. This mode strikes a balance between speed
3071 and optimization.
3072 DELTAREUSENEVER
3073 Deltas will never be reused. This is the slowest mode of execution.
3074 This mode can be used to recompute deltas (e.g. if the diff/delta
3075 algorithm changes).
3076 DELTAREUSEFULLADD
3077 Revisions will be re-added as if they were new content. This is
3078 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3079 e.g. large file detection and handling.
3080
3081 Delta computation can be slow, so the choice of delta reuse policy can
3082 significantly affect run time.
3083
3084 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3085 two extremes. Deltas will be reused if they are appropriate. But if the
3086 delta could choose a better revision, it will do so. This means if you
3087 are converting a non-generaldelta revlog to a generaldelta revlog,
3088 deltas will be recomputed if the delta's parent isn't a parent of the
3089 revision.
3090
3091 In addition to the delta policy, the ``forcedeltabothparents``
3092 argument controls whether to force computing deltas against both parents
3093 for merges. If unset, the destination revlog's current setting is used.
3094
3095 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3096 `sidedata_helpers`.
3097 """
3098 if deltareuse not in self.DELTAREUSEALL:
3099 raise ValueError(
3100 _(b'value for deltareuse invalid: %s') % deltareuse
3101 )
3102
3103 if len(destrevlog):
3104 raise ValueError(_(b'destination revlog is not empty'))
3105
3106 if getattr(self, 'filteredrevs', None):
3107 raise ValueError(_(b'source revlog has filtered revisions'))
3108 if getattr(destrevlog, 'filteredrevs', None):
3109 raise ValueError(_(b'destination revlog has filtered revisions'))
3110
3111 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
3112 # if possible.
3113 oldlazydelta = destrevlog._lazydelta
3114 oldlazydeltabase = destrevlog._lazydeltabase
3115 oldamd = destrevlog._deltabothparents
3116
3117 try:
3118 if deltareuse == self.DELTAREUSEALWAYS:
3119 destrevlog._lazydeltabase = True
3120 destrevlog._lazydelta = True
3121 elif deltareuse == self.DELTAREUSESAMEREVS:
3122 destrevlog._lazydeltabase = False
3123 destrevlog._lazydelta = True
3124 elif deltareuse == self.DELTAREUSENEVER:
3125 destrevlog._lazydeltabase = False
3126 destrevlog._lazydelta = False
3127
3128 destrevlog._deltabothparents = forcedeltabothparents or oldamd
3129
3130 self._clone(
3131 tr,
3132 destrevlog,
3133 addrevisioncb,
3134 deltareuse,
3135 forcedeltabothparents,
3136 sidedata_helpers,
3137 )
3138
3139 finally:
3140 destrevlog._lazydelta = oldlazydelta
3141 destrevlog._lazydeltabase = oldlazydeltabase
3142 destrevlog._deltabothparents = oldamd
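# --- illustrative sketch, not part of revlog.py -----------------------
# How the three simple reuse policies map onto the two laziness
# switches toggled in the try-block above:
def toy_reuse_policy(policy):
    return {
        b'always': dict(lazydeltabase=True, lazydelta=True),
        b'samerevs': dict(lazydeltabase=False, lazydelta=True),
        b'never': dict(lazydeltabase=False, lazydelta=False),
    }[policy]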
3143
3144 def _clone(
3145 self,
3146 tr,
3147 destrevlog,
3148 addrevisioncb,
3149 deltareuse,
3150 forcedeltabothparents,
3151 sidedata_helpers,
3152 ):
3153 """perform the core duty of `revlog.clone` after parameter processing"""
3154 deltacomputer = deltautil.deltacomputer(destrevlog)
3155 index = self.index
3156 for rev in self:
3157 entry = index[rev]
3158
3159 # Some classes override linkrev to take filtered revs into
3160 # account. Use raw entry from index.
3161 flags = entry[0] & 0xFFFF
3162 linkrev = entry[4]
3163 p1 = index[entry[5]][7]
3164 p2 = index[entry[6]][7]
3165 node = entry[7]
3166
3167 # (Possibly) reuse the delta from the revlog if allowed and
3168 # the revlog chunk is a delta.
3169 cachedelta = None
3170 rawtext = None
3171 if deltareuse == self.DELTAREUSEFULLADD:
3172 text = self._revisiondata(rev)
3173 sidedata = self.sidedata(rev)
3174
3175 if sidedata_helpers is not None:
3176 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3177 self, sidedata_helpers, sidedata, rev
3178 )
3179 flags = flags | new_flags[0] & ~new_flags[1]
3180
3181 destrevlog.addrevision(
3182 text,
3183 tr,
3184 linkrev,
3185 p1,
3186 p2,
3187 cachedelta=cachedelta,
3188 node=node,
3189 flags=flags,
3190 deltacomputer=deltacomputer,
3191 sidedata=sidedata,
3192 )
3193 else:
3194 if destrevlog._lazydelta:
3195 dp = self.deltaparent(rev)
3196 if dp != nullrev:
3197 cachedelta = (dp, bytes(self._chunk(rev)))
3198
3199 sidedata = None
3200 if not cachedelta:
3201 rawtext = self._revisiondata(rev)
3202 sidedata = self.sidedata(rev)
3203 if sidedata is None:
3204 sidedata = self.sidedata(rev)
3205
3206 if sidedata_helpers is not None:
3207 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3208 self, sidedata_helpers, sidedata, rev
3209 )
3210 flags = flags | new_flags[0] & ~new_flags[1]
3211
3212 with destrevlog._writing(tr):
3213 destrevlog._addrevision(
3214 node,
3215 rawtext,
3216 tr,
3217 linkrev,
3218 p1,
3219 p2,
3220 flags,
3221 cachedelta,
3222 deltacomputer=deltacomputer,
3223 sidedata=sidedata,
3224 )
3225
3226 if addrevisioncb:
3227 addrevisioncb(self, rev, node)
3228
3229 def censorrevision(self, tr, censornode, tombstone=b''):
3230 if self._format_version == REVLOGV0:
3231 raise error.RevlogError(
3232 _(b'cannot censor with version %d revlogs')
3233 % self._format_version
3234 )
3235
3236 censorrev = self.rev(censornode)
3237 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
3238
3239 if len(tombstone) > self.rawsize(censorrev):
3240 raise error.Abort(
3241 _(b'censor tombstone must be no longer than censored data')
3242 )
3243
100
101 rl.clearcaches()
102 rl._loadindex()
3244 # Rewriting the revlog in place is hard. Our strategy for censoring is
3245 # to create a new revlog, copy all revisions to it, then replace the
3246 # revlogs on transaction close.
3247 #
3248 # This is a bit dangerous. We could easily have a mismatch of state.
3249 newrl = revlog(
3250 self.opener,
3251 target=self.target,
3252 radix=self.radix,
3253 postfix=b'tmpcensored',
3254 censorable=True,
3255 )
3256 newrl._format_version = self._format_version
3257 newrl._format_flags = self._format_flags
3258 newrl._generaldelta = self._generaldelta
3259 newrl._parse_index = self._parse_index
3260
3261 for rev in self.revs():
3262 node = self.node(rev)
3263 p1, p2 = self.parents(node)
3264
3265 if rev == censorrev:
3266 newrl.addrawrevision(
3267 tombstone,
3268 tr,
3269 self.linkrev(censorrev),
3270 p1,
3271 p2,
3272 censornode,
3273 REVIDX_ISCENSORED,
3274 )
3275
3276 if newrl.deltaparent(rev) != nullrev:
3277 raise error.Abort(
3278 _(
3279 b'censored revision stored as delta; '
3280 b'cannot censor'
3281 ),
3282 hint=_(
3283 b'censoring of revlogs is not '
3284 b'fully implemented; please report '
3285 b'this bug'
3286 ),
3287 )
3288 continue
3289
3290 if self.iscensored(rev):
3291 if self.deltaparent(rev) != nullrev:
3292 raise error.Abort(
3293 _(
3294 b'cannot censor due to censored '
3295 b'revision having delta stored'
3296 )
3297 )
3298 rawtext = self._chunk(rev)
3299 else:
3300 rawtext = self.rawdata(rev)
3301
3302 newrl.addrawrevision(
3303 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3304 )
3305
3306 tr.addbackup(self._indexfile, location=b'store')
3307 if not self._inline:
3308 tr.addbackup(self._datafile, location=b'store')
3309
3310 self.opener.rename(newrl._indexfile, self._indexfile)
3311 if not self._inline:
3312 self.opener.rename(newrl._datafile, self._datafile)
3313
3314 self.clearcaches()
3315 self._loadindex()
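# --- illustrative sketch, not part of revlog.py -----------------------
# The copy-based strategy above, reduced to its essence over a toy list
# of (node, rawtext) pairs: rebuild everything, swapping in the
# tombstone for the censored node, then replace the old storage.
def toy_censor(revisions, censorednode, tombstone):
    return [
        (node, tombstone if node == censorednode else rawtext)
        for node, rawtext in revisions
    ]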
3316
3317 def verifyintegrity(self, state):
3318 """Verifies the integrity of the revlog.
3319
3320 Yields ``revlogproblem`` instances describing problems that are
3321 found.
3322 """
3323 dd, di = self.checksize()
3324 if dd:
3325 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3326 if di:
3327 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3328
3329 version = self._format_version
3330
3331 # The verifier tells us what version revlog we should be.
3332 if version != state[b'expectedversion']:
3333 yield revlogproblem(
3334 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3335 % (self.display_id, version, state[b'expectedversion'])
3336 )
3337
3338 state[b'skipread'] = set()
3339 state[b'safe_renamed'] = set()
3340
3341 for rev in self:
3342 node = self.node(rev)
3343
3344 # Verify contents. 4 cases to care about:
3345 #
3346 # common: the most common case
3347 # rename: with a rename
3348 # meta: file content starts with b'\1\n', the metadata
3349 # header defined in filelog.py, but without a rename
3350 # ext: content stored externally
3351 #
3352 # More formally, their differences are shown below:
3353 #
3354 # | common | rename | meta | ext
3355 # -------------------------------------------------------
3356 # flags() | 0 | 0 | 0 | not 0
3357 # renamed() | False | True | False | ?
3358 # rawtext[0:2]=='\1\n'| False | True | True | ?
3359 #
3360 # "rawtext" means the raw text stored in revlog data, which
3361 # could be retrieved by "rawdata(rev)". "text"
3362 # mentioned below is "revision(rev)".
3363 #
3364 # There are 3 different lengths stored physically:
3365 # 1. L1: rawsize, stored in revlog index
3366 # 2. L2: len(rawtext), stored in revlog data
3367 # 3. L3: len(text), stored in revlog data if flags==0, or
3368 # possibly somewhere else if flags!=0
3369 #
3370 # L1 should be equal to L2. L3 could be different from them.
3371 # "text" may or may not affect commit hash depending on flag
3372 # processors (see flagutil.addflagprocessor).
3373 #
3374 # | common | rename | meta | ext
3375 # -------------------------------------------------
3376 # rawsize() | L1 | L1 | L1 | L1
3377 # size() | L1 | L2-LM | L1(*) | L1 (?)
3378 # len(rawtext) | L2 | L2 | L2 | L2
3379 # len(text) | L2 | L2 | L2 | L3
3380 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3381 #
3382 # LM: length of metadata, depending on rawtext
3383 # (*): not ideal, see comment in filelog.size
3384 # (?): could be "- len(meta)" if the resolved content has
3385 # rename metadata
3386 #
3387 # Checks needed to be done:
3388 # 1. length check: L1 == L2, in all cases.
3389 # 2. hash check: depending on flag processor, we may need to
3390 # use either "text" (external), or "rawtext" (in revlog).
3391
3392 try:
3393 skipflags = state.get(b'skipflags', 0)
3394 if skipflags:
3395 skipflags &= self.flags(rev)
3396
3397 _verify_revision(self, skipflags, state, node)
3398
3399 l1 = self.rawsize(rev)
3400 l2 = len(self.rawdata(node))
3401
3402 if l1 != l2:
3403 yield revlogproblem(
3404 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3405 node=node,
3406 )
3407
3408 except error.CensoredNodeError:
3409 if state[b'erroroncensored']:
3410 yield revlogproblem(
3411 error=_(b'censored file data'), node=node
3412 )
3413 state[b'skipread'].add(node)
3414 except Exception as e:
3415 yield revlogproblem(
3416 error=_(b'unpacking %s: %s')
3417 % (short(node), stringutil.forcebytestr(e)),
3418 node=node,
3419 )
3420 state[b'skipread'].add(node)
3421
3422 def storageinfo(
3423 self,
3424 exclusivefiles=False,
3425 sharedfiles=False,
3426 revisionscount=False,
3427 trackedsize=False,
3428 storedsize=False,
3429 ):
3430 d = {}
3431
3432 if exclusivefiles:
3433 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
3434 if not self._inline:
3435 d[b'exclusivefiles'].append((self.opener, self._datafile))
3436
3437 if sharedfiles:
3438 d[b'sharedfiles'] = []
3439
3440 if revisionscount:
3441 d[b'revisionscount'] = len(self)
3442
3443 if trackedsize:
3444 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3445
3446 if storedsize:
3447 d[b'storedsize'] = sum(
3448 self.opener.stat(path).st_size for path in self.files()
3449 )
3450
3451 return d
3452
3453 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3454 if not self.hassidedata:
3455 return
3456 # revlog formats with sidedata support do not support inline
3457 assert not self._inline
3458 if not helpers[1] and not helpers[2]:
3459 # Nothing to generate or remove
3460 return
3461
3462 new_entries = []
3463 # append the new sidedata
3464 with self._writing(transaction):
3465 ifh, dfh, sdfh = self._writinghandles
3466 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3467
3468 current_offset = sdfh.tell()
3469 for rev in range(startrev, endrev + 1):
3470 entry = self.index[rev]
3471 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
3472 store=self,
3473 sidedata_helpers=helpers,
3474 sidedata={},
3475 rev=rev,
3476 )
3477
3478 serialized_sidedata = sidedatautil.serialize_sidedata(
3479 new_sidedata
3480 )
3481
3482 sidedata_compression_mode = COMP_MODE_INLINE
3483 if serialized_sidedata and self.hassidedata:
3484 sidedata_compression_mode = COMP_MODE_PLAIN
3485 h, comp_sidedata = self.compress(serialized_sidedata)
3486 if (
3487 h != b'u'
3488 and comp_sidedata[0] != b'\0'
3489 and len(comp_sidedata) < len(serialized_sidedata)
3490 ):
3491 assert not h
3492 if (
3493 comp_sidedata[0]
3494 == self._docket.default_compression_header
3495 ):
3496 sidedata_compression_mode = COMP_MODE_DEFAULT
3497 serialized_sidedata = comp_sidedata
3498 else:
3499 sidedata_compression_mode = COMP_MODE_INLINE
3500 serialized_sidedata = comp_sidedata
3501 if entry[8] != 0 or entry[9] != 0:
3502 # rewriting entries that already have sidedata is not
3503 # supported yet, because it introduces garbage data in the
3504 # revlog.
3505 msg = b"rewriting existing sidedata is not supported yet"
3506 raise error.Abort(msg)
3507
3508 # Apply (potential) flags to add and to remove after running
3509 # the sidedata helpers
3510 new_offset_flags = entry[0] | flags[0] & ~flags[1]
3511 entry_update = (
3512 current_offset,
3513 len(serialized_sidedata),
3514 new_offset_flags,
3515 sidedata_compression_mode,
3516 )
3517
3518 # the sidedata computation might have moved the file cursors around
3519 sdfh.seek(current_offset, os.SEEK_SET)
3520 sdfh.write(serialized_sidedata)
3521 new_entries.append(entry_update)
3522 current_offset += len(serialized_sidedata)
3523 self._docket.sidedata_end = sdfh.tell()
3524
3525 # rewrite the new index entries
3526 ifh.seek(startrev * self.index.entry_size)
3527 for i, e in enumerate(new_entries):
3528 rev = startrev + i
3529 self.index.replace_sidedata_info(rev, *e)
3530 packed = self.index.entry_binary(rev)
3531 if rev == 0 and self._docket is None:
3532 header = self._format_flags | self._format_version
3533 header = self.index.pack_header(header)
3534 packed = header + packed
3535 ifh.write(packed)