upstream/mercurial-mirror Commit - r48183:33d62691

revlog: move censoring code in a dedicated module...

marmoute -

r48183:33d62691 default

parent child

mercurial/revlog.py

0 +8 -80

             # revlog.py - storage back-end for mercurial
             # coding: utf8
             #
             # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
             #
             # This software may be used and distributed according to the terms of the
             # GNU General Public License version 2 or any later version.
             """Storage back-end for Mercurial.
             This provides efficient delta storage with O(1) retrieve and append
             and O(changes) merge between branches.
             """
             from __future__ import absolute_import
             import binascii
             import collections
             import contextlib
             import errno
             import io
             import os
             import struct
             import zlib
             # import stuff from node for others to import from revlog
             from .node import (
                 bin,
                 hex,
                 nullrev,
                 sha1nodeconstants,
                 short,
                 wdirrev,
             )
             from .i18n import _
             from .pycompat import getattr
             from .revlogutils.constants import (
                 ALL_KINDS,
                 CHANGELOGV2,
                 COMP_MODE_DEFAULT,
                 COMP_MODE_INLINE,
                 COMP_MODE_PLAIN,
                 FEATURES_BY_VERSION,
                 FLAG_GENERALDELTA,
                 FLAG_INLINE_DATA,
                 INDEX_HEADER,
                 KIND_CHANGELOG,
                 REVLOGV0,
                 REVLOGV1,
                 REVLOGV1_FLAGS,
                 REVLOGV2,
                 REVLOGV2_FLAGS,
                 REVLOG_DEFAULT_FLAGS,
                 REVLOG_DEFAULT_FORMAT,
                 REVLOG_DEFAULT_VERSION,
                 SUPPORTED_FLAGS,
             )
             from .revlogutils.flagutil import (
                 REVIDX_DEFAULT_FLAGS,
                 REVIDX_ELLIPSIS,
                 REVIDX_EXTSTORED,
                 REVIDX_FLAGS_ORDER,
                 REVIDX_HASCOPIESINFO,
                 REVIDX_ISCENSORED,
                 REVIDX_RAWTEXT_CHANGING_FLAGS,
             )
             from .thirdparty import attr
             from . import (
                 ancestor,
                 dagop,
                 error,
                 mdiff,
                 policy,
                 pycompat,
                 templatefilters,
                 util,
             )
             from .interfaces import (
                 repository,
                 util as interfaceutil,
             )
             from .revlogutils import (
+                censor,
                 deltas as deltautil,
                 docket as docketutil,
                 flagutil,
                 nodemap as nodemaputil,
                 revlogv0,
                 sidedata as sidedatautil,
             )
             from .utils import (
                 storageutil,
                 stringutil,
             )
             # Bare references to all the names below, so that pyflakes does not report
             # them as unused imports. These names must stay available in this module
             # because extensions import them from here.
             REVLOGV0
             REVLOGV1
             REVLOGV2
             FLAG_INLINE_DATA
             FLAG_GENERALDELTA
             REVLOG_DEFAULT_FLAGS
             REVLOG_DEFAULT_FORMAT
             REVLOG_DEFAULT_VERSION
             REVLOGV1_FLAGS
             REVLOGV2_FLAGS
             REVIDX_ISCENSORED
             REVIDX_ELLIPSIS
             REVIDX_HASCOPIESINFO
             REVIDX_EXTSTORED
             REVIDX_DEFAULT_FLAGS
             REVIDX_FLAGS_ORDER
             REVIDX_RAWTEXT_CHANGING_FLAGS
             # Implementation modules obtained through `policy`. Code in this module
             # checks `rustrevlog` against None before using it, so the rust modules
             # may be unavailable.
             parsers = policy.importmod('parsers')
             rustancestor = policy.importrust('ancestor')
             rustdagop = policy.importrust('dagop')
             rustrevlog = policy.importrust('revlog')
             # Aliased for performance.
             _zlibdecompress = zlib.decompress
             # max size of revlog with inline data (131072 bytes = 128 KiB)
             _maxinline = 131072
             # 1048576 bytes = 1 MiB
             _chunksize = 1048576
             # Flag processors for REVIDX_ELLIPSIS.
             def ellipsisreadprocessor(rl, text):
                 """Read-side processor for REVIDX_ELLIPSIS: hand ``text`` back untouched.

                 ``rl`` is accepted but not used. The result is the pair
                 ``(text, False)``.
                 """
                 unchanged = (text, False)
                 return unchanged
             def ellipsiswriteprocessor(rl, text):
                 """Write-side processor for REVIDX_ELLIPSIS: hand ``text`` back untouched.

                 ``rl`` is accepted but not used. The result is the pair
                 ``(text, False)``.
                 """
                 unchanged = (text, False)
                 return unchanged
             def ellipsisrawprocessor(rl, text):
                 """Raw processor for REVIDX_ELLIPSIS: always answer ``False``.

                 Neither ``rl`` nor ``text`` is inspected.
                 """
                 answer = False
                 return answer
             # (read, write, raw) processors for REVIDX_ELLIPSIS. revlog._init_opts()
             # registers this tuple in the instance flag processors when the
             # b'enableellipsis' opener option is set.
             ellipsisprocessor = (
                 ellipsisreadprocessor,
                 ellipsiswriteprocessor,
                 ellipsisrawprocessor,
             )
             def offset_type(offset, type):
                 """Pack a data offset and a flag field into a single integer.

                 The offset is shifted up by 16 bits and ``type`` (the flags) fills the
                 low bits. ``ValueError`` is raised when ``type`` has bits outside of
                 ``flagutil.REVIDX_KNOWN_FLAGS``.
                 """
                 unknown_bits = type & ~flagutil.REVIDX_KNOWN_FLAGS
                 if unknown_bits != 0:
                     raise ValueError(b'unknown revlog index flags')
                 shifted_offset = int(offset) << 16
                 return int(shifted_offset | type)
             def _verify_revision(rl, skipflags, state, node):
                 """Verify the integrity of the given revlog ``node`` while providing a hook
                 point for extensions to influence the operation."""
                 if skipflags:
                     state[b'skipread'].add(node)
                 else:
                     # Side-effect: read content and verify hash.
                     rl.revision(node)
             # True if a fast implementation for persistent-nodemap is available
             #
             # We also consider that we have a "fast" implementation in "pure" Python,
             # because people using pure do not really have performance considerations
             # (and have a wheelbarrow of other sources of slowness).
             HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
                 parsers, 'BaseIndexObject'
             )
             @attr.s(slots=True, frozen=True)
             class _revisioninfo(object):
                 """Information about a revision that allows building its fulltext

                 node:       expected hash of the revision
                 p1, p2:     parent revs of the revision
                 btext:      built text cache consisting of a one-element list
                 textlen:    presumably the length of the revision fulltext (not
                             documented in the original; confirm with callers)
                 cachedelta: (baserev, uncompressed_delta) or None
                 flags:      flags associated to the revision storage

                 One of btext[0] or cachedelta must be set.
                 """
                 # NOTE: the declaration order below is the positional argument order
                 # of the generated __init__; do not reorder.
                 node = attr.ib()
                 p1 = attr.ib()
                 p2 = attr.ib()
                 btext = attr.ib()
                 textlen = attr.ib()
                 cachedelta = attr.ib()
                 flags = attr.ib()
             @interfaceutil.implementer(repository.irevisiondelta)
             @attr.s(slots=True)
             class revlogrevisiondelta(object):
                 """Revlog implementation of ``repository.irevisiondelta``.

                 Plain attribute container; see the ``irevisiondelta`` interface for the
                 meaning of each field. Only ``linknode`` has a default (None), every
                 other field must be supplied.
                 """
                 # NOTE: the declaration order below is the positional argument order
                 # of the generated __init__; do not reorder.
                 node = attr.ib()
                 p1node = attr.ib()
                 p2node = attr.ib()
                 basenode = attr.ib()
                 flags = attr.ib()
                 baserevisionsize = attr.ib()
                 revision = attr.ib()
                 delta = attr.ib()
                 sidedata = attr.ib()
                 protocol_flags = attr.ib()
                 linknode = attr.ib(default=None)
             @interfaceutil.implementer(repository.iverifyproblem)
             @attr.s(frozen=True)
             class revlogproblem(object):
                 """Revlog implementation of ``repository.iverifyproblem``.

                 Immutable attribute container; all three fields default to None.
                 """
                 warning = attr.ib(default=None)
                 error = attr.ib(default=None)
                 node = attr.ib(default=None)
             def parse_index_v1(data, inline):
                 """Parse index ``data`` with ``parsers.parse_index2``.

                 ``inline`` is forwarded to the parser. Returns the ``(index, cache)``
                 pair it produced.
                 """
                 # `parsers` is whichever implementation the module policy selected
                 parsed_index, chunk_cache = parsers.parse_index2(data, inline)
                 return parsed_index, chunk_cache
             def parse_index_v2(data, inline):
                 """Parse index ``data`` with ``parsers.parse_index2`` in revlogv2 mode.

                 ``inline`` is forwarded to the parser. Returns the ``(index, cache)``
                 pair it produced.
                 """
                 # `parsers` is whichever implementation the module policy selected
                 parsed_index, chunk_cache = parsers.parse_index2(
                     data, inline, revlogv2=True
                 )
                 return parsed_index, chunk_cache
             def parse_index_cl_v2(data, inline):
                 """Parse changelog-v2 index ``data``.

                 The parser always comes from ``.pure.parsers`` (it is imported
                 explicitly here, not through the policy-selected ``parsers`` module).
                 ``inline`` must be false. Returns the ``(index, cache)`` pair produced
                 by the parser.
                 """
                 assert not inline
                 # imported at call time, under an alias so that it does not shadow
                 # this function's own name
                 from .pure.parsers import parse_index_cl_v2 as pure_parse_index_cl_v2

                 parsed_index, chunk_cache = pure_parse_index_cl_v2(data)
                 return parsed_index, chunk_cache
             # Stays None unless the parsers module exposes the "devel" nodemap parser.
             parse_index_v1_nodemap = None
             if util.safehasattr(parsers, 'parse_index_devel_nodemap'):

                 def parse_index_v1_nodemap(data, inline):
                     """Parse index ``data`` with ``parsers.parse_index_devel_nodemap``.

                     Returns the ``(index, cache)`` pair produced by the parser.
                     """
                     parsed_index, chunk_cache = parsers.parse_index_devel_nodemap(
                         data, inline
                     )
                     return parsed_index, chunk_cache
             def parse_index_v1_mixed(data, inline):
                 """Parse index ``data`` like ``parse_index_v1`` and wrap the index.

                 The parsed index is wrapped in ``rustrevlog.MixedIndex``; the cache is
                 returned as is. Returns ``(mixed_index, cache)``.
                 """
                 base_index, chunk_cache = parse_index_v1(data, inline)
                 mixed_index = rustrevlog.MixedIndex(base_index)
                 return mixed_index, chunk_cache
             # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
             # signed integer)
             _maxentrysize = 0x7FFFFFFF
             # Translatable error message templates. Both take, in order: the revlog
             # identifier (%s), the expected number of bytes (%d) and the read offset
             # (%d). The last %d is the number of bytes actually obtained for
             # PARTIAL_READ_MSG and the data size for FILE_TOO_SHORT_MSG.
             PARTIAL_READ_MSG = _(
                 b'partial read of revlog %s; expected %d bytes from offset %d, got %d'
             )
             FILE_TOO_SHORT_MSG = _(
                 b'cannot read from revlog %s;'
                 b'  expected %d bytes from offset %d, data size is %d'
             )
             class revlog(object):
                 """
                 the underlying revision storage object
                 A revlog consists of two parts, an index and the revision data.
                 The index is a file with a fixed record size containing
                 information on each revision, including its nodeid (hash), the
                 nodeids of its parents, the position and offset of its data within
                 the data file, and the revision it's based on. Finally, each entry
                 contains a linkrev entry that can serve as a pointer to external
                 data.
                 The revision data itself is a linear collection of data chunks.
                 Each chunk represents a revision and is usually represented as a
                 delta against the previous chunk. To bound lookup time, runs of
                 deltas are limited to about 2 times the length of the original
                 version data. This makes retrieval of a version proportional to
                 its size, or O(1) relative to the number of revisions.
                 Both pieces of the revlog are written to in an append-only
                 fashion, which means we never need to rewrite a file to insert or
                 remove data, and can use some simple techniques to avoid the need
                 for locking while reading.
                 If checkambig, indexfile is opened with checkambig=True at
                 writing, to avoid file stat ambiguity.
                 If mmaplargeindex is True, and an mmapindexthreshold is set, the
                 index will be mmapped rather than read if it is larger than the
                 configured threshold.
                 If censorable is True, the revlog can have censored revisions.
                 If `upperboundcomp` is not None, this is the expected maximal gain from
                 compression for the data content.
                 `concurrencychecker` is an optional function that receives 3 arguments: a
                 file handle, a filename, and an expected position. It should check whether
                 the current position in the file handle is valid, and log/warn/fail (by
                 raising).
                 Internal details
                 ----------------
                 A large part of the revlog logic deals with revisions' "index entries", tuple
                 objects that contains the same "items" whatever the revlog version.
                 Different versions will have different ways of storing these items (sometimes
                 not having them at all), but the tuple will always be the same. New fields
                 are usually added at the end to avoid breaking existing code that relies
                 on the existing order. The field are defined as follows:
                 [0] offset:
                         The byte index of the start of revision data chunk.
                         That value is shifted up by 16 bits. use "offset = field >> 16" to
                         retrieve it.
                     flags:
                         A flag field that carries special information or changes the behavior
                         of the revision. (see `REVIDX_*` constants for details)
                         The flag field only occupies the first 16 bits of this field,
                         use "flags = field & 0xFFFF" to retrieve the value.
                 [1] compressed length:
                         The size, in bytes, of the chunk on disk
                 [2] uncompressed length:
                         The size, in bytes, of the full revision once reconstructed.
                 [3] base rev:
                         Either the base of the revision delta chain (without general
                         delta), or the base of the delta (stored in the data chunk)
                         with general delta.
                 [4] link rev:
                         Changelog revision number of the changeset introducing this
                         revision.
                 [5] parent 1 rev:
                         Revision number of the first parent
                 [6] parent 2 rev:
                         Revision number of the second parent
                 [7] node id:
                         The node id of the current revision
                 [8] sidedata offset:
                         The byte index of the start of the revision's side-data chunk.
                 [9] sidedata chunk length:
                         The size, in bytes, of the revision's side-data chunk.
                 [10] data compression mode:
                         two bits that detail the way the data chunk is compressed on disk.
                         (see "COMP_MODE_*" constants for details). For revlog version 0 and
                         1 this will always be COMP_MODE_INLINE.
                 [11] side-data compression mode:
                         two bits that detail the way the sidedata chunk is compressed on disk.
                         (see "COMP_MODE_*" constants for details)
                 """
                 _flagserrorclass = error.RevlogError
                 def __init__(
                     self,
                     opener,
                     target,
                     radix,
                     postfix=None,  # only exist for `tmpcensored` now
                     checkambig=False,
                     mmaplargeindex=False,
                     censorable=False,
                     upperboundcomp=None,
                     persistentnodemap=False,
                     concurrencychecker=None,
                     trypending=False,
                 ):
                     """
                     create a revlog object

                     opener is a function that abstracts the file opening operation
                     and can be used to implement COW semantics or the like.

                     `target`: a (KIND, ID) tuple that identifies the content stored in
                     this revlog. It helps the rest of the code to understand what the
                     revlog is about without having to resort to heuristics and index
                     filename analysis. Note that this must reliably be set by normal
                     code, but that test, debug, or performance measurement code might
                     not set this to an accurate value.

                     `radix`: base name of the revlog files; the entry point is
                     `<radix>.i`.

                     `postfix`: when not None, the entry point is `<radix>.i.<postfix>`
                     instead.

                     `checkambig`, `mmaplargeindex`, `censorable`, `upperboundcomp` and
                     `concurrencychecker` are stored on the instance; their meaning is
                     described in the class docstring.

                     `persistentnodemap`: when true, a nodemap file is looked up through
                     `nodemaputil.get_nodemap_file()`.

                     `trypending`: when true, `<radix>.i.a` is used as entry point if
                     that file exists.

                     The index is loaded right away (see `_loadindex()`).
                     """
                     self.upperboundcomp = upperboundcomp
                     self.radix = radix
                     # File names; the ones in use are filled in by _loadindex().
                     self._docket_file = None
                     self._indexfile = None
                     self._datafile = None
                     self._sidedatafile = None
                     self._nodemap_file = None
                     self.postfix = postfix
                     self._trypending = trypending
                     self.opener = opener
                     if persistentnodemap:
                         self._nodemap_file = nodemaputil.get_nodemap_file(self)
                     assert target[0] in ALL_KINDS
                     assert len(target) == 2
                     self.target = target
                     #  When True, indexfile is opened with checkambig=True at writing, to
                     #  avoid file stat ambiguity.
                     self._checkambig = checkambig
                     self._mmaplargeindex = mmaplargeindex
                     self._censorable = censorable
                     # 3-tuple of (node, rev, text) for a raw revision.
                     self._revisioncache = None
                     # Maps rev to chain base rev.
                     self._chainbasecache = util.lrucachedict(100)
                     # 2-tuple of (offset, data) of raw data from the revlog at an offset.
                     self._chunkcache = (0, b'')
                     # How much data to read and cache into the raw revlog data cache.
                     self._chunkcachesize = 65536
                     self._maxchainlen = None
                     self._deltabothparents = True
                     self.index = None
                     self._docket = None
                     self._nodemap_docket = None
                     # Mapping of partial identifiers to full nodes.
                     self._pcache = {}
                     # Default compression engine and its options; both can be
                     # overridden from the opener options in _init_opts().
                     self._compengine = b'zlib'
                     self._compengineopts = {}
                     self._maxdeltachainspan = -1
                     self._withsparseread = False
                     self._sparserevlog = False
                     self.hassidedata = False
                     self._srdensitythreshold = 0.50
                     self._srmingapsize = 262144
                     # Make copy of flag processors so each revlog instance can support
                     # custom flags.
                     self._flagprocessors = dict(flagutil.flagprocessors)
                     # 3-tuple of file handles being used for active writing.
                     self._writinghandles = None
                     # prevent nesting of addgroup
                     self._adding_group = None
                     # Reads the opener options and the on-disk entry point; every
                     # attribute above must be set before this call.
                     self._loadindex()
                     self._concurrencychecker = concurrencychecker
                 def _init_opts(self):
                     """Apply the opener options to this revlog's default settings.

                     The values set here might still be affected by what is actually
                     read from disk. Three values are returned for `_loadindex()`:

                     * newversionflags:
                         version header to use if we need to create a new revlog
                     * mmapindexthreshold:
                         minimal index size for start to use mmap
                     * force_nodemap:
                         force the usage of a "development" version of the nodemap code

                     Raises `error.RevlogError` when the chunk cache size is not a
                     strictly positive power of 2.
                     """
                     opts = self.opener.options

                     # header to use when the revlog has to be created
                     if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
                         new_header = CHANGELOGV2
                     elif b'revlogv2' in opts:
                         new_header = REVLOGV2
                     elif b'revlogv1' in opts:
                         new_header = REVLOGV1 | FLAG_INLINE_DATA
                         if b'generaldelta' in opts:
                             new_header |= FLAG_GENERALDELTA
                     elif b'revlogv0' in opts:
                         new_header = REVLOGV0
                     else:
                         new_header = REVLOG_DEFAULT_VERSION

                     # options copied verbatim onto an attribute when present
                     direct_overrides = (
                         (b'chunkcachesize', '_chunkcachesize'),
                         (b'maxchainlen', '_maxchainlen'),
                         (b'deltabothparents', '_deltabothparents'),
                         (b'compengine', '_compengine'),
                         (b'maxdeltachainspan', '_maxdeltachainspan'),
                         (b'sparse-read-density-threshold', '_srdensitythreshold'),
                         (b'sparse-read-min-gap-size', '_srmingapsize'),
                     )
                     for opt_key, attr_name in direct_overrides:
                         if opt_key in opts:
                             setattr(self, attr_name, opts[opt_key])

                     # compression levels are stored under the option's own key
                     for level_key in (b'zlib.level', b'zstd.level'):
                         if level_key in opts:
                             self._compengineopts[level_key] = opts[level_key]

                     # lazydeltabase can only be on when lazydelta is
                     self._lazydelta = bool(opts.get(b'lazydelta', True))
                     self._lazydeltabase = self._lazydelta and bool(
                         opts.get(b'lazydeltabase', False)
                     )

                     mmapindexthreshold = None
                     if self._mmaplargeindex and b'mmapindexthreshold' in opts:
                         mmapindexthreshold = opts[b'mmapindexthreshold']

                     self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
                     # sparse-revlog forces sparse-read
                     self._withsparseread = self._sparserevlog or bool(
                         opts.get(b'with-sparse-read', False)
                     )

                     if opts.get(b'enableellipsis'):
                         self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor

                     # revlog v0 doesn't have flag processors
                     extra_processors = opts.get(b'flagprocessors', {})
                     for flag, processor in pycompat.iteritems(extra_processors):
                         flagutil.insertflagprocessor(flag, processor, self._flagprocessors)

                     cache_size = self._chunkcachesize
                     if cache_size <= 0:
                         raise error.RevlogError(
                             _(b'revlog chunk cache size %r is not greater than 0')
                             % cache_size
                         )
                     if cache_size & (cache_size - 1):
                         raise error.RevlogError(
                             _(b'revlog chunk cache size %r is not a power of 2')
                             % cache_size
                         )

                     force_nodemap = opts.get(b'devel-force-nodemap', False)
                     return new_header, mmapindexthreshold, force_nodemap
                 def _get_data(self, filepath, mmap_threshold, size=None):
                     """Return the content of ``filepath``, memory-mapped or plainly read.

                     The file is mmapped when ``mmap_threshold`` is not None and the file
                     is at least that large; otherwise it is read normally. ``size``,
                     when given, caps how much is returned.

                     If the file is missing return the empty string. Any other IOError
                     is propagated.
                     """
                     try:
                         with self.opener(filepath) as fp:
                             use_mmap = False
                             if mmap_threshold is not None:
                                 file_size = self.opener.fstat(fp).st_size
                                 use_mmap = file_size >= mmap_threshold
                             if use_mmap:
                                 # TODO: should .close() to release resources without
                                 # relying on Python GC
                                 if size is None:
                                     return util.buffer(util.mmapread(fp))
                                 # avoid potential mmap crash
                                 capped_size = min(file_size, size)
                                 return util.buffer(util.mmapread(fp, capped_size))
                             if size is None:
                                 return fp.read()
                             return fp.read(size)
                     except IOError as inst:
                         if inst.errno == errno.ENOENT:
                             return b''
                         raise
                 def _loadindex(self):
                     """Read the revlog entry point and set up the in-memory index.

                     Steps, in order: process the opener options, pick the entry point
                     file, read and validate the header (version and flags), set up the
                     docket for formats that use one, derive the data file names, select
                     the index parser, parse the index (reusing persisted nodemap data
                     when it matches the index), and reset the caches.

                     Raises `error.RevlogError` for an unknown version, unknown flags,
                     an index shorter than the docket announces, or an index that fails
                     to parse.
                     """
                     new_header, mmapindexthreshold, force_nodemap = self._init_opts()
                     # Entry point: explicit postfix first, then the pending file (only
                     # when requested and present), then the regular `.i` file.
                     if self.postfix is not None:
                         entry_point = b'%s.i.%s' % (self.radix, self.postfix)
                     elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
                         entry_point = b'%s.i.a' % self.radix
                     else:
                         entry_point = b'%s.i' % self.radix
                     entry_data = b''
                     self._initempty = True
                     # _get_data() returns b'' for a missing file
                     entry_data = self._get_data(entry_point, mmapindexthreshold)
                     if len(entry_data) > 0:
                         # the header is the first 4 bytes of the entry point
                         header = INDEX_HEADER.unpack(entry_data[:4])[0]
                         self._initempty = False
                     else:
                         header = new_header
                     # low 16 bits: format version, remaining bits: format flags
                     self._format_flags = header & ~0xFFFF
                     self._format_version = header & 0xFFFF
                     supported_flags = SUPPORTED_FLAGS.get(self._format_version)
                     if supported_flags is None:
                         msg = _(b'unknown version (%d) in revlog %s')
                         msg %= (self._format_version, self.display_id)
                         raise error.RevlogError(msg)
                     elif self._format_flags & ~supported_flags:
                         msg = _(b'unknown flags (%#04x) in version %d revlog %s')
                         display_flag = self._format_flags >> 16
                         msg %= (display_flag, self._format_version, self.display_id)
                         raise error.RevlogError(msg)
                     features = FEATURES_BY_VERSION[self._format_version]
                     self._inline = features[b'inline'](self._format_flags)
                     self._generaldelta = features[b'generaldelta'](self._format_flags)
                     self.hassidedata = features[b'sidedata']
                     if not features[b'docket']:
                         # no docket: the entry point is the index itself
                         self._indexfile = entry_point
                         index_data = entry_data
                     else:
                         # the entry point is a docket naming the actual index file
                         self._docket_file = entry_point
                         if self._initempty:
                             self._docket = docketutil.default_docket(self, header)
                         else:
                             self._docket = docketutil.parse_docket(
                                 self, entry_data, use_pending=self._trypending
                             )
                         self._indexfile = self._docket.index_filepath()
                         index_data = b''
                         index_size = self._docket.index_end
                         if index_size > 0:
                             # read at most `index_size` bytes of the index file
                             index_data = self._get_data(
                                 self._indexfile, mmapindexthreshold, size=index_size
                             )
                             if len(index_data) < index_size:
                                 msg = _(b'too few index data for %s: got %d, expected %d')
                                 msg %= (self.display_id, len(index_data), index_size)
                                 raise error.RevlogError(msg)
                         self._inline = False
                         # generaldelta implied by version 2 revlogs.
                         self._generaldelta = True
                         # the logic for persistent nodemap will be dealt with within the
                         # main docket, so disable it for now.
                         self._nodemap_file = None
                     # Data file names come from the docket when there is one, otherwise
                     # from the radix (and postfix).
                     if self._docket is not None:
                         self._datafile = self._docket.data_filepath()
                         self._sidedatafile = self._docket.sidedata_filepath()
                     elif self.postfix is None:
                         self._datafile = b'%s.d' % self.radix
                     else:
                         self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
                     self.nodeconstants = sha1nodeconstants
                     self.nullid = self.nodeconstants.nullid
                     # sparse-revlog can't be on without general-delta (issue6056)
                     if not self._generaldelta:
                         self._sparserevlog = False
                     self._storedeltachains = True
                     # The "devel" nodemap parser needs a nodemap file, the
                     # devel-force-nodemap option and parser support.
                     devel_nodemap = (
                         self._nodemap_file
                         and force_nodemap
                         and parse_index_v1_nodemap is not None
                     )
                     use_rust_index = False
                     if rustrevlog is not None:
                         if self._nodemap_file is not None:
                             use_rust_index = True
                         else:
                             use_rust_index = self.opener.options.get(b'rust.index')
                     # Parser selection: the format version takes precedence over the
                     # devel nodemap and rust index variants of the v1 parser.
                     self._parse_index = parse_index_v1
                     if self._format_version == REVLOGV0:
                         self._parse_index = revlogv0.parse_index_v0
                     elif self._format_version == REVLOGV2:
                         self._parse_index = parse_index_v2
                     elif self._format_version == CHANGELOGV2:
                         self._parse_index = parse_index_cl_v2
                     elif devel_nodemap:
                         self._parse_index = parse_index_v1_nodemap
                     elif use_rust_index:
                         self._parse_index = parse_index_v1_mixed
                     try:
                         d = self._parse_index(index_data, self._inline)
                         index, _chunkcache = d
                         use_nodemap = (
                             not self._inline
                             and self._nodemap_file is not None
                             and util.safehasattr(index, 'update_nodemap_data')
                         )
                         if use_nodemap:
                             nodemap_data = nodemaputil.persisted_data(self)
                             if nodemap_data is not None:
                                 docket = nodemap_data[0]
                                 # only trust the persisted nodemap if its tip is still
                                 # present in the index with the same node
                                 if (
                                     len(d[0]) > docket.tip_rev
                                     and d[0][docket.tip_rev][7] == docket.tip_node
                                 ):
                                     # no changelog tampering
                                     self._nodemap_docket = docket
                                     index.update_nodemap_data(*nodemap_data)
                     except (ValueError, IndexError):
                         raise error.RevlogError(
                             _(b"index %s is corrupted") % self.display_id
                         )
                     self.index, self._chunkcache = d
                     if not self._chunkcache:
                         self._chunkclear()
                     # revnum -> (chain-length, sum-delta-length)
                     self._chaininfocache = util.lrucachedict(500)
                     # revlog header -> revlog compressor
                     self._decompressors = {}
                 @util.propertycache
                 def revlog_kind(self):
                     """The KIND part (first item) of ``self.target``."""
                     kind = self.target[0]
                     return kind
                 @util.propertycache
                 def display_id(self):
                     """Identifier of this revlog for use in user-facing messages."""
                     # For now this is simply the radix. A user-facing representation
                     # built from `revlog.target` might be a better choice.
                     public_id = self.radix
                     return public_id
                 def _get_decompressor(self, t):
                     """Return the revlog compressor object for revlog header ``t``.

                     Objects are memoized in ``self._decompressors``.  When building
                     one fails with ``KeyError``, an ``error.RevlogError`` reporting
                     the unknown compression type is raised instead.
                     """
                     known = self._decompressors
                     if t in known:
                         return known[t]
                     try:
                         found = util.compengines.forrevlogheader(t)
                         compressor = found.revlogcompressor(self._compengineopts)
                         known[t] = compressor
                     except KeyError:
                         raise error.RevlogError(
                             _(b'unknown compression type %s') % binascii.hexlify(t)
                         )
                     return compressor
                 @util.propertycache
                 def _compressor(self):
                     """Revlog compressor of the engine named by ``self._compengine``."""
                     engines = util.compengines
                     return engines[self._compengine].revlogcompressor(
                         self._compengineopts
                     )
                 @util.propertycache
                 def _decompressor(self):
                     """the default decompressor

                     This is the ``decompress`` callable matching the docket's default
                     compression header, or None when the revlog has no docket.
                     """
                     docket = self._docket
                     if docket is not None:
                         header = docket.default_compression_header
                         return self._get_decompressor(header).decompress
                     return None
                 def _indexfp(self):
                     """file object for the revlog's index file (opened with mode ``r``)"""
                     index_path = self._indexfile
                     return self.opener(index_path, mode=b"r")
                 def __index_write_fp(self):
                     """Open the index file for writing, positioned where data ends.

                     You should not use this directly and use `_writing` instead.

                     The file is opened in ``r+`` mode and the cursor is moved to the
                     end of the file, or to the docket's ``index_end`` when a docket
                     is in use.  If the file does not exist (``ENOENT``) it is
                     opened in ``w+`` mode instead.
                     """
                     try:
                         fp = self.opener(
                             self._indexfile, mode=b"r+", checkambig=self._checkambig
                         )
                         if self._docket is not None:
                             fp.seek(self._docket.index_end, os.SEEK_SET)
                         else:
                             fp.seek(0, os.SEEK_END)
                         return fp
                     except IOError as inst:
                         if inst.errno == errno.ENOENT:
                             return self.opener(
                                 self._indexfile, mode=b"w+", checkambig=self._checkambig
                             )
                         raise
                 def __index_new_fp(self):
                     """Open the index file in ``w`` mode, passing ``atomictemp=True``.

                     You should not use this unless you are upgrading from inline revlog.
                     """
                     index_path = self._indexfile
                     ambig = self._checkambig
                     return self.opener(
                         index_path, mode=b"w", checkambig=ambig, atomictemp=True
                     )
                 def _datafp(self, mode=b'r'):
                     """file object for the revlog's data file, opened with ``mode``"""
                     data_path = self._datafile
                     return self.opener(data_path, mode=mode)
                 @contextlib.contextmanager
                 def _datareadfp(self, existingfp=None):
                     """file object suitable to read data

                     Preference order: the explicit ``existingfp``, then a handle
                     currently used for writing, then a freshly opened file (which is
                     closed when the context exits).
                     """
                     # Use explicit file handle, if given.
                     if existingfp is not None:
                         yield existingfp
                         return
                     # Use a file handle being actively used for writes, if available.
                     # There is some danger to doing this because reads will seek the
                     # file. However, _writeentry() performs a SEEK_END before all writes,
                     # so we should be safe.
                     handles = self._writinghandles
                     if handles:
                         # inline revlogs keep their data in the index file (slot 0)
                         yield handles[0 if self._inline else 1]
                         return
                     # Otherwise open a new file handle.
                     open_func = self._indexfp if self._inline else self._datafp
                     with open_func() as fp:
                         yield fp
                 @contextlib.contextmanager
                 def _sidedatareadfp(self):
                     """file object suitable to read sidedata

                     Reuses the third writing handle when writing handles are set,
                     otherwise opens ``self._sidedatafile`` for the duration of the
                     context.
                     """
                     handles = self._writinghandles
                     if not handles:
                         with self.opener(self._sidedatafile) as fp:
                             yield fp
                     else:
                         yield handles[2]
                 def tiprev(self):
                     """Return the highest revision number (``len(self.index) - 1``)."""
                     count = len(self.index)
                     return count - 1
                 def tip(self):
                     """Return the node of the revision given by ``tiprev()``."""
                     last = self.tiprev()
                     return self.node(last)
                 def __contains__(self, rev):
                     """Whether ``rev`` is a revision number in ``[0, len(self))``."""
                     if 0 <= rev:
                         return rev < len(self)
                     return False
                 def __len__(self):
                     """Number of entries in the index."""
                     index = self.index
                     return len(index)
                 def __iter__(self):
                     """Iterate over the revision numbers ``0 .. len(self) - 1``."""
                     count = len(self)
                     return iter(pycompat.xrange(count))
                 def revs(self, start=0, stop=None):
                     """iterate over all rev in this revlog (from start to stop)

                     This delegates to ``storageutil.iterrevs`` with the current length.
                     """
                     total = len(self)
                     return storageutil.iterrevs(total, start=start, stop=stop)
                 @property
                 def nodemap(self):
                     """Deprecated alias of ``self.index.nodemap``; warns on every access."""
                     util.nouideprecwarn(
                         b"revlog.nodemap is deprecated, "
                         b"use revlog.index.[has_node|rev|get_rev]",
                         b'5.3',
                         stacklevel=2,
                     )
                     return self.index.nodemap
                 @property
                 def _nodecache(self):
                     """Deprecated alias of ``self.index.nodemap``; warns on every access."""
                     util.nouideprecwarn(
                         b"revlog._nodecache is deprecated, use revlog.index.nodemap",
                         b'5.3',
                         stacklevel=2,
                     )
                     return self.index.nodemap
                 def hasnode(self, node):
                     """Return True if ``self.rev(node)`` succeeds, False on ``KeyError``."""
                     try:
                         self.rev(node)
                     except KeyError:
                         return False
                     else:
                         return True
                 def candelta(self, baserev, rev):
                     """whether two revisions (baserev, rev) can be delta-ed or not"""
                     # Disable delta if either rev requires a content-changing flag
                     # processor (ex. LFS). This is because such flag processor can alter
                     # the rawtext content that the delta will be based on, and two clients
                     # could have a same revlog node with different flags (i.e. different
                     # rawtext contents) and the delta could be incompatible.
                     changing = REVIDX_RAWTEXT_CHANGING_FLAGS
                     if self.flags(baserev) & changing:
                         return False
                     return not (self.flags(rev) & changing)
                 def update_caches(self, transaction):
                     """Refresh the persistent nodemap, if this revlog has a nodemap file.

                     Without a transaction the nodemap is updated right away; with one,
                     the update is set up through the transaction.
                     """
                     if self._nodemap_file is None:
                         return
                     if transaction is not None:
                         nodemaputil.setup_persistent_nodemap(transaction, self)
                     else:
                         nodemaputil.update_persistent_nodemap(self)
                 def clearcaches(self):
                     """Reset the in-memory caches and reload persisted nodemap data."""
                     self._revisioncache = None
                     self._chainbasecache.clear()
                     self._chunkcache = (0, b'')
                     self._pcache = {}
                     self._nodemap_docket = None
                     self.index.clearcaches()
                     # The python code is the one responsible for validating the docket, we
                     # end up having to refresh it here.
                     if self._inline or self._nodemap_file is None:
                         return
                     if not util.safehasattr(self.index, 'update_nodemap_data'):
                         return
                     nodemap_data = nodemaputil.persisted_data(self)
                     if nodemap_data is None:
                         return
                     self._nodemap_docket = nodemap_data[0]
                     self.index.update_nodemap_data(*nodemap_data)
                 def rev(self, node):
                     """Return the revision number of ``node`` as given by the index.

                     A failed index lookup (``error.RevlogError``) is turned into
                     ``error.WdirUnsupported`` for working directory pseudo-nodes and
                     into ``error.LookupError`` for anything else.
                     """
                     try:
                         return self.index.rev(node)
                     except TypeError:
                         raise
                     except error.RevlogError:
                         # parsers.c radix tree lookup failed
                         constants = self.nodeconstants
                         is_wdir = (
                             node == constants.wdirid
                             or node in constants.wdirfilenodeids
                         )
                         if is_wdir:
                             raise error.WdirUnsupported
                         raise error.LookupError(node, self.display_id, _(b'no node'))
                 # Accessors for index entries.
                 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
                 # are flags.
                 def start(self, rev):
                     """Return the offset of ``rev``: index field 0 without its 16 flag bits."""
                     offset_flags = self.index[rev][0]
                     return int(offset_flags >> 16)
                 def sidedata_cut_off(self, rev):
                     """Return the sidedata offset to use for ``rev``.

                     This is the offset stored for ``rev`` (index field 8) when it is
                     non-zero.  Otherwise it is the end (offset + length, fields 8 and
                     9) of the closest revision at or before ``rev`` whose sidedata
                     length is non-zero, or 0 when there is none.
                     """
                     index = self.index
                     cut_off = index[rev][8]
                     if cut_off != 0:
                         return cut_off
                     # This is some annoying dance, because entries without sidedata
                     # currently use 0 as their offset. (instead of previous-offset +
                     # previous-size)
                     #
                     # We should reconsider this sidedata → 0 sidedata_offset policy.
                     # In the meantime, we need this.
                     current = rev
                     while current >= 0:
                         entry = index[current]
                         if entry[9] != 0:
                             return entry[8] + entry[9]
                         current -= 1
                     return 0
                 def flags(self, rev):
                     """Return the flags of ``rev``: the low 16 bits of index field 0."""
                     offset_flags = self.index[rev][0]
                     return offset_flags & 0xFFFF
                 def length(self, rev):
                     """Return index field 1 of ``rev`` (the length used by ``end()``)."""
                     entry = self.index[rev]
                     return entry[1]
                 def sidedata_length(self, rev):
                     """Return index field 9 of ``rev``, or 0 if ``hassidedata`` is false."""
                     if self.hassidedata:
                         return self.index[rev][9]
                     return 0
                 def rawsize(self, rev):
                     """return the length of the uncompressed text for a given revision"""
                     stored = self.index[rev][2]
                     if stored < 0:
                         # negative size in the index: measure the raw data itself
                         return len(self.rawdata(rev))
                     return stored
                 def size(self, rev):
                     """length of non-raw text (processed by a "read" flag processor)"""
                     # fast path: if no "read" flag processor could change the content,
                     # size is rawsize. note: ELLIPSIS is known to not change the content.
                     content_changing = flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS
                     if self.flags(rev) & content_changing:
                         return len(self.revision(rev, raw=False))
                     return self.rawsize(rev)
                 def chainbase(self, rev):
                     """Return the revision at the bottom of the delta chain of ``rev``.

                     Index field 3 is followed until an entry points at itself; the
                     result is memoized in ``self._chainbasecache``.
                     """
                     cache = self._chainbasecache
                     cached = cache.get(rev)
                     if cached is not None:
                         return cached
                     index = self.index
                     current = rev
                     while True:
                         base = index[current][3]
                         if base == current:
                             break
                         current = base
                     cache[rev] = base
                     return base
                 def linkrev(self, rev):
                     """Return index field 4 of ``rev``."""
                     entry = self.index[rev]
                     return entry[4]
                 def parentrevs(self, rev):
                     """Return the two parent revisions of ``rev`` (index fields 5 and 6).

                     When the first parent is ``nullrev`` the pair is swapped, so a
                     null parent never comes first unless both are null.  Asking for
                     ``wdirrev`` raises ``error.WdirUnsupported``.
                     """
                     try:
                         entry = self.index[rev]
                     except IndexError:
                         if rev == wdirrev:
                             raise error.WdirUnsupported
                         raise
                     p1, p2 = entry[5], entry[6]
                     if p1 == nullrev:
                         return p2, p1
                     return p1, p2
                 # fast parentrevs(rev) where rev isn't filtered
                 #
                 # This is bound to the `parentrevs` function defined just above, at
                 # class definition time.
                 _uncheckedparentrevs = parentrevs
                 def node(self, rev):
                     """Return the node of ``rev`` (index field 7).

                     An ``IndexError`` for ``wdirrev`` is reported as
                     ``error.WdirUnsupported``; any other one is propagated.
                     """
                     try:
                         return self.index[rev][7]
                     except IndexError:
                         if rev != wdirrev:
                             raise
                         raise error.WdirUnsupported
                 # Derived from index values.
                 def end(self, rev):
                     """Return ``start(rev) + length(rev)``."""
                     begin = self.start(rev)
                     return begin + self.length(rev)
                 def parents(self, node):
                     """Return the two parent nodes of ``node``.

                     When the first parent is null the pair is swapped so that the
                     null parent comes last, which is the same ordering
                     ``parentrevs`` applies to revision numbers.

                     Raises whatever ``self.rev(node)`` raises for an unknown node.
                     """
                     index = self.index
                     entry = index[self.rev(node)]
                     p1, p2 = entry[5], entry[6]
                     # Index fields 5 and 6 hold parent *revision numbers*, so a null
                     # first parent must be detected with `nullrev`.  Comparing the
                     # revision number with the `nullid` node could never match, which
                     # left the swap below unreachable.
                     #
                     # inline node() to avoid function call overhead
                     if p1 == nullrev:
                         return index[p2][7], index[p1][7]
                     return index[p1][7], index[p2][7]
                 def chainlen(self, rev):
                     """Return the delta chain length of ``rev`` (see ``_chaininfo``)."""
                     info = self._chaininfo(rev)
                     return info[0]
                 def _chaininfo(self, rev):
                     """Return ``(chain-length, sum-delta-length)`` for ``rev``.

                     The chain is walked down to its base, or to the first revision
                     whose result is already in ``self._chaininfocache``.  The result
                     for ``rev`` is stored in that cache before being returned.
                     """
                     cache = self._chaininfocache
                     if rev in cache:
                         return cache[rev]
                     index = self.index
                     use_generaldelta = self._generaldelta
                     current = rev
                     entry = index[current]
                     length = 0
                     delta_total = 0
                     while current != entry[3]:
                         length += 1
                         delta_total += entry[1]
                         # with generaldelta the delta parent is field 3, otherwise
                         # it is simply the previous revision
                         current = entry[3] if use_generaldelta else current - 1
                         if current in cache:
                             known = cache[current]
                             length += known[0]
                             delta_total += known[1]
                             break
                         entry = index[current]
                     else:
                         # Add text length of base since decompressing that also takes
                         # work. For cache hits the length is already included.
                         delta_total += entry[1]
                     result = (length, delta_total)
                     cache[rev] = result
                     return result
                 def _deltachain(self, rev, stoprev=None):
                     """Obtain the delta chain for a revision.

                     ``stoprev`` specifies a revision to stop at. If not specified, we
                     stop at the base of the chain.

                     Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
                     revs in ascending order and ``stopped`` is a bool indicating whether
                     ``stoprev`` was hit.
                     """
                     # Try C implementation.
                     try:
                         return self.index.deltachain(rev, stoprev, self._generaldelta)
                     except AttributeError:
                         pass
                     # Aliases to prevent attribute lookup in tight loop.
                     index = self.index
                     use_generaldelta = self._generaldelta
                     walked = []
                     current = rev
                     entry = index[current]
                     while current != entry[3] and current != stoprev:
                         walked.append(current)
                         current = entry[3] if use_generaldelta else current - 1
                         entry = index[current]
                     stopped = current == stoprev
                     if not stopped:
                         # the base of the chain is part of the chain
                         walked.append(current)
                     walked.reverse()
                     return walked, stopped
                 def ancestors(self, revs, stoprev=0, inclusive=False):
                     """Generate the ancestors of 'revs' in reverse revision order.
                     Does not generate revs lower than stoprev.

                     See the documentation for ancestor.lazyancestors for more details."""
                     # first, make sure start revisions aren't filtered
                     revs = list(revs)
                     check = self.node
                     for r in revs:
                         check(r)
                     # and we're sure ancestors aren't filtered as well
                     use_rust = rustancestor is not None and self.index.rust_ext_compat
                     if use_rust:
                         return rustancestor.LazyAncestors(
                             self.index, revs, stoprev=stoprev, inclusive=inclusive
                         )
                     return ancestor.lazyancestors(
                         self._uncheckedparentrevs,
                         revs,
                         stoprev=stoprev,
                         inclusive=inclusive,
                     )
                 def descendants(self, revs):
                     """Return the descendants of ``revs`` via ``dagop.descendantrevs``."""
                     revs_func = self.revs
                     parents_func = self.parentrevs
                     return dagop.descendantrevs(revs, revs_func, parents_func)
                 def findcommonmissing(self, common=None, heads=None):
                     """Return a tuple of the ancestors of common and the ancestors of heads
                     that are not ancestors of common. In revset terminology, we return the
                     tuple:

                       ::common, (::heads) - (::common)

                     The first item is a lazy, set-like object holding revision
                     numbers; the second is a list of nodes sorted by revision number,
                     meaning it is topologically sorted.

                     'heads' and 'common' are both lists of node IDs.  If heads is
                     not supplied, uses all of the revlog's heads.  If common is not
                     supplied, uses nullid."""
                     if common is None:
                         common = [self.nullid]
                     if heads is None:
                         heads = self.heads()
                     to_rev = self.rev
                     common = [to_rev(n) for n in common]
                     heads = [to_rev(n) for n in heads]

                     # we want the ancestors, but inclusive
                     class lazyset(object):
                         """Lazy iterable of values extended with an eager set of extras."""

                         def __init__(self, lazyvalues):
                             self.addedvalues = set()
                             self.lazyvalues = lazyvalues

                         def __contains__(self, value):
                             return value in self.addedvalues or value in self.lazyvalues

                         def __iter__(self):
                             extras = self.addedvalues
                             for value in extras:
                                 yield value
                             for value in self.lazyvalues:
                                 if value not in extras:
                                     yield value

                         def add(self, value):
                             self.addedvalues.add(value)

                         def update(self, values):
                             self.addedvalues.update(values)

                     has = lazyset(self.ancestors(common))
                     has.add(nullrev)
                     has.update(common)
                     # take all ancestors from heads that aren't in has
                     missing = set()
                     pending = collections.deque(r for r in heads if r not in has)
                     while pending:
                         current = pending.popleft()
                         if current in missing:
                             continue
                         missing.add(current)
                         pending.extend(
                             p for p in self.parentrevs(current) if p not in has
                         )
                     return has, [self.node(r) for r in sorted(missing)]
                 def incrementalmissingrevs(self, common=None):
                     """Return an object that can be used to incrementally compute the
                     revision numbers of the ancestors of arbitrary sets that are not
                     ancestors of common. This is an ancestor.incrementalmissingancestors
                     object (or its Rust counterpart when that one is usable).

                     'common' is a list of revision numbers. If common is not supplied, uses
                     nullrev.
                     """
                     bases = [nullrev] if common is None else common
                     if rustancestor is None or not self.index.rust_ext_compat:
                         return ancestor.incrementalmissingancestors(
                             self.parentrevs, bases
                         )
                     return rustancestor.MissingAncestors(self.index, bases)
                 def findmissingrevs(self, common=None, heads=None):
                     """Return the revision numbers of the ancestors of heads that
                     are not ancestors of common.

                     More specifically, return a list of revision numbers corresponding to
                     nodes N such that every N satisfies the following constraints:

                       1. N is an ancestor of some node in 'heads'
                       2. N is not an ancestor of any node in 'common'

                     The list is sorted by revision number, meaning it is
                     topologically sorted.

                     'heads' and 'common' are both lists of revision numbers.  If heads is
                     not supplied, uses all of the revlog's heads.  If common is not
                     supplied, uses nullrev."""
                     bases = [nullrev] if common is None else common
                     tips = self.headrevs() if heads is None else heads
                     tracker = self.incrementalmissingrevs(common=bases)
                     return tracker.missingancestors(tips)
                 def findmissing(self, common=None, heads=None):
                     """Return the ancestors of heads that are not ancestors of common.

                     More specifically, return a list of nodes N such that every N
                     satisfies the following constraints:

                       1. N is an ancestor of some node in 'heads'
                       2. N is not an ancestor of any node in 'common'

                     The list is sorted by revision number, meaning it is
                     topologically sorted.

                     'heads' and 'common' are both lists of node IDs.  If heads is
                     not supplied, uses all of the revlog's heads.  If common is not
                     supplied, uses nullid."""
                     base_nodes = [self.nullid] if common is None else common
                     head_nodes = self.heads() if heads is None else heads
                     to_rev = self.rev
                     base_revs = [to_rev(n) for n in base_nodes]
                     head_revs = [to_rev(n) for n in head_nodes]
                     tracker = self.incrementalmissingrevs(common=base_revs)
                     to_node = self.node
                     return [to_node(r) for r in tracker.missingancestors(head_revs)]
                 def nodesbetween(self, roots=None, heads=None):
                     """Return a topological path from 'roots' to 'heads'.

                     Return a tuple (nodes, outroots, outheads) where 'nodes' is a
                     topologically sorted list of all nodes N that satisfy both of
                     these constraints:

                       1. N is a descendant of some node in 'roots'
                       2. N is an ancestor of some node in 'heads'

                     Every node is considered to be both a descendant and an ancestor
                     of itself, so every reachable node in 'roots' and 'heads' will be
                     included in 'nodes'.

                     'outroots' is the list of reachable nodes in 'roots', i.e., the
                     subset of 'roots' that is returned in 'nodes'.  Likewise,
                     'outheads' is the subset of 'heads' that is also in 'nodes'.

                     'roots' and 'heads' are both lists of node IDs.  If 'roots' is
                     unspecified, uses nullid as the only root.  If 'heads' is
                     unspecified, uses list of all of the revlog's heads.

                     When 'roots' or 'heads' is given but empty, or when nothing
                     connects them, three empty lists are returned."""
                     # Result used whenever there is nothing to report.
                     nonodes = ([], [], [])
                     if roots is not None:
                         roots = list(roots)
                         if not roots:
                             return nonodes
                         lowestrev = min([self.rev(n) for n in roots])
                     else:
                         roots = [self.nullid]  # Everybody's a descendant of nullid
                         lowestrev = nullrev
                     if (lowestrev == nullrev) and (heads is None):
                         # We want _all_ the nodes!
                         return (
                             [self.node(r) for r in self],
                             [self.nullid],
                             list(self.heads()),
                         )
                     if heads is None:
                         # All nodes are ancestors, so the latest ancestor is the last
                         # node.
                         highestrev = len(self) - 1
                         # Set ancestors to None to signal that every node is an ancestor.
                         ancestors = None
                         # Set heads to an empty dictionary for later discovery of heads
                         heads = {}
                     else:
                         heads = list(heads)
                         if not heads:
                             return nonodes
                         ancestors = set()
                         # Turn heads into a dictionary so we can remove 'fake' heads.
                         # Also, later we will be using it to filter out the heads we can't
                         # find from roots.
                         heads = dict.fromkeys(heads, False)
                         # Start at the top and keep marking parents until we're done.
                         nodestotag = set(heads)
                         # Remember where the top was so we can use it as a limit later.
                         highestrev = max([self.rev(n) for n in nodestotag])
                         while nodestotag:
                             # grab a node to tag
                             n = nodestotag.pop()
                             # Never tag nullid
                             if n == self.nullid:
                                 continue
                             # A node's revision number represents its place in a
                             # topologically sorted list of nodes.
                             r = self.rev(n)
                             if r >= lowestrev:
                                 if n not in ancestors:
                                     # If we are possibly a descendant of one of the roots
                                     # and we haven't already been marked as an ancestor
                                     ancestors.add(n)  # Mark as ancestor
                                     # Add non-nullid parents to list of nodes to tag.
                                     nodestotag.update(
                                         [p for p in self.parents(n) if p != self.nullid]
                                     )
                                 elif n in heads:  # We've seen it before, is it a fake head?
                                     # So it is, real heads should not be the ancestors of
                                     # any other heads.
                                     heads.pop(n)
                         if not ancestors:
                             return nonodes
                         # Now that we have our set of ancestors, we want to remove any
                         # roots that are not ancestors.
                         # If one of the roots was nullid, everything is included anyway.
                         if lowestrev > nullrev:
                             # But, since we weren't, let's recompute the lowest rev to not
                             # include roots that aren't ancestors.
                             # Filter out roots that aren't ancestors of heads
                             roots = [root for root in roots if root in ancestors]
                             # Recompute the lowest revision
                             if roots:
                                 lowestrev = min([self.rev(root) for root in roots])
                             else:
                                 # No more roots?  Return empty list
                                 return nonodes
                         else:
                             # We are descending from nullid, and don't need to care about
                             # any other roots.
                             lowestrev = nullrev
                             roots = [self.nullid]
                     # Transform our roots list into a set.
                     descendants = set(roots)
                     # Also, keep the original roots so we can filter out roots that aren't
                     # 'real' roots (i.e. are descended from other roots).
                     roots = descendants.copy()
                     # Our topologically sorted list of output nodes.
                     orderedout = []
                     # Don't start at nullid since we don't want nullid in our output list,
                     # and if nullid shows up in descendants, empty parents will look like
                     # they're descendants.
                     for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
                         n = self.node(r)
                         isdescendant = False
                         if lowestrev == nullrev:  # Everybody is a descendant of nullid
                             isdescendant = True
                         elif n in descendants:
                             # n is already a descendant
                             isdescendant = True
                             # This check only needs to be done here because all the roots
                             # were already marked as descendants before the loop.
                             if n in roots:
                                 # If n was a root, check if it's a 'real' root.
                                 p = tuple(self.parents(n))
                                 # If any of its parents are descendants, it's not a root.
                                 if (p[0] in descendants) or (p[1] in descendants):
                                     roots.remove(n)
                         else:
                             p = tuple(self.parents(n))
                             # A node is a descendant if either of its parents are
                             # descendants.  (We seeded the descendants set with the roots
                             # up there, remember?)
                             if (p[0] in descendants) or (p[1] in descendants):
                                 descendants.add(n)
                                 isdescendant = True
                         if isdescendant and ((ancestors is None) or (n in ancestors)):
                             # Only include nodes that are both descendants and ancestors.
                             orderedout.append(n)
                             if (ancestors is not None) and (n in heads):
                                 # We're trying to figure out which heads are reachable
                                 # from roots.
                                 # Mark this head as having been reached
                                 heads[n] = True
                             elif ancestors is None:
                                 # Otherwise, we're trying to discover the heads.
                                 # Assume this is a head because if it isn't, the next step
                                 # will eventually remove it.
                                 heads[n] = True
                                 # But, obviously its parents aren't.
                                 for p in self.parents(n):
                                     heads.pop(p, None)
                     # Keep only the heads that were flagged True in the loop above.
                     heads = [head for head, flag in pycompat.iteritems(heads) if flag]
                     roots = list(roots)
                     assert orderedout
                     assert roots
                     assert heads
                     return (orderedout, roots, heads)
                 def headrevs(self, revs=None):
                     """return the head revisions, optionally limited to ``revs``
                     Without ``revs`` the index is asked for its heads, and
                     ``self._headrevs()`` is used when that lookup raises AttributeError.
                     With ``revs`` the heads of that subset are computed by
                     ``rustdagop.headrevs`` when ``rustdagop`` is available and the index
                     reports ``rust_ext_compat``, and by ``dagop.headrevs`` otherwise.
                     """
                     if revs is not None:
                         if rustdagop is not None and self.index.rust_ext_compat:
                             return rustdagop.headrevs(self.index, revs)
                         return dagop.headrevs(revs, self._uncheckedparentrevs)
                     try:
                         return self.index.headrevs()
                     except AttributeError:
                         # no ``headrevs`` available on the index: compute it here
                         return self._headrevs()
                 def computephases(self, roots):
                     """hand the phase computation for ``roots`` over to the index
                     Returns the result of ``self.index.computephasesmapsets(roots)``
                     unchanged.
                     """
                     index = self.index
                     return index.computephasesmapsets(roots)
                 def _headrevs(self):
                     count = len(self)
                     if not count:
                         return [nullrev]
                     # we won't iter over filtered rev so nobody is a head at start
                     ishead = [0] * (count + 1)
                     index = self.index
                     for r in self:
                         ishead[r] = 1  # I may be an head
                         e = index[r]
                         ishead[e[5]] = ishead[e[6]] = 0  # my parent are not
                     return [r for r, val in enumerate(ishead) if val]
                 def heads(self, start=None, stop=None):
                     """list the nodes that have no children
                     start: when given, only heads descending from this node are
                     returned
                     stop: when given, an iterable of nodes whose revisions are treated
                     as if they had no children
                     With neither argument the answer comes from ``self.headrevs()``, or
                     is ``[self.nullid]`` when the revlog is empty.
                     """
                     if start is None and stop is None:
                         if len(self) == 0:
                             return [self.nullid]
                         return [self.node(rev) for rev in self.headrevs()]
                     startrev = nullrev if start is None else self.rev(start)
                     stoprevs = set()
                     for stopnode in stop or []:
                         stoprevs.add(self.rev(stopnode))
                     found = dagop.headrevssubset(
                         self.revs, self.parentrevs, startrev=startrev, stoprevs=stoprevs
                     )
                     return [self.node(rev) for rev in found]
                 def children(self, node):
                     """return the nodes of the revisions that have ``node`` as parent
                     Only revisions stored after ``node`` are examined. When ``node`` is
                     the null revision, the revisions without any non-null parent are
                     returned. A revision naming ``node`` as both parents is listed
                     twice.
                     """
                     found = []
                     target = self.rev(node)
                     for rev in self.revs(start=target + 1):
                         realparents = [
                             pr for pr in self.parentrevs(rev) if pr != nullrev
                         ]
                         if not realparents:
                             # a root revision is only a child of the null revision
                             if target == nullrev:
                                 found.append(self.node(rev))
                             continue
                         for pr in realparents:
                             if pr == target:
                                 found.append(self.node(rev))
                     return found
                 def commonancestorsheads(self, a, b):
                     """return the heads of the common ancestors of nodes a and b
                     Both nodes are converted to revision numbers, the work is done by
                     ``self._commonancestorsheads`` and the result is mapped back to
                     nodes.
                     """
                     reva = self.rev(a)
                     revb = self.rev(b)
                     headrevs = self._commonancestorsheads(reva, revb)
                     return pycompat.maplist(self.node, headrevs)
                 def _commonancestorsheads(self, *revs):
                     """calculate all the heads of the common ancestors of revs"""
                     try:
                         ancs = self.index.commonancestorsheads(*revs)
                     except (AttributeError, OverflowError):  # C implementation failed
                         ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
                     return ancs
                 def isancestor(self, a, b):
                     """tell whether node a is an ancestor of node b
                     Both nodes are resolved to revision numbers and the question is
                     forwarded to ``self.isancestorrev``.
                     A revision is considered an ancestor of itself."""
                     reva = self.rev(a)
                     revb = self.rev(b)
                     return self.isancestorrev(reva, revb)
                 def isancestorrev(self, a, b):
                     """tell whether revision a is an ancestor of revision b
                     The null revision is an ancestor of everything and a revision is
                     considered an ancestor of itself.
                     The logic here is trivial; the real work is delegated to
                     ``self.reachableroots``."""
                     if a == nullrev or a == b:
                         return True
                     if a > b:
                         # NOTE(review): presumably relies on ancestors always being
                         # stored before their descendants - confirm
                         return False
                     reached = self.reachableroots(a, [b], [a], includepath=False)
                     return bool(reached)
                 def reachableroots(self, minroot, heads, roots, includepath=False):
                     """return (heads(::(<roots> and <roots>::<heads>)))
                     If includepath is True, return (<roots>::<heads>).
                     The index's ``reachableroots2`` is used when it exists; on
                     AttributeError the pure ``dagop._reachablerootspure`` is called
                     instead (note that it takes ``roots`` before ``heads``)."""
                     try:
                         found = self.index.reachableroots2(
                             minroot, heads, roots, includepath
                         )
                     except AttributeError:
                         found = dagop._reachablerootspure(
                             self.parentrevs, minroot, roots, heads, includepath
                         )
                     return found
                 def ancestor(self, a, b):
                     """return the "best" common ancestor of nodes a and b
                     Candidates come from the index, or from ``ancestor.ancestors`` when
                     the index lookup raises AttributeError or OverflowError. The
                     smallest candidate node is returned, and ``self.nullid`` when there
                     is no candidate.
                     """
                     reva, revb = self.rev(a), self.rev(b)
                     try:
                         candidates = self.index.ancestors(reva, revb)
                     except (AttributeError, OverflowError):
                         candidates = ancestor.ancestors(self.parentrevs, reva, revb)
                     if not candidates:
                         return self.nullid
                     # choose a consistent winner when there's a tie
                     return min(map(self.node, candidates))
                 def _match(self, id):
                     if isinstance(id, int):
                         # rev
                         return self.node(id)
                     if len(id) == self.nodeconstants.nodelen:
                         # possibly a binary node
                         # odds of a binary node being all hex in ASCII are 1 in 10**25
                         try:
                             node = id
                             self.rev(node)  # quick search the index
                             return node
                         except error.LookupError:
                             pass  # may be partial hex id
                     try:
                         # str(rev)
                         rev = int(id)
                         if b"%d" % rev != id:
                             raise ValueError
                         if rev < 0:
                             rev = len(self) + rev
                         if rev < 0 or rev >= len(self):
                             raise ValueError
                         return self.node(rev)
                     except (ValueError, OverflowError):
                         pass
                     if len(id) == 2 * self.nodeconstants.nodelen:
                         try:
                             # a full hex nodeid?
                             node = bin(id)
                             self.rev(node)
                             return node
                         except (TypeError, error.LookupError):
                             pass
                 def _partialmatch(self, id):
                     """resolve the hexadecimal node prefix ``id`` to a node
                     The index's ``partialmatch`` is tried first. When it is missing,
                     rejects the key, or reports several matches on a revlog that has
                     filtered revisions, the index entries are scanned instead.
                     Returns the matching node, or None when nothing matches (also when
                     ``id`` is longer than 40 characters or cannot be converted).
                     Raises ``error.AmbiguousPrefixLookupError`` when several nodes match
                     or when a match is also a prefix of the working directory id, and
                     ``error.WdirUnsupported`` when only the working directory id
                     matches. Successful slow-path lookups are remembered in
                     ``self._pcache``.
                     """
                     # we don't care wdirfilenodeids as they should be always full hash
                     maybewdir = self.nodeconstants.wdirhex.startswith(id)
                     ambiguous = False
                     try:
                         partial = self.index.partialmatch(id)
                         if partial and self.hasnode(partial):
                             if maybewdir:
                                 # single 'ff...' match in radix tree, ambiguous with wdir
                                 ambiguous = True
                             else:
                                 return partial
                         elif maybewdir:
                             # no 'ff...' match in radix tree, wdir identified
                             raise error.WdirUnsupported
                         else:
                             return None
                     except error.RevlogError:
                         # parsers.c radix tree lookup gave multiple matches
                         # fast path: for unfiltered changelog, radix tree is accurate
                         if not getattr(self, 'filteredrevs', None):
                             ambiguous = True
                         # fall through to slow path that filters hidden revisions
                     except (AttributeError, ValueError):
                         # we are pure python, or key was too short to search radix tree
                         pass
                     if ambiguous:
                         raise error.AmbiguousPrefixLookupError(
                             id, self.display_id, _(b'ambiguous identifier')
                         )
                     # slow path: consult the prefix cache, then scan the whole index
                     if id in self._pcache:
                         return self._pcache[id]
                     if len(id) <= 40:
                         try:
                             # hex(node)[:...]
                             l = len(id) // 2  # grab an even number of digits
                             prefix = bin(id[: l * 2])
                             # first pass: binary comparison on the even-length prefix
                             nl = [e[7] for e in self.index if e[7].startswith(prefix)]
                             # second pass: exact hexadecimal prefix, known nodes only
                             nl = [
                                 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
                             ]
                             if self.nodeconstants.nullhex.startswith(id):
                                 nl.append(self.nullid)
                             if len(nl) > 0:
                                 if len(nl) == 1 and not maybewdir:
                                     self._pcache[id] = nl[0]
                                     return nl[0]
                                 raise error.AmbiguousPrefixLookupError(
                                     id, self.display_id, _(b'ambiguous identifier')
                                 )
                             if maybewdir:
                                 raise error.WdirUnsupported
                             return None
                         except TypeError:
                             # TypeError ends the lookup with an implicit None;
                             # presumably raised by bin() for non-hex input - TODO confirm
                             pass
                 def lookup(self, id):
                     """find the node designated by ``id``, which may be:
                     - revision number or str(revision number)
                     - nodeid or subset of hex nodeid
                     Full identifiers are tried through ``self._match`` before prefixes
                     are tried through ``self._partialmatch``. Raises
                     ``error.LookupError`` when neither finds anything.
                     """
                     node = self._match(id)
                     if node is None:
                         node = self._partialmatch(id)
                         if not node:
                             raise error.LookupError(
                                 id, self.display_id, _(b'no match found')
                             )
                     return node
                 def shortest(self, node, minlength=1):
                     """Find the shortest unambiguous prefix that matches node.
                     ``minlength`` is the smallest prefix length that may be returned.
                     Prefixes made only of ``f`` digits are lengthened so that they
                     cannot be mistaken for the working directory id.
                     Raises ``error.LookupError`` when ``node`` is unknown.
                     """
                     # True when ``prefix`` designates exactly one node, False when it
                     # is ambiguous; an unknown prefix raises LookupError.
                     def isvalid(prefix):
                         try:
                             matchednode = self._partialmatch(prefix)
                         except error.AmbiguousPrefixLookupError:
                             return False
                         except error.WdirUnsupported:
                             # single 'ff...' match
                             return True
                         if matchednode is None:
                             raise error.LookupError(node, self.display_id, _(b'no node'))
                         return True
                     # True when ``prefix`` consists of ``f`` digits only
                     def maybewdir(prefix):
                         return all(c == b'f' for c in pycompat.iterbytestr(prefix))
                     hexnode = hex(node)
                     def disambiguate(hexnode, minlength):
                         """Disambiguate against wdirid."""
                         # grow the prefix until it contains something else than 'f';
                         # falls off the end (None) if no such prefix exists
                         for length in range(minlength, len(hexnode) + 1):
                             prefix = hexnode[:length]
                             if not maybewdir(prefix):
                                 return prefix
                     # without filtered revisions the index's own answer can be used
                     if not getattr(self, 'filteredrevs', None):
                         try:
                             length = max(self.index.shortest(node), minlength)
                             return disambiguate(hexnode, length)
                         except error.RevlogError:
                             if node != self.nodeconstants.wdirid:
                                 raise error.LookupError(
                                     node, self.display_id, _(b'no node')
                                 )
                         except AttributeError:
                             # Fall through to pure code
                             pass
                     # the working directory id is returned as soon as a prefix is
                     # valid, without the extra disambiguation step
                     if node == self.nodeconstants.wdirid:
                         for length in range(minlength, len(hexnode) + 1):
                             prefix = hexnode[:length]
                             if isvalid(prefix):
                                 return prefix
                     # pure Python search: try longer and longer prefixes
                     for length in range(minlength, len(hexnode) + 1):
                         prefix = hexnode[:length]
                         if isvalid(prefix):
                             return disambiguate(hexnode, length)
                 def cmp(self, node, text):
                     """tell whether ``text`` differs from the stored revision ``node``
                     The text is hashed together with the parents of ``node`` and the
                     result is compared with ``node`` itself.
                     returns True if text is different than what is stored.
                     """
                     parents = self.parents(node)
                     p1, p2 = parents
                     expected = storageutil.hashrevisionsha1(text, p1, p2)
                     return expected != node
                 def _cachesegment(self, offset, data):
                     """Add a segment to the revlog cache.
                     Accepts an absolute offset and the data that is at that location.
                     """
                     o, d = self._chunkcache
                     # try to add to existing cache
                     if o + len(d) == offset and len(d) + len(data) < _chunksize:
                         self._chunkcache = o, d + data
                     else:
                         self._chunkcache = offset, data
                 def _readsegment(self, offset, length, df=None):
                     """Load a segment of raw data from the revlog.
                     Accepts an absolute offset, length to read, and an optional existing
                     file handle to read from.
                     If an existing file handle is passed, it will be seeked and the
                     original seek position will NOT be restored.
                     The read is widened to a window computed from
                     ``self._chunkcachesize`` and the whole window is handed to
                     ``self._cachesegment``.
                     Returns a str or buffer of raw byte data.
                     Raises ``error.RevlogError`` if the requested number of bytes could
                     not be read.
                     """
                     # Cache data both forward and backward around the requested
                     # data, in a fixed size window. This helps speed up operations
                     # involving reading the revlog backwards.
                     # (the masking assumes the cache size is a power of two)
                     cachesize = self._chunkcachesize
                     realoffset = offset & ~(cachesize - 1)
                     reallength = (
                         (offset + length + cachesize) & ~(cachesize - 1)
                     ) - realoffset
                     with self._datareadfp(df) as df:
                         df.seek(realoffset)
                         d = df.read(reallength)
                     self._cachesegment(realoffset, d)
                     # Position of the requested data inside the window that was read.
                     # It is computed unconditionally so that the short read report
                     # below never refers to an unbound name.
                     startoffset = offset - realoffset
                     got = len(d) - startoffset
                     if got < length:
                         filename = self._indexfile if self._inline else self._datafile
                         m = PARTIAL_READ_MSG % (filename, length, offset, got)
                         raise error.RevlogError(m)
                     if startoffset == 0 and reallength == length:
                         # the window is exactly the request: return the data as read
                         return d
                     return util.buffer(d, startoffset, length)
                 def _getsegment(self, offset, length, df=None):
                     """Obtain a segment of raw data from the revlog.
                     Accepts an absolute offset, length of bytes to obtain, and an
                     optional file handle to the already-opened revlog. If the file
                     handle is used, it's original seek position will not be preserved.
                     Requests for data may be returned from a cache.
                     Returns a str or a buffer instance of raw byte data.
                     """
                     o, d = self._chunkcache
                     l = len(d)
                     # is it in the cache?
                     cachestart = offset - o
                     cacheend = cachestart + length
                     if cachestart >= 0 and cacheend <= l:
                         if cachestart == 0 and cacheend == l:
                             return d  # avoid a copy
                         return util.buffer(d, cachestart, cacheend - cachestart)
                     return self._readsegment(offset, length, df=df)
                 def _getsegmentforrevs(self, startrev, endrev, df=None):
                     """Obtain a segment of raw data corresponding to a range of revisions.
                     Accepts the start and end revisions and an optional already-open
                     file handle to be used for reading. If the file handle is read, its
                     seek position will not be preserved.
                     Requests for data may be satisfied by a cache.
                     Returns a 2-tuple of (offset, data) for the requested range of
                     revisions. Offset is the integer offset from the beginning of the
                     revlog and data is a str or buffer of the raw byte data.
                     Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
                     to determine where each revision's data begins and ends.
                     """
                     # Inlined self.start(startrev) & self.end(endrev) for perf reasons
                     # (functions are expensive).
                     index = self.index
                     istart = index[startrev]
                     start = int(istart[0] >> 16)
                     if startrev == endrev:
                         end = start + istart[1]
                     else:
                         iend = index[endrev]
                         end = int(iend[0] >> 16) + iend[1]
                     if self._inline:
                         start += (startrev + 1) * self.index.entry_size
                         end += (endrev + 1) * self.index.entry_size
                     length = end - start
                     return start, self._getsegment(start, length, df=df)
                 def _chunk(self, rev, df=None):
                     """Return the decompressed chunk of a single revision.
                     Takes an integer revision and an optional already-open file handle
                     to be used for reading. If used, the seek position of the file will
                     not be preserved.
                     The compression mode stored in field 10 of the index entry selects
                     how the raw segment is turned into the result: returned as is,
                     passed to ``self._decompressor`` or passed to ``self.decompress``.
                     Raises ``error.RevlogError`` for any other mode.
                     """
                     mode = self.index[rev][10]
                     segment = self._getsegmentforrevs(rev, rev, df=df)[1]
                     if mode == COMP_MODE_PLAIN:
                         return segment
                     if mode == COMP_MODE_DEFAULT:
                         return self._decompressor(segment)
                     if mode == COMP_MODE_INLINE:
                         return self.decompress(segment)
                     raise error.RevlogError('unknown compression mode %d' % mode)
                 def _chunks(self, revs, df=None, targetsize=None):
                     """Obtain decompressed chunks for the specified revisions.
                     Accepts an iterable of numeric revisions that are assumed to be in
                     ascending order. Also accepts an optional already-open file handle
                     to be used for reading. If used, the seek position of the file will
                     not be preserved.
                     ``targetsize`` is only forwarded to ``deltautil.slicechunk`` when
                     sparse reading is enabled.
                     This function is similar to calling ``self._chunk()`` multiple times,
                     but is faster.
                     Returns a list with decompressed data for each requested revision.
                     Raises ``error.RevlogError`` on an unknown compression mode.
                     """
                     if not revs:
                         return []
                     # bind frequently used attributes to local names once
                     start = self.start
                     length = self.length
                     inline = self._inline
                     iosize = self.index.entry_size
                     buffer = util.buffer
                     l = []
                     ladd = l.append
                     if not self._withsparseread:
                         # a single slice spanning all the revisions
                         slicedchunks = (revs,)
                     else:
                         slicedchunks = deltautil.slicechunk(
                             self, revs, targetsize=targetsize
                         )
                     for revschunk in slicedchunks:
                         firstrev = revschunk[0]
                         # Skip trailing revisions with empty diff
                         # (``lastrev`` keeps its final value after the loop)
                         for lastrev in revschunk[::-1]:
                             if length(lastrev) != 0:
                                 break
                         try:
                             offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
                         except OverflowError:
                             # issue4215 - we can't cache a run of chunks greater than
                             # 2G on Windows
                             # NOTE(review): this returns only the chunks of the
                             # current slice; results of earlier slices in ``l`` are
                             # not part of the returned list
                             return [self._chunk(rev, df=df) for rev in revschunk]
                         decomp = self.decompress
                         # self._decompressor might be None, but will not be used in that case
                         def_decomp = self._decompressor
                         for rev in revschunk:
                             chunkstart = start(rev)
                             if inline:
                                 # skip the index entries interleaved with the data
                                 chunkstart += (rev + 1) * iosize
                             chunklength = length(rev)
                             comp_mode = self.index[rev][10]
                             # view of this revision's data inside the segment
                             c = buffer(data, chunkstart - offset, chunklength)
                             if comp_mode == COMP_MODE_PLAIN:
                                 ladd(c)
                             elif comp_mode == COMP_MODE_INLINE:
                                 ladd(decomp(c))
                             elif comp_mode == COMP_MODE_DEFAULT:
                                 ladd(def_decomp(c))
                             else:
                                 msg = 'unknown compression mode %d'
                                 msg %= comp_mode
                                 raise error.RevlogError(msg)
                     return l
                 def _chunkclear(self):
                     """Clear the raw chunk cache."""
                     self._chunkcache = (0, b'')
                 def deltaparent(self, rev):
                     """return the revision that the delta of ``rev`` applies to
                     ``nullrev`` is returned when the base recorded in the index (field
                     3) is the revision itself. Otherwise the recorded base is returned
                     for general delta revlogs, and the previous revision for the others.
                     """
                     base = self.index[rev][3]
                     if base == rev:
                         return nullrev
                     return base if self._generaldelta else rev - 1
                 def issnapshot(self, rev):
                     """tell whether ``rev`` is a snapshot
                     Without sparse-revlog, a snapshot is a revision whose delta parent
                     is ``nullrev``. With it, the index's own ``issnapshot`` is used when
                     present (and installed on the instance for later calls). Otherwise
                     the delta base is examined: no base makes a snapshot, a base that is
                     one of the parents does not, and any other base is a snapshot only
                     if that base is one too.
                     """
                     if not self._sparserevlog:
                         return self.deltaparent(rev) == nullrev
                     if util.safehasattr(self.index, b'issnapshot'):
                         # directly assign the method to cache the testing and access
                         self.issnapshot = self.index.issnapshot
                         return self.issnapshot(rev)
                     if rev == nullrev:
                         return True
                     entry = self.index[rev]
                     base = entry[3]
                     if base == rev or base == nullrev:
                         return True
                     if base == entry[5] or base == entry[6]:
                         # a delta against a parent is a regular delta
                         return False
                     return self.issnapshot(base)
                 def snapshotdepth(self, rev):
                     """number of snapshot in the chain before this one
                     The value is the length of the delta chain of ``rev`` minus one.
                     Raises ``error.ProgrammingError`` when ``rev`` is not a snapshot.
                     """
                     if not self.issnapshot(rev):
                         # interpolate the revision number so the message names it
                         msg = b'revision %d not a snapshot' % rev
                         raise error.ProgrammingError(msg)
                     return len(self._deltachain(rev)[0]) - 1
                 def revdiff(self, rev1, rev2):
                     """return a delta that turns revision rev1 into revision rev2
                     The delta is in binary form and is intended to be written to
                     revlog data directly, so it is computed from raw revision data.
                     The chunk stored for rev2 is reused when it is already a delta
                     against rev1 (and rev1 is not the null revision).
                     """
                     storedasdelta = rev1 != nullrev and self.deltaparent(rev2) == rev1
                     if storedasdelta:
                         return bytes(self._chunk(rev2))
                     oldraw = self.rawdata(rev1)
                     newraw = self.rawdata(rev2)
                     return mdiff.textdiff(oldraw, newraw)
                 def _processflags(self, text, flags, operation, raw=False):
                     """deprecated entry point to access flag processors
                     Emits a deprecation warning, then dispatches to the matching
                     ``flagutil`` helper: the raw one when ``raw`` is set (returned
                     together with ``text``), the read one when ``operation`` is
                     ``b'read'`` and the write one otherwise.
                     """
                     util.nouideprecwarn(
                         b'_processflag(...) use the specialized variant',
                         b'5.2',
                         stacklevel=2,
                     )
                     if raw:
                         return text, flagutil.processflagsraw(self, text, flags)
                     if operation == b'read':
                         return flagutil.processflagsread(self, text, flags)
                     # write operation
                     return flagutil.processflagswrite(self, text, flags)
                 def revision(self, nodeorrev, _df=None, raw=False):
                     """return the uncompressed revision for a node or revision number
                     _df - an existing file handle to read from. (internal-only)
                     raw - an optional argument specifying if the revision data is to be
                     treated as raw data when applying flag transforms. 'raw' should be set
                     to True when generating changegroups or in debug commands.
                     Passing ``raw=True`` emits a deprecation warning pointing at
                     ``rawdata``.
                     """
                     if raw:
                         util.nouideprecwarn(
                             b'revlog.revision(..., raw=True) is deprecated, '
                             b'use revlog.rawdata(...)',
                             b'5.2',
                             stacklevel=2,
                         )
                     return self._revisiondata(nodeorrev, _df, raw=raw)
                 def sidedata(self, nodeorrev, _df=None):
                     """a map of extra data related to the changeset but not part of the hash
                     ``nodeorrev`` is either a revision number or a node; ``_df`` is
                     accepted but not used here.
                     This function currently returns a dictionary. However, a more
                     advanced mapping object will likely be used in the future for
                     more efficient/lazy code.
                     """
                     # deal with <nodeorrev> argument type
                     rev = nodeorrev if isinstance(nodeorrev, int) else self.rev(nodeorrev)
                     return self._sidedata(rev)
                 def _revisiondata(self, nodeorrev, _df=None, raw=False):
                     # deal with <nodeorrev> argument type
                     if isinstance(nodeorrev, int):
                         rev = nodeorrev
                         node = self.node(rev)
                     else:
                         node = nodeorrev
                         rev = None
                     # fast path the special `nullid` rev
                     if node == self.nullid:
                         return b""
                     # ``rawtext`` is the text as stored inside the revlog. Might be the
                     # revision or might need to be processed to retrieve the revision.
                     rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
                     if raw and validated:
                         # if we don't want to process the raw text and that raw
                         # text is cached, we can exit early.
                         return rawtext
                     if rev is None:
                         rev = self.rev(node)
                     # the revlog's flag for this revision
                     # (usually alter its state or content)
                     flags = self.flags(rev)
                     if validated and flags == REVIDX_DEFAULT_FLAGS:
                         # no extra flags set, no flag processor runs, text = rawtext
                         return rawtext
                     if raw:
                         validatehash = flagutil.processflagsraw(self, rawtext, flags)
                         text = rawtext
                     else:
                         r = flagutil.processflagsread(self, rawtext, flags)
                         text, validatehash = r
                     if validatehash:
                         self.checkhash(text, node, rev=rev)
                     if not validated:
                         self._revisioncache = (node, rev, rawtext)
                     return text
                 def _rawtext(self, node, rev, _df=None):
                     """return the possibly unvalidated rawtext for a revision
                     returns (rev, rawtext, validated)
                     """
                     # revision in the cache (could be useful to apply delta)
                     cachedrev = None
                     # An intermediate text to apply deltas to
                     basetext = None
                     # Check if we have the entry in cache
                     # The cache entry looks like (node, rev, rawtext)
                     if self._revisioncache:
                         if self._revisioncache[0] == node:
                             return (rev, self._revisioncache[2], True)
                         cachedrev = self._revisioncache[1]
                     if rev is None:
                         rev = self.rev(node)
                     chain, stopped = self._deltachain(rev, stoprev=cachedrev)
                     if stopped:
                         basetext = self._revisioncache[2]
                     # drop cache to save memory, the caller is expected to
                     # update self._revisioncache after validating the text
                     self._revisioncache = None
                     targetsize = None
                     rawsize = self.index[rev][2]
                     if 0 <= rawsize:
                         targetsize = 4 * rawsize
                     bins = self._chunks(chain, df=_df, targetsize=targetsize)
                     if basetext is None:
                         basetext = bytes(bins[0])
                         bins = bins[1:]
                     rawtext = mdiff.patches(basetext, bins)
                     del basetext  # let us have a chance to free memory early
                     return (rev, rawtext, False)
                 def _sidedata(self, rev):
                     """Return the sidedata for a given revision number."""
                     index_entry = self.index[rev]
                     sidedata_offset = index_entry[8]
                     sidedata_size = index_entry[9]
                     if self._inline:
                         sidedata_offset += self.index.entry_size * (1 + rev)
                     if sidedata_size == 0:
                         return {}
                     # XXX this need caching, as we do for data
                     with self._sidedatareadfp() as sdf:
                         if self._docket.sidedata_end < sidedata_offset + sidedata_size:
                             filename = self._sidedatafile
                             end = self._docket.sidedata_end
                             offset = sidedata_offset
                             length = sidedata_size
                             m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
                             raise error.RevlogError(m)
                         sdf.seek(sidedata_offset, os.SEEK_SET)
                         comp_segment = sdf.read(sidedata_size)
                         if len(comp_segment) < sidedata_size:
                             filename = self._sidedatafile
                             length = sidedata_size
                             offset = sidedata_offset
                             got = len(comp_segment)
                             m = PARTIAL_READ_MSG % (filename, length, offset, got)
                             raise error.RevlogError(m)
                     comp = self.index[rev][11]
                     if comp == COMP_MODE_PLAIN:
                         segment = comp_segment
                     elif comp == COMP_MODE_DEFAULT:
                         segment = self._decompressor(comp_segment)
                     elif comp == COMP_MODE_INLINE:
                         segment = self.decompress(comp_segment)
                     else:
                         msg = 'unknown compression mode %d'
                         msg %= comp
                         raise error.RevlogError(msg)
                     sidedata = sidedatautil.deserialize_sidedata(segment)
                     return sidedata
                 def rawdata(self, nodeorrev, _df=None):
                     """Return the uncompressed raw data of a node or revision number.

                     _df - an existing file handle to read from. (internal-only)
                     """
                     rawtext = self._revisiondata(nodeorrev, _df, raw=True)
                     return rawtext
                 def hash(self, text, p1, p2):
                     """Compute the node hash of ``text`` with parents ``p1`` and ``p2``.

                     This is a method, rather than a direct call to the hashing helper,
                     so that subclasses can replace the hash as needed.
                     """
                     node = storageutil.hashrevisionsha1(text, p1, p2)
                     return node
                 def checkhash(self, text, node, p1=None, p2=None, rev=None):
                     """Verify that ``node`` is the hash of ``text`` and its parents.

                     When neither ``p1`` nor ``p2`` is given, the parents are looked up
                     from ``node``. On mismatch a ``RevlogError`` is raised, or a
                     ``CensoredNodeError`` when the revlog is censorable and ``text`` is
                     a censored text.

                     Available as a method so that subclasses can extend hash mismatch
                     behaviors as needed.
                     """
                     try:
                         if p1 is None and p2 is None:
                             p1, p2 = self.parents(node)
                         if node != self.hash(text, p1, p2):
                             # Clear the revision cache on hash failure. The revision
                             # cache only stores the raw revision, so clearing it means
                             # a later raw access will miss the cache. That case should
                             # be rare, and teaching the cache about the verification
                             # state would be extra work.
                             cached = self._revisioncache
                             if cached and cached[0] == node:
                                 self._revisioncache = None

                             if rev is not None:
                                 revornode = rev
                             else:
                                 revornode = templatefilters.short(hex(node))
                             raise error.RevlogError(
                                 _(b"integrity check failed on %s:%s")
                                 % (self.display_id, pycompat.bytestr(revornode))
                             )
                     except error.RevlogError:
                         if not (self._censorable and storageutil.iscensoredtext(text)):
                             raise
                         raise error.CensoredNodeError(self.display_id, node, text)
                 def _enforceinlinesize(self, tr):
                     """Check if the revlog is too big for inline and convert if so.

                     This should be called after revisions are added to the revlog. If the
                     revlog has grown too large to be an inline revlog, it will convert it
                     to use multiple index and data files.

                     ``tr`` is the running transaction. A ``RevlogError`` is raised when
                     the index file is not registered in it.
                     """
                     tiprev = len(self) - 1
                     total_size = self.start(tiprev) + self.length(tiprev)
                     # nothing to do for a non-inline revlog or one below the size limit
                     if not self._inline or total_size < _maxinline:
                         return
                     troffset = tr.findoffset(self._indexfile)
                     if troffset is None:
                         raise error.RevlogError(
                             _(b"%s not found in the transaction") % self._indexfile
                         )
                     # trindex ends up as the last revision whose inline position
                     # (data start + preceding index entries) is at or past troffset
                     trindex = 0
                     tr.add(self._datafile, 0)
                     # remember whether we were called from inside a writing context so
                     # fresh handles can be exposed again once the split is done
                     existing_handles = False
                     if self._writinghandles is not None:
                         existing_handles = True
                         fp = self._writinghandles[0]
                         fp.flush()
                         fp.close()
                         # We can't use the cached file handle after close(). So prevent
                         # its usage.
                         self._writinghandles = None
                     new_dfh = self._datafp(b'w+')
                     new_dfh.truncate(0)  # drop any potentially existing data
                     try:
                         # copy each revision's segment, read through the index file
                         # handle, into the new standalone data file
                         with self._indexfp() as read_ifh:
                             for r in self:
                                 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
                                 if troffset <= self.start(r) + r * self.index.entry_size:
                                     trindex = r
                             new_dfh.flush()
                         # rewrite the index with the inline flag cleared
                         with self.__index_new_fp() as fp:
                             self._format_flags &= ~FLAG_INLINE_DATA
                             self._inline = False
                             for i in self:
                                 e = self.index.entry_binary(i)
                                 # without a docket, the format header is prepended to
                                 # the very first entry
                                 if i == 0 and self._docket is None:
                                     header = self._format_flags | self._format_version
                                     header = self.index.pack_header(header)
                                     e = header + e
                                 fp.write(e)
                             if self._docket is not None:
                                 self._docket.index_end = fp.tell()
                             # There is a small transactional race here. If the rename of
                             # the index fails, we should remove the datafile. It is more
                             # important to ensure that the data file is not truncated
                             # when the index is replaced as otherwise data is lost.
                             tr.replace(self._datafile, self.start(trindex))
                             # the temp file replace the real index when we exit the context
                             # manager
                         tr.replace(self._indexfile, trindex * self.index.entry_size)
                         nodemaputil.setup_persistent_nodemap(tr, self)
                         self._chunkclear()
                         if existing_handles:
                             # switched from inline to conventional reopen the index
                             ifh = self.__index_write_fp()
                             self._writinghandles = (ifh, new_dfh, None)
                             # ownership of the data handle moved to _writinghandles;
                             # clearing the local keeps the `finally` from closing it
                             new_dfh = None
                     finally:
                         if new_dfh is not None:
                             new_dfh.close()
                 def _nodeduplicatecallback(self, transaction, node):
                     """called when trying to add a node already stored."""
                 @contextlib.contextmanager
                 def _writing(self, transaction):
                     if self._trypending:
                         msg = b'try to write in a `trypending` revlog: %s'
                         msg %= self.display_id
                         raise error.ProgrammingError(msg)
                     if self._writinghandles is not None:
                         yield
                     else:
                         ifh = dfh = sdfh = None
                         try:
                             r = len(self)
                             # opening the data file.
                             dsize = 0
                             if r:
                                 dsize = self.end(r - 1)
                             dfh = None
                             if not self._inline:
                                 try:
                                     dfh = self._datafp(b"r+")
                                     if self._docket is None:
                                         dfh.seek(0, os.SEEK_END)
                                     else:
                                         dfh.seek(self._docket.data_end, os.SEEK_SET)
                                 except IOError as inst:
                                     if inst.errno != errno.ENOENT:
                                         raise
                                     dfh = self._datafp(b"w+")
                                 transaction.add(self._datafile, dsize)
                             if self._sidedatafile is not None:
                                 try:
                                     sdfh = self.opener(self._sidedatafile, mode=b"r+")
                                     dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
                                 except IOError as inst:
                                     if inst.errno != errno.ENOENT:
                                         raise
                                     sdfh = self.opener(self._sidedatafile, mode=b"w+")
                                 transaction.add(
                                     self._sidedatafile, self._docket.sidedata_end
                                 )
                             # opening the index file.
                             isize = r * self.index.entry_size
                             ifh = self.__index_write_fp()
                             if self._inline:
                                 transaction.add(self._indexfile, dsize + isize)
                             else:
                                 transaction.add(self._indexfile, isize)
                             # exposing all file handle for writing.
                             self._writinghandles = (ifh, dfh, sdfh)
                             yield
                             if self._docket is not None:
                                 self._write_docket(transaction)
                         finally:
                             self._writinghandles = None
                             if dfh is not None:
                                 dfh.close()
                             if sdfh is not None:
                                 dfh.close()
                             # closing the index file last to avoid exposing referent to
                             # potential unflushed data content.
                             if ifh is not None:
                                 ifh.close()
                 def _write_docket(self, transaction):
                     """write the current docket on disk
                     Exist as a method to help changelog to implement transaction logic
                     We could also imagine using the same transaction logic for all revlog
                     since docket are cheap."""
                     self._docket.write(transaction)
                 def addrevision(
                     self,
                     text,
                     transaction,
                     link,
                     p1,
                     p2,
                     cachedelta=None,
                     node=None,
                     flags=REVIDX_DEFAULT_FLAGS,
                     deltacomputer=None,
                     sidedata=None,
                 ):
                     """Add a revision to the log and return its revision number.

                     text - the revision data to add
                     transaction - the transaction object used for rollback
                     link - the linkrev data to add
                     p1, p2 - the parent nodeids of the revision
                     cachedelta - an optional precomputed delta
                     node - nodeid of revision; typically node is not specified, and it is
                         computed by default as hash(text, p1, p2), however subclasses might
                         use different hashing method (and override checkhash() in such case)
                     flags - the known flags to set on the revision
                     deltacomputer - an optional deltacomputer instance shared between
                         multiple calls
                     sidedata - an optional mapping of sidedata to store with the
                         revision; only accepted when the revlog has sidedata support

                     If the node is already in the index, its existing revision number
                     is returned and nothing is written.
                     """
                     if link == nullrev:
                         msg = _(b"attempted to add linkrev -1 to %s")
                         raise error.RevlogError(msg % self.display_id)

                     if sidedata is None:
                         sidedata = {}
                     elif sidedata and not self.hassidedata:
                         raise error.ProgrammingError(
                             _(b"trying to add sidedata to a revlog who don't support them")
                         )

                     # With flags set, the node must be derived from the text as given,
                     # before the flag processors get a chance to transform it.
                     if flags and not node:
                         node = self.hash(text, p1, p2)

                     rawtext, validatehash = flagutil.processflagswrite(self, text, flags)

                     # If the flag processor modifies the revision data, ignore any provided
                     # cachedelta.
                     if rawtext != text:
                         cachedelta = None

                     rawlen = len(rawtext)
                     if rawlen > _maxentrysize:
                         raise error.RevlogError(
                             _(
                                 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
                             )
                             % (self.display_id, rawlen)
                         )

                     if not node:
                         node = self.hash(rawtext, p1, p2)

                     known = self.index.get_rev(node)
                     if known is not None:
                         return known

                     if validatehash:
                         self.checkhash(rawtext, node, p1=p1, p2=p2)

                     return self.addrawrevision(
                         rawtext,
                         transaction,
                         link,
                         p1,
                         p2,
                         node,
                         flags,
                         cachedelta=cachedelta,
                         deltacomputer=deltacomputer,
                         sidedata=sidedata,
                     )
                 def addrawrevision(
                     self,
                     rawtext,
                     transaction,
                     link,
                     p1,
                     p2,
                     node,
                     flags,
                     cachedelta=None,
                     deltacomputer=None,
                     sidedata=None,
                 ):
                     """Add a raw revision whose flags, node and parents are known.

                     Useful when reusing a revision not stored in this revlog (ex:
                     received over wire, or read from an external bundle). The write
                     happens inside a ``_writing`` context and the result of
                     ``_addrevision`` is returned.
                     """
                     with self._writing(transaction):
                         rev = self._addrevision(
                             node,
                             rawtext,
                             transaction,
                             link,
                             p1,
                             p2,
                             flags,
                             cachedelta,
                             deltacomputer=deltacomputer,
                             sidedata=sidedata,
                         )
                     return rev
                 def compress(self, data):
                     """Generate a possibly-compressed representation of data.

                     Returns a ``(header, payload)`` pair. The header is ``b'u'`` when
                     the data is stored uncompressed and needs an explicit marker, and
                     empty otherwise.
                     """
                     if data:
                         packed = self._compressor.compress(data)
                         if packed:
                             # The revlog compressor added the header in the returned
                             # data.
                             return b'', packed
                         if data[0:1] != b'\0':
                             return b'u', data
                     # empty data, or data already starting with a NUL byte, is
                     # stored as-is without a marker
                     return b'', data
                 def decompress(self, data):
                     """Decompress a revlog chunk.

                     The chunk is expected to begin with a header identifying the
                     format type so it can be routed to an appropriate decompressor.
                     Empty chunks are returned unchanged.
                     """
                     if not data:
                         return data

                     # Revlogs are read much more frequently than they are written and
                     # many chunks only take microseconds to decompress, so performance
                     # is important here.
                     #
                     # We can make a few assumptions about revlogs:
                     #
                     # 1) the majority of chunks will be compressed (as opposed to inline
                     #    raw data).
                     # 2) decompressing *any* data will likely by at least 10x slower than
                     #    returning raw inline data.
                     # 3) we want to prioritize common and officially supported
                     #    compression engines
                     #
                     # It follows that the "compressed with a common, officially
                     # supported engine" case is optimized over "raw data" and "data
                     # encoded by less common or non-official engines". That is why the
                     # inline header checks come first, followed by the compengines
                     # lookup.
                     #
                     # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
                     # compressed chunks. And this matters for changelog and manifest
                     # reads.
                     header = data[0:1]

                     if header == b'x':
                         try:
                             return _zlibdecompress(data)
                         except zlib.error as e:
                             raise error.RevlogError(
                                 _(b'revlog decompress error: %s')
                                 % stringutil.forcebytestr(e)
                             )

                     # '\0' is more common than 'u' so it goes first.
                     if header == b'\0':
                         return data

                     if header == b'u':
                         return util.buffer(data, 1)

                     engine = self._get_decompressor(header)
                     return engine.decompress(data)
                 def _addrevision(
                     self,
                     node,
                     rawtext,
                     transaction,
                     link,
                     p1,
                     p2,
                     flags,
                     cachedelta,
                     alwayscache=False,
                     deltacomputer=None,
                     sidedata=None,
                 ):
                     """internal function to add revisions to the log

                     see addrevision for argument descriptions.

                     note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

                     if "deltacomputer" is not provided or None, a defaultdeltacomputer will
                     be used.

                     if "alwayscache" is set and no rawtext was given, the text is built
                     from the delta so it can be put in the revision cache.

                     Returns the revision number of the newly added revision. Must be
                     called with the writing handles open (see `_writing`), otherwise a
                     ProgrammingError is raised. Adding the null node or a wdir node
                     raises a RevlogError.

                     invariants:
                     - rawtext is optional (can be None); if not set, cachedelta must be set.
                       if both are set, they must correspond to each other.
                     """
                     if node == self.nullid:
                         raise error.RevlogError(
                             _(b"%s: attempt to add null revision") % self.display_id
                         )
                     if (
                         node == self.nodeconstants.wdirid
                         or node in self.nodeconstants.wdirfilenodeids
                     ):
                         raise error.RevlogError(
                             _(b"%s: attempt to add wdir revision") % self.display_id
                         )
                     if self._writinghandles is None:
                         msg = b'adding revision outside `revlog._writing` context'
                         raise error.ProgrammingError(msg)
                     # fh is the handle given to the delta computer: the index handle
                     # for an inline revlog, the data handle otherwise
                     if self._inline:
                         fh = self._writinghandles[0]
                     else:
                         fh = self._writinghandles[1]
                     # one-element list shared with the revision info; btext[0] is read
                     # back after the entry is written
                     btext = [rawtext]
                     # revision number the new entry will get, and the current tip
                     curr = len(self)
                     prev = curr - 1
                     offset = self._get_data_offset(prev)
                     if self._concurrencychecker:
                         ifh, dfh, sdfh = self._writinghandles
                         # XXX no checking for the sidedata file
                         if self._inline:
                             # offset is "as if" it were in the .d file, so we need to add on
                             # the size of the entry metadata.
                             self._concurrencychecker(
                                 ifh, self._indexfile, offset + curr * self.index.entry_size
                             )
                         else:
                             # Entries in the .i are a consistent size.
                             self._concurrencychecker(
                                 ifh, self._indexfile, curr * self.index.entry_size
                             )
                             self._concurrencychecker(dfh, self._datafile, offset)
                     p1r, p2r = self.rev(p1), self.rev(p2)
                     # full versions are inserted when the needed deltas
                     # become comparable to the uncompressed text
                     if rawtext is None:
                         # need rawtext size, before changed by flag processors, which is
                         # the non-raw size. use revlog explicitly to avoid filelog's extra
                         # logic that might remove metadata size.
                         textlen = mdiff.patchedsize(
                             revlog.size(self, cachedelta[0]), cachedelta[1]
                         )
                     else:
                         textlen = len(rawtext)
                     if deltacomputer is None:
                         deltacomputer = deltautil.deltacomputer(self)
                     revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
                     deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
                     # Pick the compression mode recorded in the index entry. Without a
                     # docket it stays COMP_MODE_INLINE; with one it is derived from
                     # the (header, data) pair of the delta.
                     compression_mode = COMP_MODE_INLINE
                     if self._docket is not None:
                         h, d = deltainfo.data
                         if not h and not d:
                             # not data to store at all... declare them uncompressed
                             compression_mode = COMP_MODE_PLAIN
                         elif not h:
                             # no separate header: look at the first byte of the data
                             t = d[0:1]
                             if t == b'\0':
                                 compression_mode = COMP_MODE_PLAIN
                             elif t == self._docket.default_compression_header:
                                 compression_mode = COMP_MODE_DEFAULT
                         elif h == b'u':
                             # we have a more efficient way to declare uncompressed
                             h = b''
                             compression_mode = COMP_MODE_PLAIN
                             deltainfo = deltautil.drop_u_compression(deltainfo)
                     sidedata_compression_mode = COMP_MODE_INLINE
                     if sidedata and self.hassidedata:
                         # start from the plain serialization; it is only replaced by
                         # the compressed form when that form is strictly smaller
                         sidedata_compression_mode = COMP_MODE_PLAIN
                         serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
                         sidedata_offset = self._docket.sidedata_end
                         h, comp_sidedata = self.compress(serialized_sidedata)
                         if (
                             h != b'u'
                             and comp_sidedata[0:1] != b'\0'
                             and len(comp_sidedata) < len(serialized_sidedata)
                         ):
                             assert not h
                             if (
                                 comp_sidedata[0:1]
                                 == self._docket.default_compression_header
                             ):
                                 sidedata_compression_mode = COMP_MODE_DEFAULT
                                 serialized_sidedata = comp_sidedata
                             else:
                                 sidedata_compression_mode = COMP_MODE_INLINE
                                 serialized_sidedata = comp_sidedata
                     else:
                         serialized_sidedata = b""
                         # Don't store the offset if the sidedata is empty, that way
                         # we can easily detect empty sidedata and they will be no different
                         # than ones we manually add.
                         sidedata_offset = 0
                     # the new index entry; positions 8, 9 and 11 (sidedata offset,
                     # sidedata size, sidedata compression mode) are what `_sidedata`
                     # reads back
                     e = (
                         offset_type(offset, flags),
                         deltainfo.deltalen,
                         textlen,
                         deltainfo.base,
                         link,
                         p1r,
                         p2r,
                         node,
                         sidedata_offset,
                         len(serialized_sidedata),
                         compression_mode,
                         sidedata_compression_mode,
                     )
                     self.index.append(e)
                     entry = self.index.entry_binary(curr)
                     # without a docket, the format header is prepended to the very
                     # first entry
                     if curr == 0 and self._docket is None:
                         header = self._format_flags | self._format_version
                         header = self.index.pack_header(header)
                         entry = header + entry
                     self._writeentry(
                         transaction,
                         entry,
                         deltainfo.data,
                         link,
                         offset,
                         serialized_sidedata,
                         sidedata_offset,
                     )
                     rawtext = btext[0]
                     if alwayscache and rawtext is None:
                         rawtext = deltacomputer.buildtext(revinfo, fh)
                     if type(rawtext) == bytes:  # only accept immutable objects
                         self._revisioncache = (node, curr, rawtext)
                     self._chainbasecache[curr] = deltainfo.chainbase
                     return curr
                 def _get_data_offset(self, prev):
                     """Returns the current offset in the (in-transaction) data file.
                     Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket
                     file to store that information: since sidedata can be rewritten to the
                     end of the data file within a transaction, you can have cases where, for
                     example, rev `n` does not have sidedata while rev `n - 1` does, leading
                     to `n - 1`'s sidedata being written after `n`'s data.
                     TODO cache this in a docket file before getting out of experimental."""
                     if self._docket is None:
                         return self.end(prev)
                     else:
                         return self._docket.data_end
                 def _writeentry(
                     self, transaction, entry, data, link, offset, sidedata, sidedata_offset
                 ):
                     """Write one packed index entry and its payload to the open files.

                     ``entry`` is the binary index entry, ``data`` the (header, data)
                     pair to store and ``sidedata`` the serialized sidedata. Written
                     positions are registered in ``transaction`` first. Must run with
                     the writing handles open, otherwise a ProgrammingError is raised.
                     """
                     # Files opened in a+ mode have inconsistent behavior on various
                     # platforms. Windows requires that a file positioning call be made
                     # when the file handle transitions between reads and writes. See
                     # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
                     # platforms, Python or the platform itself can be buggy. Some versions
                     # of Solaris have been observed to not append at the end of the file
                     # if the file was seeked to before the end. See issue4943 for more.
                     #
                     # We work around this issue by inserting a seek() before writing.
                     # Note: This is likely not necessary on Python 3. However, because
                     # the file handle is reused for reads and may be seeked there, we need
                     # to be careful before changing this.
                     handles = self._writinghandles
                     if handles is None:
                         msg = b'adding revision outside `revlog._writing` context'
                         raise error.ProgrammingError(msg)
                     ifh, dfh, sdfh = handles

                     has_docket = self._docket is not None
                     if has_docket:
                         ifh.seek(self._docket.index_end, os.SEEK_SET)
                     else:
                         ifh.seek(0, os.SEEK_END)
                     if dfh:
                         if has_docket:
                             dfh.seek(self._docket.data_end, os.SEEK_SET)
                         else:
                             dfh.seek(0, os.SEEK_END)
                     if sdfh:
                         sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

                     curr = len(self) - 1
                     if self._inline:
                         # entry and payload both go to the index file; the offset has
                         # to account for the index entries interleaved with the data
                         offset += curr * self.index.entry_size
                         transaction.add(self._indexfile, offset)
                         ifh.write(entry)
                         ifh.write(data[0])
                         ifh.write(data[1])
                         assert not sidedata
                         self._enforceinlinesize(transaction)
                     else:
                         transaction.add(self._datafile, offset)
                         if self._sidedatafile:
                             transaction.add(self._sidedatafile, sidedata_offset)
                         transaction.add(self._indexfile, curr * len(entry))
                         if data[0]:
                             dfh.write(data[0])
                         dfh.write(data[1])
                         if sidedata:
                             sdfh.write(sidedata)
                         ifh.write(entry)

                     if self._docket is not None:
                         # read the handles from the instance again rather than reusing
                         # the locals above
                         self._docket.index_end = self._writinghandles[0].tell()
                         self._docket.data_end = self._writinghandles[1].tell()
                         self._docket.sidedata_end = self._writinghandles[2].tell()

                     nodemaputil.setup_persistent_nodemap(transaction, self)
                 def addgroup(
                     self,
                     deltas,
                     linkmapper,
                     transaction,
                     alwayscache=False,
                     addrevisioncb=None,
                     duplicaterevisioncb=None,
                 ):
                     """Add a group of deltas to this revlog.
                     ``deltas`` yields 8-item tuples ``(node, p1, p2, linknode, deltabase,
                     delta, flags, sidedata)``. The first delta is against its parent,
                     which should already be in the log; the rest are against the previous
                     delta. ``linkmapper`` turns each ``linknode`` into a link revision.
                     ``addrevisioncb``, when set, is called with this revlog and the value
                     returned by ``_addrevision`` for each added revision.
                     ``duplicaterevisioncb``, when set, is called with this revlog and the
                     existing revision for each node that is already stored.
                     Returns True when at least one item of ``deltas`` was handled (added
                     or found to be a duplicate), False otherwise. Nested calls raise
                     ``error.ProgrammingError``; unknown parents or delta bases raise
                     ``error.LookupError``.
                     """
                     if self._adding_group:
                         raise error.ProgrammingError(b'cannot nest addgroup() calls')
                     self._adding_group = True
                     processed = False
                     try:
                         with self._writing(transaction):
                             deltacomputer = deltautil.deltacomputer(self)
                             # walk the incoming deltas one by one
                             for (
                                 node,
                                 p1,
                                 p2,
                                 linknode,
                                 deltabase,
                                 delta,
                                 flags,
                                 sidedata,
                             ) in deltas:
                                 link = linkmapper(linknode)
                                 flags = flags or REVIDX_DEFAULT_FLAGS
                                 known_rev = self.index.get_rev(node)
                                 if known_rev is not None:
                                     # two branches may carry the very same change
                                     self._nodeduplicatecallback(transaction, known_rev)
                                     if duplicaterevisioncb:
                                         duplicaterevisioncb(self, known_rev)
                                     processed = True
                                     continue
                                 for parent in (p1, p2):
                                     if not self.index.has_node(parent):
                                         raise error.LookupError(
                                             parent, self.radix, _(b'unknown parent')
                                         )
                                 if not self.index.has_node(deltabase):
                                     raise error.LookupError(
                                         deltabase, self.display_id, _(b'unknown delta base')
                                     )
                                 baserev = self.rev(deltabase)
                                 if baserev != nullrev and self.iscensored(baserev):
                                     # a delta against a censored base has to replace the
                                     # whole base text in one single patch operation
                                     header_len = struct.calcsize(b">lll")
                                     expected_header = mdiff.replacediffheader(
                                         self.rawsize(baserev), len(delta) - header_len
                                     )
                                     if delta[:header_len] != expected_header:
                                         raise error.CensoredBaseError(
                                             self.display_id, self.node(baserev)
                                         )
                                 if not flags and self._peek_iscensored(baserev, delta):
                                     flags |= REVIDX_ISCENSORED
                                 # Consumers of addrevisioncb are assumed to fetch the
                                 # revision they were just told about through revision(),
                                 # which is fast on a cache hit; ``alwayscache`` is passed
                                 # down to _addrevision() for that purpose.
                                 # addgroup() is only used in the context of changegroup
                                 # generation, so the revision data can always be handled
                                 # as raw by the flagprocessor.
                                 added_rev = self._addrevision(
                                     node,
                                     None,
                                     transaction,
                                     link,
                                     p1,
                                     p2,
                                     flags,
                                     (baserev, delta),
                                     alwayscache=alwayscache,
                                     deltacomputer=deltacomputer,
                                     sidedata=sidedata,
                                 )
                                 if addrevisioncb:
                                     addrevisioncb(self, added_rev)
                                 processed = True
                     finally:
                         self._adding_group = False
                     return processed
                 def iscensored(self, rev):
                     """Tell whether revision ``rev`` carries the censored flag.
                     Always False when this revlog is not censorable; otherwise the
                     ``REVIDX_ISCENSORED`` bit of the revision flags (truthy when set).
                     """
                     if self._censorable:
                         return self.flags(rev) & REVIDX_ISCENSORED
                     return False
                 def _peek_iscensored(self, baserev, delta):
                     """Quickly check if a delta produces a censored revision."""
                     if not self._censorable:
                         return False
                     return storageutil.deltaiscensored(delta, baserev, self.rawsize)
                 def getstrippoint(self, minlink):
                     """Find the lowest revision to strip in order to strip linkrev ``minlink``.
                     Returns a tuple holding that minimum revision and the set of all
                     revisions whose linkrevs will be broken by the strip.
                     """
                     tiprev = len(self) - 1
                     heads = self.headrevs()
                     return storageutil.resolvestripinfo(
                         minlink, tiprev, heads, self.linkrev, self.parentrevs
                     )
                 def strip(self, minlink, transaction):
                     """Truncate the revlog at the first revision whose linkrev is >= minlink.
                     Called when changelog revision ``minlink`` and its descendants are
                     stripped from the repository. Every revision with a linkrev >=
                     ``minlink`` has to go because the matching changelog revisions will be
                     renumbered afterwards. The revlog is cut at the first such revision;
                     the caller is trusted to have saved the revisions that must survive
                     and to re-add them after the truncation.
                     """
                     if not len(self):
                         return
                     striprev, _brokenrevs = self.getstrippoint(minlink)
                     if striprev == len(self):
                         return
                     # first truncate the files on disk
                     data_end = self.start(striprev)
                     index_end = striprev * self.index.entry_size
                     if self._inline:
                         # the index cut-off also has to skip the data before ``striprev``
                         index_end += data_end
                     else:
                         transaction.add(self._datafile, data_end)
                     if self._sidedatafile:
                         sidedata_end = self.sidedata_cut_off(striprev)
                         transaction.add(self._sidedatafile, sidedata_end)
                     transaction.add(self._indexfile, index_end)
                     docket = self._docket
                     if docket is not None:
                         # XXX we could leverage the docket while stripping. However it is
                         # not powerful enough at the time of this comment
                         docket.index_end = index_end
                         docket.data_end = data_end
                         docket.sidedata_end = sidedata_end
                         docket.write(transaction, stripping=True)
                     # then reset internal state in memory to forget those revisions
                     self._revisioncache = None
                     self._chaininfocache = util.lrucachedict(500)
                     self._chunkclear()
                     del self.index[striprev:-1]
                 def checksize(self):
                     """Check size of index and data files
                     return a (dd, di) tuple.
                     - dd: extra bytes for the "data" file
                     - di: extra bytes for the "index" file
                     A healthy revlog will return (0, 0).
                     A missing file (``ENOENT``) counts as having no extra bytes; any
                     other ``IOError`` is propagated to the caller.
                     """
                     expected = 0
                     if len(self):
                         expected = max(0, self.end(len(self) - 1))
                     try:
                         with self._datafp() as f:
                             f.seek(0, io.SEEK_END)
                             actual = f.tell()
                         dd = actual - expected
                     except IOError as inst:
                         if inst.errno != errno.ENOENT:
                             raise
                         dd = 0
                     try:
                         # Use a context manager, like for the data file above, so the
                         # handle is released even when seek()/tell() raises.
                         with self.opener(self._indexfile) as f:
                             f.seek(0, io.SEEK_END)
                             actual = f.tell()
                         s = self.index.entry_size
                         # bytes left over after the last complete index entry
                         i = max(0, actual // s)
                         di = actual - (i * s)
                         if self._inline:
                             # inline revlogs keep the revision data inside the index file,
                             # so there is no separate data file to report on and the
                             # index size has to account for the data as well
                             databytes = sum(max(0, self.length(r)) for r in self)
                             dd = 0
                             di = actual - len(self) * s - databytes
                     except IOError as inst:
                         if inst.errno != errno.ENOENT:
                             raise
                         di = 0
                     return (dd, di)
                 def files(self):
                     """List the files backing this revlog.
                     The index file always comes first; the data file follows unless the
                     revlog is inline.
                     """
                     if self._inline:
                         return [self._indexfile]
                     return [self._indexfile, self._datafile]
                 def emitrevisions(
                     self,
                     nodes,
                     nodesorder=None,
                     revisiondata=False,
                     assumehaveparentrevisions=False,
                     deltamode=repository.CG_DELTAMODE_STD,
                     sidedata_helpers=None,
                 ):
                     """Emit revision deltas for ``nodes`` via ``storageutil.emitrevisions``.
                     ``nodesorder`` must be ``b'nodes'``, ``b'storage'``, ``b'linear'`` or
                     None, otherwise ``error.ProgrammingError`` is raised. When it is None
                     and the revlog does not use generaldelta, ``b'storage'`` is used.
                     When delta chains are not stored, any ``deltamode`` other than
                     ``CG_DELTAMODE_PREV`` is replaced by ``CG_DELTAMODE_FULL``.
                     """
                     accepted_orders = (b'nodes', b'storage', b'linear', None)
                     if nodesorder not in accepted_orders:
                         raise error.ProgrammingError(
                             b'unhandled value for nodesorder: %s' % nodesorder
                         )
                     if nodesorder is None and not self._generaldelta:
                         nodesorder = b'storage'
                     force_full = (
                         not self._storedeltachains
                         and deltamode != repository.CG_DELTAMODE_PREV
                     )
                     if force_full:
                         deltamode = repository.CG_DELTAMODE_FULL
                     return storageutil.emitrevisions(
                         self,
                         nodes,
                         nodesorder,
                         revlogrevisiondelta,
                         deltaparentfn=self.deltaparent,
                         candeltafn=self.candelta,
                         rawsizefn=self.rawsize,
                         revdifffn=self.revdiff,
                         flagsfn=self.flags,
                         deltamode=deltamode,
                         revisiondata=revisiondata,
                         assumehaveparentrevisions=assumehaveparentrevisions,
                         sidedata_helpers=sidedata_helpers,
                     )
                 # Delta reuse policies accepted by ``clone()`` (``deltareuse`` argument).
                 DELTAREUSEALWAYS = b'always'
                 DELTAREUSESAMEREVS = b'samerevs'
                 DELTAREUSENEVER = b'never'
                 DELTAREUSEFULLADD = b'fulladd'
                 # Every valid policy, used to validate the ``deltareuse`` argument.
                 DELTAREUSEALL = {
                     DELTAREUSEALWAYS,
                     DELTAREUSESAMEREVS,
                     DELTAREUSENEVER,
                     DELTAREUSEFULLADD,
                 }
                 def clone(
                     self,
                     tr,
                     destrevlog,
                     addrevisioncb=None,
                     deltareuse=DELTAREUSESAMEREVS,
                     forcedeltabothparents=None,
                     sidedata_helpers=None,
                 ):
                     """Copy this revlog to another, possibly with format changes.
                     The destination revlog will contain the same revisions and nodes.
                     However, it may not be bit-for-bit identical due to e.g. delta encoding
                     differences.
                     The ``deltareuse`` argument control how deltas from the existing revlog
                     are preserved in the destination revlog. The argument can have the
                     following values:
                     DELTAREUSEALWAYS
                        Deltas will always be reused (if possible), even if the destination
                        revlog would not select the same revisions for the delta. This is the
                        fastest mode of operation.
                     DELTAREUSESAMEREVS
                        Deltas will be reused if the destination revlog would pick the same
                        revisions for the delta. This mode strikes a balance between speed
                        and optimization.
                     DELTAREUSENEVER
                        Deltas will never be reused. This is the slowest mode of execution.
                        This mode can be used to recompute deltas (e.g. if the diff/delta
                        algorithm changes).
                     DELTAREUSEFULLADD
                        Revision will be re-added as if their were new content. This is
                        slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
                        eg: large file detection and handling.
                     Delta computation can be slow, so the choice of delta reuse policy can
                     significantly affect run time.
                     The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
                     two extremes. Deltas will be reused if they are appropriate. But if the
                     delta could choose a better revision, it will do so. This means if you
                     are converting a non-generaldelta revlog to a generaldelta revlog,
                     deltas will be recomputed if the delta's parent isn't a parent of the
                     revision.
                     In addition to the delta policy, the ``forcedeltabothparents``
                     argument controls whether to force compute deltas against both parents
                     for merges. By default, the current default is used.
                     See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
                     `sidedata_helpers`.
                     """
                     if deltareuse not in self.DELTAREUSEALL:
                         raise ValueError(
                             _(b'value for deltareuse invalid: %s') % deltareuse
                         )
                     if len(destrevlog):
                         raise ValueError(_(b'destination revlog is not empty'))
                     if getattr(self, 'filteredrevs', None):
                         raise ValueError(_(b'source revlog has filtered revisions'))
                     if getattr(destrevlog, 'filteredrevs', None):
                         raise ValueError(_(b'destination revlog has filtered revisions'))
                     # lazydelta and lazydeltabase controls whether to reuse a cached delta,
                     # if possible.
                     oldlazydelta = destrevlog._lazydelta
                     oldlazydeltabase = destrevlog._lazydeltabase
                     oldamd = destrevlog._deltabothparents
                     try:
                         if deltareuse == self.DELTAREUSEALWAYS:
                             destrevlog._lazydeltabase = True
                             destrevlog._lazydelta = True
                         elif deltareuse == self.DELTAREUSESAMEREVS:
                             destrevlog._lazydeltabase = False
                             destrevlog._lazydelta = True
                         elif deltareuse == self.DELTAREUSENEVER:
                             destrevlog._lazydeltabase = False
                             destrevlog._lazydelta = False
                         destrevlog._deltabothparents = forcedeltabothparents or oldamd
                         self._clone(
                             tr,
                             destrevlog,
                             addrevisioncb,
                             deltareuse,
                             forcedeltabothparents,
                             sidedata_helpers,
                         )
                     finally:
                         destrevlog._lazydelta = oldlazydelta
                         destrevlog._lazydeltabase = oldlazydeltabase
                         destrevlog._deltabothparents = oldamd
                 def _clone(
                     self,
                     tr,
                     destrevlog,
                     addrevisioncb,
                     deltareuse,
                     forcedeltabothparents,
                     sidedata_helpers,
                 ):
                     """perform the core duty of `revlog.clone` after parameter processing
                     Every revision of this revlog is added to ``destrevlog`` in order,
                     either through ``addrevision`` (``DELTAREUSEFULLADD``) or through
                     ``_addrevision``, optionally handing over the stored delta. When
                     ``sidedata_helpers`` is given, the sidedata and flags of each revision
                     go through ``sidedatautil.run_sidedata_helpers`` first.
                     ``addrevisioncb``, when set, is called with this (source) revlog, the
                     revision number and the node after each revision is copied.
                     ``forcedeltabothparents`` is not used in this method.
                     """
                     deltacomputer = deltautil.deltacomputer(destrevlog)
                     index = self.index
                     for rev in self:
                         entry = index[rev]
                         # Some classes override linkrev to take filtered revs into
                         # account. Use raw entry from index.
                         # the low 16 bits of the first field are taken as the flags
                         flags = entry[0] & 0xFFFF
                         linkrev = entry[4]
                         # fields 5 and 6 are looked up in the index again and field 7 of
                         # the resulting entries (the same field used for ``node`` below)
                         # gives the parent nodes
                         p1 = index[entry[5]][7]
                         p2 = index[entry[6]][7]
                         node = entry[7]
                         # (Possibly) reuse the delta from the revlog if allowed and
                         # the revlog chunk is a delta.
                         cachedelta = None
                         rawtext = None
                         if deltareuse == self.DELTAREUSEFULLADD:
                             text = self._revisiondata(rev)
                             sidedata = self.sidedata(rev)
                             if sidedata_helpers is not None:
                                 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                                     self, sidedata_helpers, sidedata, rev
                                 )
                                 # NOTE(review): ``&`` binds tighter than ``|``, so this is
                                 # ``flags | (new_flags[0] & ~new_flags[1])``; if
                                 # ``new_flags`` is (flags to add, flags to remove), flags
                                 # already set are never removed -- confirm the intent.
                                 flags = flags | new_flags[0] & ~new_flags[1]
                             destrevlog.addrevision(
                                 text,
                                 tr,
                                 linkrev,
                                 p1,
                                 p2,
                                 cachedelta=cachedelta,
                                 node=node,
                                 flags=flags,
                                 deltacomputer=deltacomputer,
                                 sidedata=sidedata,
                             )
                         else:
                             if destrevlog._lazydelta:
                                 dp = self.deltaparent(rev)
                                 if dp != nullrev:
                                     # hand the stored chunk over as a (base rev, delta) pair
                                     cachedelta = (dp, bytes(self._chunk(rev)))
                             sidedata = None
                             if not cachedelta:
                                 # no delta to reuse: pass the raw text instead
                                 rawtext = self._revisiondata(rev)
                                 sidedata = self.sidedata(rev)
                             # fetch the sidedata when the branch above did not provide it
                             if sidedata is None:
                                 sidedata = self.sidedata(rev)
                             if sidedata_helpers is not None:
                                 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                                     self, sidedata_helpers, sidedata, rev
                                 )
                                 # NOTE(review): same operator precedence remark as in the
                                 # DELTAREUSEFULLADD branch -- confirm the intent.
                                 flags = flags | new_flags[0] & ~new_flags[1]
                             with destrevlog._writing(tr):
                                 destrevlog._addrevision(
                                     node,
                                     rawtext,
                                     tr,
                                     linkrev,
                                     p1,
                                     p2,
                                     flags,
                                     cachedelta,
                                     deltacomputer=deltacomputer,
                                     sidedata=sidedata,
                                 )
                         if addrevisioncb:
                             addrevisioncb(self, rev, node)
                 def censorrevision(self, tr, censornode, tombstone=b''):
                     """Censor the revision identified by ``censornode``.
                     Only version 1 revlogs are handled: the work is delegated to
                     ``censor.v1_censor``, which receives this revlog, the transaction
                     ``tr``, ``censornode`` and ``tombstone``. Any other format version
                     raises ``error.RevlogError``.
                     """
                     if self._format_version == REVLOGV0:
                         raise error.RevlogError(
                             _(b'cannot censor with version %d revlogs')
                             % self._format_version
                         )
                     elif self._format_version == REVLOGV1:
                         censor.v1_censor(self, tr, censornode, tombstone)
                     else:
                         # revlog v2
                         raise error.RevlogError(
                             _(b'cannot censor with version %d revlogs')
                             % self._format_version
                         )
                 def verifyintegrity(self, state):
                     """Check the revlog for integrity problems.
                     This is a generator yielding one ``revlogproblem`` per problem found.
                     ``state`` is a dict shared with the verifier: ``b'expectedversion'``
                     and ``b'erroroncensored'`` are read, ``b'skipflags'`` is read when
                     present, and ``b'skipread'`` / ``b'safe_renamed'`` are reset to empty
                     sets before the revisions are walked.
                     """
                     data_extra, index_extra = self.checksize()
                     if data_extra:
                         yield revlogproblem(
                             error=_(b'data length off by %d bytes') % data_extra
                         )
                     if index_extra:
                         yield revlogproblem(
                             error=_(b'index contains %d extra bytes') % index_extra
                         )
                     actual_version = self._format_version
                     # The verifier tells us what version revlog we should be.
                     expected_version = state[b'expectedversion']
                     if actual_version != expected_version:
                         yield revlogproblem(
                             warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                             % (self.display_id, actual_version, expected_version)
                         )
                     state[b'skipread'] = set()
                     state[b'safe_renamed'] = set()
                     for rev in self:
                         node = self.node(rev)
                         # Verify contents. 4 cases to care about:
                         #
                         #   common: the most common case
                         #   rename: with a rename
                         #   meta: file content starts with b'\1\n', the metadata
                         #         header defined in filelog.py, but without a rename
                         #   ext: content stored externally
                         #
                         # More formally, their differences are shown below:
                         #
                         #                       | common | rename | meta  | ext
                         #  -------------------------------------------------------
                         #   flags()             | 0      | 0      | 0     | not 0
                         #   renamed()           | False  | True   | False | ?
                         #   rawtext[0:2]=='\1\n'| False  | True   | True  | ?
                         #
                         # "rawtext" means the raw text stored in revlog data, which
                         # could be retrieved by "rawdata(rev)". "text"
                         # mentioned below is "revision(rev)".
                         #
                         # There are 3 different lengths stored physically:
                         #  1. L1: rawsize, stored in revlog index
                         #  2. L2: len(rawtext), stored in revlog data
                         #  3. L3: len(text), stored in revlog data if flags==0, or
                         #     possibly somewhere else if flags!=0
                         #
                         # L1 should be equal to L2. L3 could be different from them.
                         # "text" may or may not affect commit hash depending on flag
                         # processors (see flagutil.addflagprocessor).
                         #
                         #              | common  | rename | meta  | ext
                         # -------------------------------------------------
                         #    rawsize() | L1      | L1     | L1    | L1
                         #       size() | L1      | L2-LM  | L1(*) | L1 (?)
                         # len(rawtext) | L2      | L2     | L2    | L2
                         #    len(text) | L2      | L2     | L2    | L3
                         #  len(read()) | L2      | L2-LM  | L2-LM | L3 (?)
                         #
                         # LM:  length of metadata, depending on rawtext
                         # (*): not ideal, see comment in filelog.size
                         # (?): could be "- len(meta)" if the resolved content has
                         #      rename metadata
                         #
                         # Checks needed to be done:
                         #  1. length check: L1 == L2, in all cases.
                         #  2. hash check: depending on flag processor, we may need to
                         #     use either "text" (external), or "rawtext" (in revlog).
                         try:
                             skipflags = state.get(b'skipflags', 0)
                             if skipflags:
                                 # only keep the skip flags this revision actually has
                                 skipflags &= self.flags(rev)
                             _verify_revision(self, skipflags, state, node)
                             indexed_size = self.rawsize(rev)
                             stored_size = len(self.rawdata(node))
                             if indexed_size != stored_size:
                                 yield revlogproblem(
                                     error=_(b'unpacked size is %d, %d expected')
                                     % (stored_size, indexed_size),
                                     node=node,
                                 )
                         except error.CensoredNodeError:
                             if state[b'erroroncensored']:
                                 yield revlogproblem(
                                     error=_(b'censored file data'), node=node
                                 )
                                 state[b'skipread'].add(node)
                         except Exception as exc:
                             yield revlogproblem(
                                 error=_(b'unpacking %s: %s')
                                 % (short(node), stringutil.forcebytestr(exc)),
                                 node=node,
                             )
                             state[b'skipread'].add(node)
                 def storageinfo(
                     self,
                     exclusivefiles=False,
                     sharedfiles=False,
                     revisionscount=False,
                     trackedsize=False,
                     storedsize=False,
                 ):
                     """Report storage details about this revlog.

                     Each argument enables one key of the returned dict; a key is
                     only present when its argument is true:

                     * ``exclusivefiles``: list of ``(opener, path)`` pairs, the
                       index file plus the data file when the revlog is not inline
                     * ``sharedfiles``: always an empty list
                     * ``revisionscount``: ``len(self)``
                     * ``trackedsize``: sum of ``rawsize()`` over every revision
                     * ``storedsize``: sum of the on-disk size of ``self.files()``
                     """
                     info = {}

                     if exclusivefiles:
                         owned = [(self.opener, self._indexfile)]
                         if not self._inline:
                             # non-inline revlogs keep revision data in its own file
                             owned.append((self.opener, self._datafile))
                         info[b'exclusivefiles'] = owned

                     if sharedfiles:
                         info[b'sharedfiles'] = []

                     if revisionscount:
                         info[b'revisionscount'] = len(self)

                     if trackedsize:
                         info[b'trackedsize'] = sum(self.rawsize(rev) for rev in self)

                     if storedsize:
                         total = 0
                         for path in self.files():
                             total += self.opener.stat(path).st_size
                         info[b'storedsize'] = total

                     return info
                 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
                     """Generate and store sidedata for ``startrev`` to ``endrev``.

                     The sidedata helpers are run for every revision of the
                     (inclusive) range. The serialized result is written through the
                     sidedata file handle, then the matching index entries are
                     rewritten in place.

                     Nothing is done when the revlog has no sidedata support, or when
                     neither ``helpers[1]`` nor ``helpers[2]`` holds anything (nothing
                     to generate or remove).

                     Raises ``error.Abort`` if a revision of the range already has
                     sidedata: rewriting existing sidedata is not supported yet.
                     """
                     if not self.hassidedata:
                         return
                     # revlog formats with sidedata support do not support inline data
                     assert not self._inline
                     if not helpers[1] and not helpers[2]:
                         # Nothing to generate or remove
                         return

                     new_entries = []
                     # append the new sidedata
                     with self._writing(transaction):
                         ifh, _dfh, sdfh = self._writinghandles
                         # ``sidedata_end`` is an offset in the sidedata file, so it
                         # is the sidedata handle that must be positioned there
                         # before reading the offset the new chunks will start at.
                         sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

                         current_offset = sdfh.tell()
                         for rev in range(startrev, endrev + 1):
                             entry = self.index[rev]
                             new_sidedata, flags = sidedatautil.run_sidedata_helpers(
                                 store=self,
                                 sidedata_helpers=helpers,
                                 sidedata={},
                                 rev=rev,
                             )

                             serialized_sidedata = sidedatautil.serialize_sidedata(
                                 new_sidedata
                             )

                             sidedata_compression_mode = COMP_MODE_INLINE
                             if serialized_sidedata and self.hassidedata:
                                 sidedata_compression_mode = COMP_MODE_PLAIN
                                 h, comp_sidedata = self.compress(serialized_sidedata)
                                 # Slice rather than index: on Python 3, indexing
                                 # bytes yields an int, which never compares equal
                                 # to a bytes value.
                                 comp_header = comp_sidedata[0:1]
                                 if (
                                     h != b'u'
                                     and comp_header != b'\0'
                                     and len(comp_sidedata) < len(serialized_sidedata)
                                 ):
                                     assert not h
                                     default_header = (
                                         self._docket.default_compression_header
                                     )
                                     if comp_header == default_header:
                                         sidedata_compression_mode = COMP_MODE_DEFAULT
                                     else:
                                         sidedata_compression_mode = COMP_MODE_INLINE
                                     serialized_sidedata = comp_sidedata
                             if entry[8] != 0 or entry[9] != 0:
                                 # rewriting entries that already have sidedata is not
                                 # supported yet, because it introduces garbage data in the
                                 # revlog.
                                 msg = b"rewriting existing sidedata is not supported yet"
                                 raise error.Abort(msg)

                             # Apply (potential) flags to add and to remove after running
                             # the sidedata helpers. The parentheses matter: ``&`` binds
                             # tighter than ``|``, and the removal mask must also apply
                             # to the flags already stored in the entry.
                             new_offset_flags = (entry[0] | flags[0]) & ~flags[1]
                             entry_update = (
                                 current_offset,
                                 len(serialized_sidedata),
                                 new_offset_flags,
                                 sidedata_compression_mode,
                             )

                             # the sidedata computation might have moved the file
                             # cursors around
                             sdfh.seek(current_offset, os.SEEK_SET)
                             sdfh.write(serialized_sidedata)
                             new_entries.append(entry_update)
                             current_offset += len(serialized_sidedata)
                             self._docket.sidedata_end = sdfh.tell()

                         # rewrite the new index entries
                         ifh.seek(startrev * self.index.entry_size)
                         for rev, update in enumerate(new_entries, startrev):
                             self.index.replace_sidedata_info(rev, *update)
                             packed = self.index.entry_binary(rev)
                             if rev == 0 and self._docket is None:
                                 # without a docket, the first index entry also
                                 # carries the revlog header
                                 header = self._format_flags | self._format_version
                                 header = self.index.pack_header(header)
                                 packed = header + packed
                             ifh.write(packed)

mercurial/revlogutils/censor.py mercurial/revlog.py

0 copied +75 -3508

This diff has been collapsed as it changes many lines, (3583 lines changed) Show them Hide them
	@@ -1,3535 +1,102 b''
	1	# revlog.py - storage back-end for mercurial	1	# censor code related to censoring revision
	2	# coding: utf8
	3	#	2	#
	4	# Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>	3	# Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
			4	# Copyright 2015 Google, Inc <martinvonz@google.com>
	5	#	5	#
	6	# This software may be used and distributed according to the terms of the	6	# This software may be used and distributed according to the terms of the
	7	# GNU General Public License version 2 or any later version.	7	# GNU General Public License version 2 or any later version.
	8		8
	9	"""Storage back-end for Mercurial.	9	from ..node import (
	10
	11	This provides efficient delta storage with O(1) retrieve and append
	12	and O(changes) merge between branches.
	13	"""
	14
	15	from __future__ import absolute_import
	16
	17	import binascii
	18	import collections
	19	import contextlib
	20	import errno
	21	import io
	22	import os
	23	import struct
	24	import zlib
	25
	26	# import stuff from node for others to import from revlog
	27	from .node import (
	28	bin,
	29	hex,
	30	nullrev,	10	nullrev,
	31	sha1nodeconstants,
	32	short,
	33	wdirrev,
	34	)
	35	from .i18n import _
	36	from .pycompat import getattr
	37	from .revlogutils.constants import (
	38	ALL_KINDS,
	39	CHANGELOGV2,
	40	COMP_MODE_DEFAULT,
	41	COMP_MODE_INLINE,
	42	COMP_MODE_PLAIN,
	43	FEATURES_BY_VERSION,
	44	FLAG_GENERALDELTA,
	45	FLAG_INLINE_DATA,
	46	INDEX_HEADER,
	47	KIND_CHANGELOG,
	48	REVLOGV0,
	49	REVLOGV1,
	50	REVLOGV1_FLAGS,
	51	REVLOGV2,
	52	REVLOGV2_FLAGS,
	53	REVLOG_DEFAULT_FLAGS,
	54	REVLOG_DEFAULT_FORMAT,
	55	REVLOG_DEFAULT_VERSION,
	56	SUPPORTED_FLAGS,
	57	)
	58	from .revlogutils.flagutil import (
	59	REVIDX_DEFAULT_FLAGS,
	60	REVIDX_ELLIPSIS,
	61	REVIDX_EXTSTORED,
	62	REVIDX_FLAGS_ORDER,
	63	REVIDX_HASCOPIESINFO,
	64	REVIDX_ISCENSORED,
	65	REVIDX_RAWTEXT_CHANGING_FLAGS,
	66	)
	67	from .thirdparty import attr
	68	from . import (
	69	ancestor,
	70	dagop,
	71	error,
	72	mdiff,
	73	policy,
	74	pycompat,
	75	templatefilters,
	76	util,
	77	)
	78	from .interfaces import (
	79	repository,
	80	util as interfaceutil,
	81	)
	82	from .revlogutils import (
	83	deltas as deltautil,
	84	docket as docketutil,
	85	flagutil,
	86	nodemap as nodemaputil,
	87	revlogv0,
	88	sidedata as sidedatautil,
	89	)
	90	from .utils import (
	91	storageutil,
	92	stringutil,
	93	)
	94
	95	# blanked usage of all the name to prevent pyflakes constraints
	96	# We need these name available in the module for extensions.
	97
	98	REVLOGV0
	99	REVLOGV1
	100	REVLOGV2
	101	FLAG_INLINE_DATA
	102	FLAG_GENERALDELTA
	103	REVLOG_DEFAULT_FLAGS
	104	REVLOG_DEFAULT_FORMAT
	105	REVLOG_DEFAULT_VERSION
	106	REVLOGV1_FLAGS
	107	REVLOGV2_FLAGS
	108	REVIDX_ISCENSORED
	109	REVIDX_ELLIPSIS
	110	REVIDX_HASCOPIESINFO
	111	REVIDX_EXTSTORED
	112	REVIDX_DEFAULT_FLAGS
	113	REVIDX_FLAGS_ORDER
	114	REVIDX_RAWTEXT_CHANGING_FLAGS
	115
	116	parsers = policy.importmod('parsers')
	117	rustancestor = policy.importrust('ancestor')
	118	rustdagop = policy.importrust('dagop')
	119	rustrevlog = policy.importrust('revlog')
	120
	121	# Aliased for performance.
	122	_zlibdecompress = zlib.decompress
	123
	124	# max size of revlog with inline data
	125	_maxinline = 131072
	126	_chunksize = 1048576
	127
	128	# Flag processors for REVIDX_ELLIPSIS.
	129	def ellipsisreadprocessor(rl, text):
	130	return text, False
	131
	132
	133	def ellipsiswriteprocessor(rl, text):
	134	return text, False
	135
	136
	137	def ellipsisrawprocessor(rl, text):
	138	return False
	139
	140
	141	ellipsisprocessor = (
	142	ellipsisreadprocessor,
	143	ellipsiswriteprocessor,
	144	ellipsisrawprocessor,
	145	)	11	)
	146		12	from ..i18n import _
	147		13	from .. import (
	148	def offset_type(offset, type):	14	error,
	149	if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
	150	raise ValueError(b'unknown revlog index flags')
	151	return int(int(offset) << 16 \| type)
	152
	153
	154	def _verify_revision(rl, skipflags, state, node):
	155	"""Verify the integrity of the given revlog ``node`` while providing a hook
	156	point for extensions to influence the operation."""
	157	if skipflags:
	158	state[b'skipread'].add(node)
	159	else:
	160	# Side-effect: read content and verify hash.
	161	rl.revision(node)
	162
	163
	164	# True if a fast implementation for persistent-nodemap is available
	165	#
	166	# We also consider we have a "fast" implementation in "pure" python because
	167	# people using pure don't really have performance consideration (and a
	168	# wheelbarrow of other slowness source)
	169	HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
	170	parsers, 'BaseIndexObject'
	171	)	15	)
	172		16	from ..utils import (
	173		17	storageutil,
	174	@attr.s(slots=True, frozen=True)
	175	class _revisioninfo(object):
	176	"""Information about a revision that allows building its fulltext
	177	node: expected hash of the revision
	178	p1, p2: parent revs of the revision
	179	btext: built text cache consisting of a one-element list
	180	cachedelta: (baserev, uncompressed_delta) or None
	181	flags: flags associated to the revision storage
	182
	183	One of btext[0] or cachedelta must be set.
	184	"""
	185
	186	node = attr.ib()
	187	p1 = attr.ib()
	188	p2 = attr.ib()
	189	btext = attr.ib()
	190	textlen = attr.ib()
	191	cachedelta = attr.ib()
	192	flags = attr.ib()
	193
	194
	195	@interfaceutil.implementer(repository.irevisiondelta)
	196	@attr.s(slots=True)
	197	class revlogrevisiondelta(object):
	198	node = attr.ib()
	199	p1node = attr.ib()
	200	p2node = attr.ib()
	201	basenode = attr.ib()
	202	flags = attr.ib()
	203	baserevisionsize = attr.ib()
	204	revision = attr.ib()
	205	delta = attr.ib()
	206	sidedata = attr.ib()
	207	protocol_flags = attr.ib()
	208	linknode = attr.ib(default=None)
	209
	210
	211	@interfaceutil.implementer(repository.iverifyproblem)
	212	@attr.s(frozen=True)
	213	class revlogproblem(object):
	214	warning = attr.ib(default=None)
	215	error = attr.ib(default=None)
	216	node = attr.ib(default=None)
	217
	218
	219	def parse_index_v1(data, inline):
	220	# call the C implementation to parse the index data
	221	index, cache = parsers.parse_index2(data, inline)
	222	return index, cache
	223
	224
	225	def parse_index_v2(data, inline):
	226	# call the C implementation to parse the index data
	227	index, cache = parsers.parse_index2(data, inline, revlogv2=True)
	228	return index, cache
	229
	230
	231	def parse_index_cl_v2(data, inline):
	232	# call the C implementation to parse the index data
	233	assert not inline
	234	from .pure.parsers import parse_index_cl_v2
	235
	236	index, cache = parse_index_cl_v2(data)
	237	return index, cache
	238
	239
	240	if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
	241
	242	def parse_index_v1_nodemap(data, inline):
	243	index, cache = parsers.parse_index_devel_nodemap(data, inline)
	244	return index, cache
	245
	246
	247	else:
	248	parse_index_v1_nodemap = None
	249
	250
	251	def parse_index_v1_mixed(data, inline):
	252	index, cache = parse_index_v1(data, inline)
	253	return rustrevlog.MixedIndex(index), cache
	254
	255
	256	# corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
	257	# signed integer)
	258	_maxentrysize = 0x7FFFFFFF
	259
	260	PARTIAL_READ_MSG = _(
	261	b'partial read of revlog %s; expected %d bytes from offset %d, got %d'
	262	)	18	)
	263		19	from . import constants
	264	FILE_TOO_SHORT_MSG = _(
	265	b'cannot read from revlog %s;'
	266	b' expected %d bytes from offset %d, data size is %d'
	267	)
	268
	269
	270	class revlog(object):
	271	"""
	272	the underlying revision storage object
	273
	274	A revlog consists of two parts, an index and the revision data.
	275
	276	The index is a file with a fixed record size containing
	277	information on each revision, including its nodeid (hash), the
	278	nodeids of its parents, the position and offset of its data within
	279	the data file, and the revision it's based on. Finally, each entry
	280	contains a linkrev entry that can serve as a pointer to external
	281	data.
	282
	283	The revision data itself is a linear collection of data chunks.
	284	Each chunk represents a revision and is usually represented as a
	285	delta against the previous chunk. To bound lookup time, runs of
	286	deltas are limited to about 2 times the length of the original
	287	version data. This makes retrieval of a version proportional to
	288	its size, or O(1) relative to the number of revisions.
	289
	290	Both pieces of the revlog are written to in an append-only
	291	fashion, which means we never need to rewrite a file to insert or
	292	remove data, and can use some simple techniques to avoid the need
	293	for locking while reading.
	294
	295	If checkambig, indexfile is opened with checkambig=True at
	296	writing, to avoid file stat ambiguity.
	297
	298	If mmaplargeindex is True, and an mmapindexthreshold is set, the
	299	index will be mmapped rather than read if it is larger than the
	300	configured threshold.
	301
	302	If censorable is True, the revlog can have censored revisions.
	303
	304	If `upperboundcomp` is not None, this is the expected maximal gain from
	305	compression for the data content.
	306
	307	`concurrencychecker` is an optional function that receives 3 arguments: a
	308	file handle, a filename, and an expected position. It should check whether
	309	the current position in the file handle is valid, and log/warn/fail (by
	310	raising).
	311		20
	312		21
	313	Internal details	22	def v1_censor(rl, tr, censornode, tombstone=b''):
	314	----------------	23	"""censors a revision in a "version 1" revlog"""
	315		24	assert rl._format_version == constants.REVLOGV1, rl._format_version
	316	A large part of the revlog logic deals with revisions' "index entries", tuple
	317	objects that contains the same "items" whatever the revlog version.
	318	Different versions will have different ways of storing these items (sometimes
	319	not having them at all), but the tuple will always be the same. New fields
	320	are usually added at the end to avoid breaking existing code that relies
	321	on the existing order. The field are defined as follows:
	322
	323	[0] offset:
	324	The byte index of the start of revision data chunk.
	325	That value is shifted up by 16 bits. use "offset = field >> 16" to
	326	retrieve it.
	327
	328	flags:
	329	A flag field that carries special information or changes the behavior
	330	of the revision. (see `REVIDX_*` constants for details)
	331	The flag field only occupies the first 16 bits of this field,
	332	use "flags = field & 0xFFFF" to retrieve the value.
	333
	334	[1] compressed length:
	335	The size, in bytes, of the chunk on disk
	336
	337	[2] uncompressed length:
	338	The size, in bytes, of the full revision once reconstructed.
	339
	340	[3] base rev:
	341	Either the base of the revision delta chain (without general
	342	delta), or the base of the delta (stored in the data chunk)
	343	with general delta.
	344
	345	[4] link rev:
	346	Changelog revision number of the changeset introducing this
	347	revision.
	348
	349	[5] parent 1 rev:
	350	Revision number of the first parent
	351
	352	[6] parent 2 rev:
	353	Revision number of the second parent
	354
	355	[7] node id:
	356	The node id of the current revision
	357
	358	[8] sidedata offset:
	359	The byte index of the start of the revision's side-data chunk.
	360
	361	[9] sidedata chunk length:
	362	The size, in bytes, of the revision's side-data chunk.
	363
	364	[10] data compression mode:
	365	two bits that detail the way the data chunk is compressed on disk.
	366	(see "COMP_MODE_*" constants for details). For revlog version 0 and
	367	1 this will always be COMP_MODE_INLINE.
	368
	369	[11] side-data compression mode:
	370	two bits that detail the way the sidedata chunk is compressed on disk.
	371	(see "COMP_MODE_*" constants for details)
	372	"""
	373
	374	_flagserrorclass = error.RevlogError
	375
	376	def __init__(
	377	self,
	378	opener,
	379	target,
	380	radix,
	381	postfix=None, # only exist for `tmpcensored` now
	382	checkambig=False,
	383	mmaplargeindex=False,
	384	censorable=False,
	385	upperboundcomp=None,
	386	persistentnodemap=False,
	387	concurrencychecker=None,
	388	trypending=False,
	389	):
	390	"""
	391	create a revlog object
	392
	393	opener is a function that abstracts the file opening operation
	394	and can be used to implement COW semantics or the like.
	395
	396	`target`: a (KIND, ID) tuple that identify the content stored in
	397	this revlog. It help the rest of the code to understand what the revlog
	398	is about without having to resort to heuristic and index filename
	399	analysis. Note: that this must be reliably be set by normal code, but
	400	that test, debug, or performance measurement code might not set this to
	401	accurate value.
	402	"""
	403	self.upperboundcomp = upperboundcomp
	404
	405	self.radix = radix
	406
	407	self._docket_file = None
	408	self._indexfile = None
	409	self._datafile = None
	410	self._sidedatafile = None
	411	self._nodemap_file = None
	412	self.postfix = postfix
	413	self._trypending = trypending
	414	self.opener = opener
	415	if persistentnodemap:
	416	self._nodemap_file = nodemaputil.get_nodemap_file(self)
	417		25
	418	assert target[0] in ALL_KINDS	26	# avoid cycle
	419	assert len(target) == 2	27	from .. import revlog
	420	self.target = target
	421	# When True, indexfile is opened with checkambig=True at writing, to
	422	# avoid file stat ambiguity.
	423	self._checkambig = checkambig
	424	self._mmaplargeindex = mmaplargeindex
	425	self._censorable = censorable
	426	# 3-tuple of (node, rev, text) for a raw revision.
	427	self._revisioncache = None
	428	# Maps rev to chain base rev.
	429	self._chainbasecache = util.lrucachedict(100)
	430	# 2-tuple of (offset, data) of raw data from the revlog at an offset.
	431	self._chunkcache = (0, b'')
	432	# How much data to read and cache into the raw revlog data cache.
	433	self._chunkcachesize = 65536
	434	self._maxchainlen = None
	435	self._deltabothparents = True
	436	self.index = None
	437	self._docket = None
	438	self._nodemap_docket = None
	439	# Mapping of partial identifiers to full nodes.
	440	self._pcache = {}
	441	# Mapping of revision integer to full node.
	442	self._compengine = b'zlib'
	443	self._compengineopts = {}
	444	self._maxdeltachainspan = -1
	445	self._withsparseread = False
	446	self._sparserevlog = False
	447	self.hassidedata = False
	448	self._srdensitythreshold = 0.50
	449	self._srmingapsize = 262144
	450
	451	# Make copy of flag processors so each revlog instance can support
	452	# custom flags.
	453	self._flagprocessors = dict(flagutil.flagprocessors)
	454
	455	# 3-tuple of file handles being used for active writing.
	456	self._writinghandles = None
	457	# prevent nesting of addgroup
	458	self._adding_group = None
	459
	460	self._loadindex()
	461
	462	self._concurrencychecker = concurrencychecker
	463
	464	def _init_opts(self):
	465	"""process options (from above/config) to setup associated default revlog mode
	466
	467	These values might be affected when actually reading on disk information.
	468
	469	The relevant values are returned for use in _loadindex().
	470
	471	* newversionflags:
	472	version header to use if we need to create a new revlog
	473
	474	* mmapindexthreshold:
	475	minimal index size for start to use mmap
	476
	477	* force_nodemap:
	478	force the usage of a "development" version of the nodemap code
	479	"""
	480	mmapindexthreshold = None
	481	opts = self.opener.options
	482
	483	if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
	484	new_header = CHANGELOGV2
	485	elif b'revlogv2' in opts:
	486	new_header = REVLOGV2
	487	elif b'revlogv1' in opts:
	488	new_header = REVLOGV1 \| FLAG_INLINE_DATA
	489	if b'generaldelta' in opts:
	490	new_header \|= FLAG_GENERALDELTA
	491	elif b'revlogv0' in self.opener.options:
	492	new_header = REVLOGV0
	493	else:
	494	new_header = REVLOG_DEFAULT_VERSION
	495
	496	if b'chunkcachesize' in opts:
	497	self._chunkcachesize = opts[b'chunkcachesize']
	498	if b'maxchainlen' in opts:
	499	self._maxchainlen = opts[b'maxchainlen']
	500	if b'deltabothparents' in opts:
	501	self._deltabothparents = opts[b'deltabothparents']
	502	self._lazydelta = bool(opts.get(b'lazydelta', True))
	503	self._lazydeltabase = False
	504	if self._lazydelta:
	505	self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
	506	if b'compengine' in opts:
	507	self._compengine = opts[b'compengine']
	508	if b'zlib.level' in opts:
	509	self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
	510	if b'zstd.level' in opts:
	511	self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
	512	if b'maxdeltachainspan' in opts:
	513	self._maxdeltachainspan = opts[b'maxdeltachainspan']
	514	if self._mmaplargeindex and b'mmapindexthreshold' in opts:
	515	mmapindexthreshold = opts[b'mmapindexthreshold']
	516	self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
	517	withsparseread = bool(opts.get(b'with-sparse-read', False))
	518	# sparse-revlog forces sparse-read
	519	self._withsparseread = self._sparserevlog or withsparseread
	520	if b'sparse-read-density-threshold' in opts:
	521	self._srdensitythreshold = opts[b'sparse-read-density-threshold']
	522	if b'sparse-read-min-gap-size' in opts:
	523	self._srmingapsize = opts[b'sparse-read-min-gap-size']
	524	if opts.get(b'enableellipsis'):
	525	self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
	526
	527	# revlog v0 doesn't have flag processors
	528	for flag, processor in pycompat.iteritems(
	529	opts.get(b'flagprocessors', {})
	530	):
	531	flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
	532
	533	if self._chunkcachesize <= 0:
	534	raise error.RevlogError(
	535	_(b'revlog chunk cache size %r is not greater than 0')
	536	% self._chunkcachesize
	537	)
	538	elif self._chunkcachesize & (self._chunkcachesize - 1):
	539	raise error.RevlogError(
	540	_(b'revlog chunk cache size %r is not a power of 2')
	541	% self._chunkcachesize
	542	)
	543	force_nodemap = opts.get(b'devel-force-nodemap', False)
	544	return new_header, mmapindexthreshold, force_nodemap
	545		28
	546	def _get_data(self, filepath, mmap_threshold, size=None):	29	censorrev = rl.rev(censornode)
	547	"""return a file content with or without mmap	30	tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
	548
	549	If the file is missing return the empty string"""
	550	try:
	551	with self.opener(filepath) as fp:
	552	if mmap_threshold is not None:
	553	file_size = self.opener.fstat(fp).st_size
	554	if file_size >= mmap_threshold:
	555	if size is not None:
	556	# avoid potentiel mmap crash
	557	size = min(file_size, size)
	558	# TODO: should .close() to release resources without
	559	# relying on Python GC
	560	if size is None:
	561	return util.buffer(util.mmapread(fp))
	562	else:
	563	return util.buffer(util.mmapread(fp, size))
	564	if size is None:
	565	return fp.read()
	566	else:
	567	return fp.read(size)
	568	except IOError as inst:
	569	if inst.errno != errno.ENOENT:
	570	raise
	571	return b''
	572
	573	def _loadindex(self):
	574
	575	new_header, mmapindexthreshold, force_nodemap = self._init_opts()
	576
	577	if self.postfix is not None:
	578	entry_point = b'%s.i.%s' % (self.radix, self.postfix)
	579	elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
	580	entry_point = b'%s.i.a' % self.radix
	581	else:
	582	entry_point = b'%s.i' % self.radix
	583
	584	entry_data = b''
	585	self._initempty = True
	586	entry_data = self._get_data(entry_point, mmapindexthreshold)
	587	if len(entry_data) > 0:
	588	header = INDEX_HEADER.unpack(entry_data[:4])[0]
	589	self._initempty = False
	590	else:
	591	header = new_header
	592
	593	self._format_flags = header & ~0xFFFF
	594	self._format_version = header & 0xFFFF
	595
	596	supported_flags = SUPPORTED_FLAGS.get(self._format_version)
	597	if supported_flags is None:
	598	msg = _(b'unknown version (%d) in revlog %s')
	599	msg %= (self._format_version, self.display_id)
	600	raise error.RevlogError(msg)
	601	elif self._format_flags & ~supported_flags:
	602	msg = _(b'unknown flags (%#04x) in version %d revlog %s')
	603	display_flag = self._format_flags >> 16
	604	msg %= (display_flag, self._format_version, self.display_id)
	605	raise error.RevlogError(msg)
	606
	607	features = FEATURES_BY_VERSION[self._format_version]
	608	self._inline = features[b'inline'](self._format_flags)
	609	self._generaldelta = features[b'generaldelta'](self._format_flags)
	610	self.hassidedata = features[b'sidedata']
	611
	612	if not features[b'docket']:
	613	self._indexfile = entry_point
	614	index_data = entry_data
	615	else:
	616	self._docket_file = entry_point
	617	if self._initempty:
	618	self._docket = docketutil.default_docket(self, header)
	619	else:
	620	self._docket = docketutil.parse_docket(
	621	self, entry_data, use_pending=self._trypending
	622	)
	623	self._indexfile = self._docket.index_filepath()
	624	index_data = b''
	625	index_size = self._docket.index_end
	626	if index_size > 0:
	627	index_data = self._get_data(
	628	self._indexfile, mmapindexthreshold, size=index_size
	629	)
	630	if len(index_data) < index_size:
	631	msg = _(b'too few index data for %s: got %d, expected %d')
	632	msg %= (self.display_id, len(index_data), index_size)
	633	raise error.RevlogError(msg)
	634
	635	self._inline = False
	636	# generaldelta implied by version 2 revlogs.
	637	self._generaldelta = True
	638	# the logic for persistent nodemap will be dealt with within the
	639	# main docket, so disable it for now.
	640	self._nodemap_file = None
	641
	642	if self._docket is not None:
	643	self._datafile = self._docket.data_filepath()
	644	self._sidedatafile = self._docket.sidedata_filepath()
	645	elif self.postfix is None:
	646	self._datafile = b'%s.d' % self.radix
	647	else:
	648	self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
	649
	650	self.nodeconstants = sha1nodeconstants
	651	self.nullid = self.nodeconstants.nullid
	652
	653	# sparse-revlog can't be on without general-delta (issue6056)
	654	if not self._generaldelta:
	655	self._sparserevlog = False
	656
	657	self._storedeltachains = True
	658		31
	659	devel_nodemap = (	32	if len(tombstone) > rl.rawsize(censorrev):
	660	self._nodemap_file	33	raise error.Abort(
	661	and force_nodemap	34	_(b'censor tombstone must be no longer than censored data')
	662	and parse_index_v1_nodemap is not None
	663	)
	664
	665	use_rust_index = False
	666	if rustrevlog is not None:
	667	if self._nodemap_file is not None:
	668	use_rust_index = True
	669	else:
	670	use_rust_index = self.opener.options.get(b'rust.index')
	671
	672	self._parse_index = parse_index_v1
	673	if self._format_version == REVLOGV0:
	674	self._parse_index = revlogv0.parse_index_v0
	675	elif self._format_version == REVLOGV2:
	676	self._parse_index = parse_index_v2
	677	elif self._format_version == CHANGELOGV2:
	678	self._parse_index = parse_index_cl_v2
	679	elif devel_nodemap:
	680	self._parse_index = parse_index_v1_nodemap
	681	elif use_rust_index:
	682	self._parse_index = parse_index_v1_mixed
	683	try:
	684	d = self._parse_index(index_data, self._inline)
	685	index, _chunkcache = d
	686	use_nodemap = (
	687	not self._inline
	688	and self._nodemap_file is not None
	689	and util.safehasattr(index, 'update_nodemap_data')
	690	)
	691	if use_nodemap:
	692	nodemap_data = nodemaputil.persisted_data(self)
	693	if nodemap_data is not None:
	694	docket = nodemap_data[0]
	695	if (
	696	len(d[0]) > docket.tip_rev
	697	and d[0][docket.tip_rev][7] == docket.tip_node
	698	):
	699	# no changelog tampering
	700	self._nodemap_docket = docket
	701	index.update_nodemap_data(*nodemap_data)
	702	except (ValueError, IndexError):
	703	raise error.RevlogError(
	704	_(b"index %s is corrupted") % self.display_id
	705	)
	706	self.index, self._chunkcache = d
	707	if not self._chunkcache:
	708	self._chunkclear()
	709	# revnum -> (chain-length, sum-delta-length)
	710	self._chaininfocache = util.lrucachedict(500)
	711	# revlog header -> revlog compressor
	712	self._decompressors = {}
	713
	714	@util.propertycache
	715	def revlog_kind(self):
	716	return self.target[0]
	717
	718	@util.propertycache
	719	def display_id(self):
	720	"""The public facing "ID" of the revlog that we use in message"""
	721	# Maybe we should build a user facing representation of
	722	# revlog.target instead of using `self.radix`
	723	return self.radix
	724
	725	def _get_decompressor(self, t):
	726	try:
	727	compressor = self._decompressors[t]
	728	except KeyError:
	729	try:
	730	engine = util.compengines.forrevlogheader(t)
	731	compressor = engine.revlogcompressor(self._compengineopts)
	732	self._decompressors[t] = compressor
	733	except KeyError:
	734	raise error.RevlogError(
	735	_(b'unknown compression type %s') % binascii.hexlify(t)
	736	)
	737	return compressor
	738
	739	@util.propertycache
	740	def _compressor(self):
	741	engine = util.compengines[self._compengine]
	742	return engine.revlogcompressor(self._compengineopts)
	743
	744	@util.propertycache
	745	def _decompressor(self):
	746	"""the default decompressor"""
	747	if self._docket is None:
	748	return None
	749	t = self._docket.default_compression_header
	750	c = self._get_decompressor(t)
	751	return c.decompress
	752
	753	def _indexfp(self):
	754	"""file object for the revlog's index file"""
	755	return self.opener(self._indexfile, mode=b"r")
	756
	757	def __index_write_fp(self):
	758	# You should not use this directly and use `_writing` instead
	759	try:
	760	f = self.opener(
	761	self._indexfile, mode=b"r+", checkambig=self._checkambig
	762	)
	763	if self._docket is None:
	764	f.seek(0, os.SEEK_END)
	765	else:
	766	f.seek(self._docket.index_end, os.SEEK_SET)
	767	return f
	768	except IOError as inst:
	769	if inst.errno != errno.ENOENT:
	770	raise
	771	return self.opener(
	772	self._indexfile, mode=b"w+", checkambig=self._checkambig
	773	)
	774
	775	def __index_new_fp(self):
	776	# You should not use this unless you are upgrading from inline revlog
	777	return self.opener(
	778	self._indexfile,
	779	mode=b"w",
	780	checkambig=self._checkambig,
	781	atomictemp=True,
	782	)	35	)
	783		36
	784	def _datafp(self, mode=b'r'):	37	# Rewriting the revlog in place is hard. Our strategy for censoring is
	785	"""file object for the revlog's data file"""	38	# to create a new revlog, copy all revisions to it, then replace the
	786	return self.opener(self._datafile, mode=mode)	39	# revlogs on transaction close.
	787		40	#
	788	@contextlib.contextmanager	41	# This is a bit dangerous. We could easily have a mismatch of state.
	789	def _datareadfp(self, existingfp=None):	42	newrl = revlog.revlog(
	790	"""file object suitable to read data"""	43	rl.opener,
	791	# Use explicit file handle, if given.	44	target=rl.target,
	792	if existingfp is not None:	45	radix=rl.radix,
	793	yield existingfp	46	postfix=b'tmpcensored',
	794		47	censorable=True,
	795	# Use a file handle being actively used for writes, if available.	48	)
	796	# There is some danger to doing this because reads will seek the	49	newrl._format_version = rl._format_version
	797	# file. However, _writeentry() performs a SEEK_END before all writes,	50	newrl._format_flags = rl._format_flags
	798	# so we should be safe.	51	newrl._generaldelta = rl._generaldelta
	799	elif self._writinghandles:	52	newrl._parse_index = rl._parse_index
	800	if self._inline:
	801	yield self._writinghandles[0]
	802	else:
	803	yield self._writinghandles[1]
	804
	805	# Otherwise open a new file handle.
	806	else:
	807	if self._inline:
	808	func = self._indexfp
	809	else:
	810	func = self._datafp
	811	with func() as fp:
	812	yield fp
	813
	814	@contextlib.contextmanager
	815	def _sidedatareadfp(self):
	816	"""file object suitable to read sidedata"""
	817	if self._writinghandles:
	818	yield self._writinghandles[2]
	819	else:
	820	with self.opener(self._sidedatafile) as fp:
	821	yield fp
	822
	823	def tiprev(self):
	824	return len(self.index) - 1
	825
	826	def tip(self):
	827	return self.node(self.tiprev())
	828
	829	def __contains__(self, rev):
	830	return 0 <= rev < len(self)
	831
	832	def __len__(self):
	833	return len(self.index)
	834
	835	def __iter__(self):
	836	return iter(pycompat.xrange(len(self)))
	837
	838	def revs(self, start=0, stop=None):
	839	"""iterate over all rev in this revlog (from start to stop)"""
	840	return storageutil.iterrevs(len(self), start=start, stop=stop)
	841
	842	@property
	843	def nodemap(self):
	844	msg = (
	845	b"revlog.nodemap is deprecated, "
	846	b"use revlog.index.[has_node\|rev\|get_rev]"
	847	)
	848	util.nouideprecwarn(msg, b'5.3', stacklevel=2)
	849	return self.index.nodemap
	850
	851	@property
	852	def _nodecache(self):
	853	msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
	854	util.nouideprecwarn(msg, b'5.3', stacklevel=2)
	855	return self.index.nodemap
	856
	857	def hasnode(self, node):
	858	try:
	859	self.rev(node)
	860	return True
	861	except KeyError:
	862	return False
	863
	864	def candelta(self, baserev, rev):
	865	"""whether two revisions (baserev, rev) can be delta-ed or not"""
	866	# Disable delta if either rev requires a content-changing flag
	867	# processor (ex. LFS). This is because such flag processor can alter
	868	# the rawtext content that the delta will be based on, and two clients
	869	# could have a same revlog node with different flags (i.e. different
	870	# rawtext contents) and the delta could be incompatible.
	871	if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
	872	self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
	873	):
	874	return False
	875	return True
	876
	877	def update_caches(self, transaction):
	878	if self._nodemap_file is not None:
	879	if transaction is None:
	880	nodemaputil.update_persistent_nodemap(self)
	881	else:
	882	nodemaputil.setup_persistent_nodemap(transaction, self)
	883
	884	def clearcaches(self):
	885	self._revisioncache = None
	886	self._chainbasecache.clear()
	887	self._chunkcache = (0, b'')
	888	self._pcache = {}
	889	self._nodemap_docket = None
	890	self.index.clearcaches()
	891	# The python code is the one responsible for validating the docket, we
	892	# end up having to refresh it here.
	893	use_nodemap = (
	894	not self._inline
	895	and self._nodemap_file is not None
	896	and util.safehasattr(self.index, 'update_nodemap_data')
	897	)
	898	if use_nodemap:
	899	nodemap_data = nodemaputil.persisted_data(self)
	900	if nodemap_data is not None:
	901	self._nodemap_docket = nodemap_data[0]
	902	self.index.update_nodemap_data(*nodemap_data)
	903
	904	def rev(self, node):
	905	try:
	906	return self.index.rev(node)
	907	except TypeError:
	908	raise
	909	except error.RevlogError:
	910	# parsers.c radix tree lookup failed
	911	if (
	912	node == self.nodeconstants.wdirid
	913	or node in self.nodeconstants.wdirfilenodeids
	914	):
	915	raise error.WdirUnsupported
	916	raise error.LookupError(node, self.display_id, _(b'no node'))
	917
	918	# Accessors for index entries.
	919
	920	# First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
	921	# are flags.
	922	def start(self, rev):
	923	return int(self.index[rev][0] >> 16)
	924
	925	def sidedata_cut_off(self, rev):
	926	sd_cut_off = self.index[rev][8]
	927	if sd_cut_off != 0:
	928	return sd_cut_off
	929	# This is some annoying dance, because entries without sidedata
	930	# currently use 0 as their ofsset. (instead of previous-offset +
	931	# previous-size)
	932	#
	933	# We should reconsider this sidedata → 0 sidata_offset policy.
	934	# In the meantime, we need this.
	935	while 0 <= rev:
	936	e = self.index[rev]
	937	if e[9] != 0:
	938	return e[8] + e[9]
	939	rev -= 1
	940	return 0
	941
	942	def flags(self, rev):
	943	return self.index[rev][0] & 0xFFFF
	944
	945	def length(self, rev):
	946	return self.index[rev][1]
	947
	948	def sidedata_length(self, rev):
	949	if not self.hassidedata:
	950	return 0
	951	return self.index[rev][9]
	952
	953	def rawsize(self, rev):
	954	"""return the length of the uncompressed text for a given revision"""
	955	l = self.index[rev][2]
	956	if l >= 0:
	957	return l
	958
	959	t = self.rawdata(rev)
	960	return len(t)
	961
	962	def size(self, rev):
	963	"""length of non-raw text (processed by a "read" flag processor)"""
	964	# fast path: if no "read" flag processor could change the content,
	965	# size is rawsize. note: ELLIPSIS is known to not change the content.
	966	flags = self.flags(rev)
	967	if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
	968	return self.rawsize(rev)
	969
	970	return len(self.revision(rev, raw=False))
	971
	972	def chainbase(self, rev):
	973	base = self._chainbasecache.get(rev)
	974	if base is not None:
	975	return base
	976
	977	index = self.index
	978	iterrev = rev
	979	base = index[iterrev][3]
	980	while base != iterrev:
	981	iterrev = base
	982	base = index[iterrev][3]
	983
	984	self._chainbasecache[rev] = base
	985	return base
	986
	987	def linkrev(self, rev):
	988	return self.index[rev][4]
	989
	990	def parentrevs(self, rev):
	991	try:
	992	entry = self.index[rev]
	993	except IndexError:
	994	if rev == wdirrev:
	995	raise error.WdirUnsupported
	996	raise
	997	if entry[5] == nullrev:
	998	return entry[6], entry[5]
	999	else:
	1000	return entry[5], entry[6]
	1001
	1002	# fast parentrevs(rev) where rev isn't filtered
	1003	_uncheckedparentrevs = parentrevs
	1004
	1005	def node(self, rev):
	1006	try:
	1007	return self.index[rev][7]
	1008	except IndexError:
	1009	if rev == wdirrev:
	1010	raise error.WdirUnsupported
	1011	raise
	1012
	1013	# Derived from index values.
	1014
	1015	def end(self, rev):
	1016	return self.start(rev) + self.length(rev)
	1017
	1018	def parents(self, node):
	1019	i = self.index
	1020	d = i[self.rev(node)]
	1021	# inline node() to avoid function call overhead
	1022	if d[5] == self.nullid:
	1023	return i[d[6]][7], i[d[5]][7]
	1024	else:
	1025	return i[d[5]][7], i[d[6]][7]
	1026
	1027	def chainlen(self, rev):
	1028	return self._chaininfo(rev)[0]
	1029
	1030	def _chaininfo(self, rev):
	1031	chaininfocache = self._chaininfocache
	1032	if rev in chaininfocache:
	1033	return chaininfocache[rev]
	1034	index = self.index
	1035	generaldelta = self._generaldelta
	1036	iterrev = rev
	1037	e = index[iterrev]
	1038	clen = 0
	1039	compresseddeltalen = 0
	1040	while iterrev != e[3]:
	1041	clen += 1
	1042	compresseddeltalen += e[1]
	1043	if generaldelta:
	1044	iterrev = e[3]
	1045	else:
	1046	iterrev -= 1
	1047	if iterrev in chaininfocache:
	1048	t = chaininfocache[iterrev]
	1049	clen += t[0]
	1050	compresseddeltalen += t[1]
	1051	break
	1052	e = index[iterrev]
	1053	else:
	1054	# Add text length of base since decompressing that also takes
	1055	# work. For cache hits the length is already included.
	1056	compresseddeltalen += e[1]
	1057	r = (clen, compresseddeltalen)
	1058	chaininfocache[rev] = r
	1059	return r
	1060
	1061	def _deltachain(self, rev, stoprev=None):
	1062	"""Obtain the delta chain for a revision.
	1063
	1064	``stoprev`` specifies a revision to stop at. If not specified, we
	1065	stop at the base of the chain.
	1066
	1067	Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
	1068	revs in ascending order and ``stopped`` is a bool indicating whether
	1069	``stoprev`` was hit.
	1070	"""
	1071	# Try C implementation.
	1072	try:
	1073	return self.index.deltachain(rev, stoprev, self._generaldelta)
	1074	except AttributeError:
	1075	pass
	1076
	1077	chain = []
	1078
	1079	# Alias to prevent attribute lookup in tight loop.
	1080	index = self.index
	1081	generaldelta = self._generaldelta
	1082
	1083	iterrev = rev
	1084	e = index[iterrev]
	1085	while iterrev != e[3] and iterrev != stoprev:
	1086	chain.append(iterrev)
	1087	if generaldelta:
	1088	iterrev = e[3]
	1089	else:
	1090	iterrev -= 1
	1091	e = index[iterrev]
	1092
	1093	if iterrev == stoprev:
	1094	stopped = True
	1095	else:
	1096	chain.append(iterrev)
	1097	stopped = False
	1098
	1099	chain.reverse()
	1100	return chain, stopped
	1101
	1102	def ancestors(self, revs, stoprev=0, inclusive=False):
	1103	"""Generate the ancestors of 'revs' in reverse revision order.
	1104	Does not generate revs lower than stoprev.
	1105
	1106	See the documentation for ancestor.lazyancestors for more details."""
	1107
	1108	# first, make sure start revisions aren't filtered
	1109	revs = list(revs)
	1110	checkrev = self.node
	1111	for r in revs:
	1112	checkrev(r)
	1113	# and we're sure ancestors aren't filtered as well
	1114
	1115	if rustancestor is not None and self.index.rust_ext_compat:
	1116	lazyancestors = rustancestor.LazyAncestors
	1117	arg = self.index
	1118	else:
	1119	lazyancestors = ancestor.lazyancestors
	1120	arg = self._uncheckedparentrevs
	1121	return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
	1122
	1123	def descendants(self, revs):
	1124	return dagop.descendantrevs(revs, self.revs, self.parentrevs)
	1125
	1126	def findcommonmissing(self, common=None, heads=None):
	1127	"""Return a tuple of the ancestors of common and the ancestors of heads
	1128	that are not ancestors of common. In revset terminology, we return the
	1129	tuple:
	1130
	1131	::common, (::heads) - (::common)
	1132
	1133	The list is sorted by revision number, meaning it is
	1134	topologically sorted.
	1135
	1136	'heads' and 'common' are both lists of node IDs. If heads is
	1137	not supplied, uses all of the revlog's heads. If common is not
	1138	supplied, uses nullid."""
	1139	if common is None:
	1140	common = [self.nullid]
	1141	if heads is None:
	1142	heads = self.heads()
	1143
	1144	common = [self.rev(n) for n in common]
	1145	heads = [self.rev(n) for n in heads]
	1146
	1147	# we want the ancestors, but inclusive
	1148	class lazyset(object):
	1149	def __init__(self, lazyvalues):
	1150	self.addedvalues = set()
	1151	self.lazyvalues = lazyvalues
	1152
	1153	def __contains__(self, value):
	1154	return value in self.addedvalues or value in self.lazyvalues
	1155
	1156	def __iter__(self):
	1157	added = self.addedvalues
	1158	for r in added:
	1159	yield r
	1160	for r in self.lazyvalues:
	1161	if not r in added:
	1162	yield r
	1163
	1164	def add(self, value):
	1165	self.addedvalues.add(value)
	1166
	1167	def update(self, values):
	1168	self.addedvalues.update(values)
	1169
	1170	has = lazyset(self.ancestors(common))
	1171	has.add(nullrev)
	1172	has.update(common)
	1173
	1174	# take all ancestors from heads that aren't in has
	1175	missing = set()
	1176	visit = collections.deque(r for r in heads if r not in has)
	1177	while visit:
	1178	r = visit.popleft()
	1179	if r in missing:
	1180	continue
	1181	else:
	1182	missing.add(r)
	1183	for p in self.parentrevs(r):
	1184	if p not in has:
	1185	visit.append(p)
	1186	missing = list(missing)
	1187	missing.sort()
	1188	return has, [self.node(miss) for miss in missing]
	1189
	1190	def incrementalmissingrevs(self, common=None):
	1191	"""Return an object that can be used to incrementally compute the
	1192	revision numbers of the ancestors of arbitrary sets that are not
	1193	ancestors of common. This is an ancestor.incrementalmissingancestors
	1194	object.
	1195		53
	1196	'common' is a list of revision numbers. If common is not supplied, uses	54	for rev in rl.revs():
	1197	n~~ullrev.~~	55	node = rl.node(rev)
	1198	"""	56	p1, p2 = rl.parents(node)
	1199	if common is None:
	1200	common = [nullrev]
	1201
	1202	if rustancestor is not None and self.index.rust_ext_compat:
	1203	return rustancestor.MissingAncestors(self.index, common)
	1204	return ancestor.incrementalmissingancestors(self.parentrevs, common)
	1205
	1206	def findmissingrevs(self, common=None, heads=None):
	1207	"""Return the revision numbers of the ancestors of heads that
	1208	are not ancestors of common.
	1209
	1210	More specifically, return a list of revision numbers corresponding to
	1211	nodes N such that every N satisfies the following constraints:
	1212
	1213	1. N is an ancestor of some node in 'heads'
	1214	2. N is not an ancestor of any node in 'common'
	1215
	1216	The list is sorted by revision number, meaning it is
	1217	topologically sorted.
	1218
	1219	'heads' and 'common' are both lists of revision numbers. If heads is
	1220	not supplied, uses all of the revlog's heads. If common is not
	1221	supplied, uses nullid."""
	1222	if common is None:
	1223	common = [nullrev]
	1224	if heads is None:
	1225	heads = self.headrevs()
	1226
	1227	inc = self.incrementalmissingrevs(common=common)
	1228	return inc.missingancestors(heads)
	1229
	1230	def findmissing(self, common=None, heads=None):
	1231	"""Return the ancestors of heads that are not ancestors of common.
	1232
	1233	More specifically, return a list of nodes N such that every N
	1234	satisfies the following constraints:
	1235
	1236	1. N is an ancestor of some node in 'heads'
	1237	2. N is not an ancestor of any node in 'common'
	1238
	1239	The list is sorted by revision number, meaning it is
	1240	topologically sorted.
	1241
	1242	'heads' and 'common' are both lists of node IDs. If heads is
	1243	not supplied, uses all of the revlog's heads. If common is not
	1244	supplied, uses nullid."""
	1245	if common is None:
	1246	common = [self.nullid]
	1247	if heads is None:
	1248	heads = self.heads()
	1249
	1250	common = [self.rev(n) for n in common]
	1251	heads = [self.rev(n) for n in heads]
	1252
	1253	inc = self.incrementalmissingrevs(common=common)
	1254	return [self.node(r) for r in inc.missingancestors(heads)]
	1255
	1256	def nodesbetween(self, roots=None, heads=None):
	1257	"""Return a topological path from 'roots' to 'heads'.
	1258
	1259	Return a tuple (nodes, outroots, outheads) where 'nodes' is a
	1260	topologically sorted list of all nodes N that satisfy both of
	1261	these constraints:
	1262
	1263	1. N is a descendant of some node in 'roots'
	1264	2. N is an ancestor of some node in 'heads'
	1265
	1266	Every node is considered to be both a descendant and an ancestor
	1267	of itself, so every reachable node in 'roots' and 'heads' will be
	1268	included in 'nodes'.
	1269
	1270	'outroots' is the list of reachable nodes in 'roots', i.e., the
	1271	subset of 'roots' that is returned in 'nodes'. Likewise,
	1272	'outheads' is the subset of 'heads' that is also in 'nodes'.
	1273
	1274	'roots' and 'heads' are both lists of node IDs. If 'roots' is
	1275	unspecified, uses nullid as the only root. If 'heads' is
	1276	unspecified, uses list of all of the revlog's heads."""
	1277	nonodes = ([], [], [])
	1278	if roots is not None:
	1279	roots = list(roots)
	1280	if not roots:
	1281	return nonodes
	1282	lowestrev = min([self.rev(n) for n in roots])
	1283	else:
	1284	roots = [self.nullid] # Everybody's a descendant of nullid
	1285	lowestrev = nullrev
	1286	if (lowestrev == nullrev) and (heads is None):
	1287	# We want _all_ the nodes!
	1288	return (
	1289	[self.node(r) for r in self],
	1290	[self.nullid],
	1291	list(self.heads()),
	1292	)
	1293	if heads is None:
	1294	# All nodes are ancestors, so the latest ancestor is the last
	1295	# node.
	1296	highestrev = len(self) - 1
	1297	# Set ancestors to None to signal that every node is an ancestor.
	1298	ancestors = None
	1299	# Set heads to an empty dictionary for later discovery of heads
	1300	heads = {}
	1301	else:
	1302	heads = list(heads)
	1303	if not heads:
	1304	return nonodes
	1305	ancestors = set()
	1306	# Turn heads into a dictionary so we can remove 'fake' heads.
	1307	# Also, later we will be using it to filter out the heads we can't
	1308	# find from roots.
	1309	heads = dict.fromkeys(heads, False)
	1310	# Start at the top and keep marking parents until we're done.
	1311	nodestotag = set(heads)
	1312	# Remember where the top was so we can use it as a limit later.
	1313	highestrev = max([self.rev(n) for n in nodestotag])
	1314	while nodestotag:
	1315	# grab a node to tag
	1316	n = nodestotag.pop()
	1317	# Never tag nullid
	1318	if n == self.nullid:
	1319	continue
	1320	# A node's revision number represents its place in a
	1321	# topologically sorted list of nodes.
	1322	r = self.rev(n)
	1323	if r >= lowestrev:
	1324	if n not in ancestors:
	1325	# If we are possibly a descendant of one of the roots
	1326	# and we haven't already been marked as an ancestor
	1327	ancestors.add(n) # Mark as ancestor
	1328	# Add non-nullid parents to list of nodes to tag.
	1329	nodestotag.update(
	1330	[p for p in self.parents(n) if p != self.nullid]
	1331	)
	1332	elif n in heads: # We've seen it before, is it a fake head?
	1333	# So it is, real heads should not be the ancestors of
	1334	# any other heads.
	1335	heads.pop(n)
	1336	if not ancestors:
	1337	return nonodes
	1338	# Now that we have our set of ancestors, we want to remove any
	1339	# roots that are not ancestors.
	1340
	1341	# If one of the roots was nullid, everything is included anyway.
	1342	if lowestrev > nullrev:
	1343	# But, since we weren't, let's recompute the lowest rev to not
	1344	# include roots that aren't ancestors.
	1345		57
	1346	# Filter out roots that aren't ancestors of heads	58	if rev == censorrev:
	1347	roots = [root for root in roots if root in ancestors]	59	newrl.addrawrevision(
	1348	~~# Recompute the lowest revision~~	60	tombstone,
	1349	if ~~roots~~:	61	tr,
	1350	lowestrev = min([self.rev(root) for root in roots])	62	rl.linkrev(censorrev),
	1351	~~else~~:	63	p1,
	1352	# No more roots? Return empty list	64	p2,
	1353	~~return~~ ~~nonodes~~	65	censornode,
	1354	else:	66	constants.REVIDX_ISCENSORED,
	1355	# We are descending from nullid, and don't need to care about
	1356	# any other roots.
	1357	lowestrev = nullrev
	1358	roots = [self.nullid]
	1359	# Transform our roots list into a set.
	1360	descendants = set(roots)
	1361	# Also, keep the original roots so we can filter out roots that aren't
	1362	# 'real' roots (i.e. are descended from other roots).
	1363	roots = descendants.copy()
	1364	# Our topologically sorted list of output nodes.
	1365	orderedout = []
	1366	# Don't start at nullid since we don't want nullid in our output list,
	1367	# and if nullid shows up in descendants, empty parents will look like
	1368	# they're descendants.
	1369	for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
	1370	n = self.node(r)
	1371	isdescendant = False
	1372	if lowestrev == nullrev: # Everybody is a descendant of nullid
	1373	isdescendant = True
	1374	elif n in descendants:
	1375	# n is already a descendant
	1376	isdescendant = True
	1377	# This check only needs to be done here because all the roots
	1378	# will start being marked is descendants before the loop.
	1379	if n in roots:
	1380	# If n was a root, check if it's a 'real' root.
	1381	p = tuple(self.parents(n))
	1382	# If any of its parents are descendants, it's not a root.
	1383	if (p[0] in descendants) or (p[1] in descendants):
	1384	roots.remove(n)
	1385	else:
	1386	p = tuple(self.parents(n))
	1387	# A node is a descendant if either of its parents are
	1388	# descendants. (We seeded the dependents list with the roots
	1389	# up there, remember?)
	1390	if (p[0] in descendants) or (p[1] in descendants):
	1391	descendants.add(n)
	1392	isdescendant = True
	1393	if isdescendant and ((ancestors is None) or (n in ancestors)):
	1394	# Only include nodes that are both descendants and ancestors.
	1395	orderedout.append(n)
	1396	if (ancestors is not None) and (n in heads):
	1397	# We're trying to figure out which heads are reachable
	1398	# from roots.
	1399	# Mark this head as having been reached
	1400	heads[n] = True
	1401	elif ancestors is None:
	1402	# Otherwise, we're trying to discover the heads.
	1403	# Assume this is a head because if it isn't, the next step
	1404	# will eventually remove it.
	1405	heads[n] = True
	1406	# But, obviously its parents aren't.
	1407	for p in self.parents(n):
	1408	heads.pop(p, None)
	1409	heads = [head for head, flag in pycompat.iteritems(heads) if flag]
	1410	roots = list(roots)
	1411	assert orderedout
	1412	assert roots
	1413	assert heads
	1414	return (orderedout, roots, heads)
	1415
	1416	def headrevs(self, revs=None):
	1417	if revs is None:
	1418	try:
	1419	return self.index.headrevs()
	1420	except AttributeError:
	1421	return self._headrevs()
	1422	if rustdagop is not None and self.index.rust_ext_compat:
	1423	return rustdagop.headrevs(self.index, revs)
	1424	return dagop.headrevs(revs, self._uncheckedparentrevs)
	1425
	1426	def computephases(self, roots):
	1427	return self.index.computephasesmapsets(roots)
	1428
	1429	def _headrevs(self):
	1430	count = len(self)
	1431	if not count:
	1432	return [nullrev]
	1433	# we won't iter over filtered rev so nobody is a head at start
	1434	ishead = [0] * (count + 1)
	1435	index = self.index
	1436	for r in self:
	1437	ishead[r] = 1 # I may be an head
	1438	e = index[r]
	1439	ishead[e[5]] = ishead[e[6]] = 0 # my parent are not
	1440	return [r for r, val in enumerate(ishead) if val]
	1441
	1442	def heads(self, start=None, stop=None):
	1443	"""return the list of all nodes that have no children
	1444
	1445	if start is specified, only heads that are descendants of
	1446	start will be returned
	1447	if stop is specified, it will consider all the revs from stop
	1448	as if they had no children
	1449	"""
	1450	if start is None and stop is None:
	1451	if not len(self):
	1452	return [self.nullid]
	1453	return [self.node(r) for r in self.headrevs()]
	1454
	1455	if start is None:
	1456	start = nullrev
	1457	else:
	1458	start = self.rev(start)
	1459
	1460	stoprevs = {self.rev(n) for n in stop or []}
	1461
	1462	revs = dagop.headrevssubset(
	1463	self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
	1464	)
	1465
	1466	return [self.node(rev) for rev in revs]
	1467
	1468	def children(self, node):
	1469	"""find the children of a given node"""
	1470	c = []
	1471	p = self.rev(node)
	1472	for r in self.revs(start=p + 1):
	1473	prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
	1474	if prevs:
	1475	for pr in prevs:
	1476	if pr == p:
	1477	c.append(self.node(r))
	1478	elif p == nullrev:
	1479	c.append(self.node(r))
	1480	return c
	1481
	1482	def commonancestorsheads(self, a, b):
	1483	"""calculate all the heads of the common ancestors of nodes a and b"""
	1484	a, b = self.rev(a), self.rev(b)
	1485	ancs = self._commonancestorsheads(a, b)
	1486	return pycompat.maplist(self.node, ancs)
	1487
	1488	def _commonancestorsheads(self, *revs):
	1489	"""calculate all the heads of the common ancestors of revs"""
	1490	try:
	1491	ancs = self.index.commonancestorsheads(*revs)
	1492	except (AttributeError, OverflowError): # C implementation failed
	1493	ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
	1494	return ancs
	1495
	1496	def isancestor(self, a, b):
	1497	"""return True if node a is an ancestor of node b
	1498
	1499	A revision is considered an ancestor of itself."""
	1500	a, b = self.rev(a), self.rev(b)
	1501	return self.isancestorrev(a, b)
	1502
	1503	def isancestorrev(self, a, b):
	1504	"""return True if revision a is an ancestor of revision b
	1505
	1506	A revision is considered an ancestor of itself.
	1507
	1508	The implementation of this is trivial but the use of
	1509	reachableroots is not."""
	1510	if a == nullrev:
	1511	return True
	1512	elif a == b:
	1513	return True
	1514	elif a > b:
	1515	return False
	1516	return bool(self.reachableroots(a, [b], [a], includepath=False))
	1517
	1518	def reachableroots(self, minroot, heads, roots, includepath=False):
	1519	"""return (heads(::(<roots> and <roots>::<heads>)))
	1520
	1521	If includepath is True, return (<roots>::<heads>)."""
	1522	try:
	1523	return self.index.reachableroots2(
	1524	minroot, heads, roots, includepath
	1525	)
	1526	except AttributeError:
	1527	return dagop._reachablerootspure(
	1528	self.parentrevs, minroot, roots, heads, includepath
	1529	)
	1530
	1531	def ancestor(self, a, b):
	1532	"""calculate the "best" common ancestor of nodes a and b"""
	1533
	1534	a, b = self.rev(a), self.rev(b)
	1535	try:
	1536	ancs = self.index.ancestors(a, b)
	1537	except (AttributeError, OverflowError):
	1538	ancs = ancestor.ancestors(self.parentrevs, a, b)
	1539	if ancs:
	1540	# choose a consistent winner when there's a tie
	1541	return min(map(self.node, ancs))
	1542	return self.nullid
	1543
	1544	def _match(self, id):
	1545	if isinstance(id, int):
	1546	# rev
	1547	return self.node(id)
	1548	if len(id) == self.nodeconstants.nodelen:
	1549	# possibly a binary node
	1550	# odds of a binary node being all hex in ASCII are 1 in 10**25
	1551	try:
	1552	node = id
	1553	self.rev(node) # quick search the index
	1554	return node
	1555	except error.LookupError:
	1556	pass # may be partial hex id
	1557	try:
	1558	# str(rev)
	1559	rev = int(id)
	1560	if b"%d" % rev != id:
	1561	raise ValueError
	1562	if rev < 0:
	1563	rev = len(self) + rev
	1564	if rev < 0 or rev >= len(self):
	1565	raise ValueError
	1566	return self.node(rev)
	1567	except (ValueError, OverflowError):
	1568	pass
	1569	if len(id) == 2 * self.nodeconstants.nodelen:
	1570	try:
	1571	# a full hex nodeid?
	1572	node = bin(id)
	1573	self.rev(node)
	1574	return node
	1575	except (TypeError, error.LookupError):
	1576	pass
	1577
	1578	def _partialmatch(self, id):
	1579	# we don't care wdirfilenodeids as they should be always full hash
	1580	maybewdir = self.nodeconstants.wdirhex.startswith(id)
	1581	ambiguous = False
	1582	try:
	1583	partial = self.index.partialmatch(id)
	1584	if partial and self.hasnode(partial):
	1585	if maybewdir:
	1586	# single 'ff...' match in radix tree, ambiguous with wdir
	1587	ambiguous = True
	1588	else:
	1589	return partial
	1590	elif maybewdir:
	1591	# no 'ff...' match in radix tree, wdir identified
	1592	raise error.WdirUnsupported
	1593	else:
	1594	return None
	1595	except error.RevlogError:
	1596	# parsers.c radix tree lookup gave multiple matches
	1597	# fast path: for unfiltered changelog, radix tree is accurate
	1598	if not getattr(self, 'filteredrevs', None):
	1599	ambiguous = True
	1600	# fall through to slow path that filters hidden revisions
	1601	except (AttributeError, ValueError):
	1602	# we are pure python, or key was too short to search radix tree
	1603	pass
	1604	if ambiguous:
	1605	raise error.AmbiguousPrefixLookupError(
	1606	id, self.display_id, _(b'ambiguous identifier')
	1607	)	67	)
	1608		68
	1609	if id in self._pcache:	69	if newrl.deltaparent(rev) != nullrev:
	1610	return self._pcache[id]	70	m = _(b'censored revision stored as delta; cannot censor')
	1611		71	h = _(
	1612	if len(id) <= 40:	72	b'censoring of revlogs is not fully implemented;'
	1613	try:	73	b' please report this bug'
	1614	~~# hex(node~~)~~[:...]~~	74	)
	1615	l = len(id) // 2 # grab an even number of digits	75	raise error.Abort(m, hint=h)
	1616	prefix = bin(id[: l * 2])	76	continue
	1617	nl = [e[7] for e in self.index if e[7].startswith(prefix)]
	1618	nl = [
	1619	n for n in nl if hex(n).startswith(id) and self.hasnode(n)
	1620	]
	1621	if self.nodeconstants.nullhex.startswith(id):
	1622	nl.append(self.nullid)
	1623	if len(nl) > 0:
	1624	if len(nl) == 1 and not maybewdir:
	1625	self._pcache[id] = nl[0]
	1626	return nl[0]
	1627	raise error.AmbiguousPrefixLookupError(
	1628	id, self.display_id, _(b'ambiguous identifier')
	1629	)
	1630	if maybewdir:
	1631	raise error.WdirUnsupported
	1632	return None
	1633	except TypeError:
	1634	pass
	1635
	1636	def lookup(self, id):
	1637	"""locate a node based on:
	1638	- revision number or str(revision number)
	1639	- nodeid or subset of hex nodeid
	1640	"""
	1641	n = self._match(id)
	1642	if n is not None:
	1643	return n
	1644	n = self._partialmatch(id)
	1645	if n:
	1646	return n
	1647
	1648	raise error.LookupError(id, self.display_id, _(b'no match found'))
	1649
    def shortest(self, node, minlength=1):
        """Find the shortest unambiguous prefix that matches node.

        ``minlength`` is the smallest prefix length that is tried. The
        index's own ``shortest`` is used when ``self.filteredrevs`` is unset
        or empty and the index provides one; otherwise increasingly long
        prefixes are checked through ``_partialmatch``.

        Raises ``error.LookupError`` when the node is not found.
        """

        def isvalid(prefix):
            # True when `prefix` designates a single node according to
            # _partialmatch, False when it is ambiguous.
            try:
                matchednode = self._partialmatch(prefix)
            except error.AmbiguousPrefixLookupError:
                return False
            except error.WdirUnsupported:
                # single 'ff...' match
                return True
            if matchednode is None:
                raise error.LookupError(node, self.display_id, _(b'no node'))
            return True

        def maybewdir(prefix):
            # A prefix made only of 'f' could also designate wdirid.
            return all(c == b'f' for c in pycompat.iterbytestr(prefix))

        hexnode = hex(node)

        def disambiguate(hexnode, minlength):
            """Disambiguate against wdirid."""
            # Grow the prefix until it is no longer made only of 'f'; falls
            # off the end (returning None) if no such prefix exists.
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if not maybewdir(prefix):
                    return prefix

        # Fast path: only taken when no revisions are filtered.
        if not getattr(self, 'filteredrevs', None):
            try:
                length = max(self.index.shortest(node), minlength)
                return disambiguate(hexnode, length)
            except error.RevlogError:
                # A failed index lookup is only tolerated for wdirid, which
                # is handled by the code below.
                if node != self.nodeconstants.wdirid:
                    raise error.LookupError(
                        node, self.display_id, _(b'no node')
                    )
            except AttributeError:
                # Fall through to pure code
                pass

        # wdirid: return the first valid prefix as is, without the
        # disambiguation step (every prefix of it is made of 'f').
        if node == self.nodeconstants.wdirid:
            for length in range(minlength, len(hexnode) + 1):
                prefix = hexnode[:length]
                if isvalid(prefix):
                    return prefix

        # Slow path: first valid prefix, then disambiguated against wdirid.
        for length in range(minlength, len(hexnode) + 1):
            prefix = hexnode[:length]
            if isvalid(prefix):
                return disambiguate(hexnode, length)
	1700
	1701	def cmp(self, node, text):
	1702	"""compare text with a given file revision
	1703
	1704	returns True if text is different than what is stored.
	1705	"""
	1706	p1, p2 = self.parents(node)
	1707	return storageutil.hashrevisionsha1(text, p1, p2) != node
	1708
	1709	def _cachesegment(self, offset, data):
	1710	"""Add a segment to the revlog cache.
	1711
	1712	Accepts an absolute offset and the data that is at that location.
	1713	"""
	1714	o, d = self._chunkcache
	1715	# try to add to existing cache
	1716	if o + len(d) == offset and len(d) + len(data) < _chunksize:
	1717	self._chunkcache = o, d + data
	1718	else:
	1719	self._chunkcache = offset, data
	1720
	1721	def _readsegment(self, offset, length, df=None):
	1722	"""Load a segment of raw data from the revlog.
	1723
	1724	Accepts an absolute offset, length to read, and an optional existing
	1725	file handle to read from.
	1726
	1727	If an existing file handle is passed, it will be seeked and the
	1728	original seek position will NOT be restored.
	1729
	1730	Returns a str or buffer of raw byte data.
	1731
	1732	Raises if the requested number of bytes could not be read.
	1733	"""
	1734	# Cache data both forward and backward around the requested
	1735	# data, in a fixed size window. This helps speed up operations
	1736	# involving reading the revlog backwards.
	1737	cachesize = self._chunkcachesize
	1738	realoffset = offset & ~(cachesize - 1)
	1739	reallength = (
	1740	(offset + length + cachesize) & ~(cachesize - 1)
	1741	) - realoffset
	1742	with self._datareadfp(df) as df:
	1743	df.seek(realoffset)
	1744	d = df.read(reallength)
	1745
	1746	self._cachesegment(realoffset, d)
	1747	if offset != realoffset or reallength != length:
	1748	startoffset = offset - realoffset
	1749	if len(d) - startoffset < length:
	1750	filename = self._indexfile if self._inline else self._datafile
	1751	got = len(d) - startoffset
	1752	m = PARTIAL_READ_MSG % (filename, length, offset, got)
	1753	raise error.RevlogError(m)
	1754	return util.buffer(d, startoffset, length)
	1755
	1756	if len(d) < length:
	1757	filename = self._indexfile if self._inline else self._datafile
	1758	got = len(d) - startoffset
	1759	m = PARTIAL_READ_MSG % (filename, length, offset, got)
	1760	raise error.RevlogError(m)
	1761
	1762	return d
	1763
	1764	def _getsegment(self, offset, length, df=None):
	1765	"""Obtain a segment of raw data from the revlog.
	1766
	1767	Accepts an absolute offset, length of bytes to obtain, and an
	1768	optional file handle to the already-opened revlog. If the file
	1769	handle is used, it's original seek position will not be preserved.
	1770
	1771	Requests for data may be returned from a cache.
	1772
	1773	Returns a str or a buffer instance of raw byte data.
	1774	"""
	1775	o, d = self._chunkcache
	1776	l = len(d)
	1777
	1778	# is it in the cache?
	1779	cachestart = offset - o
	1780	cacheend = cachestart + length
	1781	if cachestart >= 0 and cacheend <= l:
	1782	if cachestart == 0 and cacheend == l:
	1783	return d # avoid a copy
	1784	return util.buffer(d, cachestart, cacheend - cachestart)
	1785
	1786	return self._readsegment(offset, length, df=df)
	1787
	1788	def _getsegmentforrevs(self, startrev, endrev, df=None):
	1789	"""Obtain a segment of raw data corresponding to a range of revisions.
	1790
	1791	Accepts the start and end revisions and an optional already-open
	1792	file handle to be used for reading. If the file handle is read, its
	1793	seek position will not be preserved.
	1794
	1795	Requests for data may be satisfied by a cache.
	1796
	1797	Returns a 2-tuple of (offset, data) for the requested range of
	1798	revisions. Offset is the integer offset from the beginning of the
	1799	revlog and data is a str or buffer of the raw byte data.
	1800
	1801	Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
	1802	to determine where each revision's data begins and ends.
	1803	"""
	1804	# Inlined self.start(startrev) & self.end(endrev) for perf reasons
	1805	# (functions are expensive).
	1806	index = self.index
	1807	istart = index[startrev]
	1808	start = int(istart[0] >> 16)
	1809	if startrev == endrev:
	1810	end = start + istart[1]
	1811	else:
	1812	iend = index[endrev]
	1813	end = int(iend[0] >> 16) + iend[1]
	1814
	1815	if self._inline:
	1816	start += (startrev + 1) * self.index.entry_size
	1817	end += (endrev + 1) * self.index.entry_size
	1818	length = end - start
	1819
	1820	return start, self._getsegment(start, length, df=df)
	1821
	1822	def _chunk(self, rev, df=None):
	1823	"""Obtain a single decompressed chunk for a revision.
	1824
	1825	Accepts an integer revision and an optional already-open file handle
	1826	to be used for reading. If used, the seek position of the file will not
	1827	be preserved.
	1828
	1829	Returns a str holding uncompressed data for the requested revision.
	1830	"""
	1831	compression_mode = self.index[rev][10]
	1832	data = self._getsegmentforrevs(rev, rev, df=df)[1]
	1833	if compression_mode == COMP_MODE_PLAIN:
	1834	return data
	1835	elif compression_mode == COMP_MODE_DEFAULT:
	1836	return self._decompressor(data)
	1837	elif compression_mode == COMP_MODE_INLINE:
	1838	return self.decompress(data)
	1839	else:
	1840	msg = 'unknown compression mode %d'
	1841	msg %= compression_mode
	1842	raise error.RevlogError(msg)
	1843
	1844	def _chunks(self, revs, df=None, targetsize=None):
	1845	"""Obtain decompressed chunks for the specified revisions.
	1846
	1847	Accepts an iterable of numeric revisions that are assumed to be in
	1848	ascending order. Also accepts an optional already-open file handle
	1849	to be used for reading. If used, the seek position of the file will
	1850	not be preserved.
	1851
	1852	This function is similar to calling ``self._chunk()`` multiple times,
	1853	but is faster.
	1854
	1855	Returns a list with decompressed data for each requested revision.
	1856	"""
	1857	if not revs:
	1858	return []
	1859	start = self.start
	1860	length = self.length
	1861	inline = self._inline
	1862	iosize = self.index.entry_size
	1863	buffer = util.buffer
	1864
	1865	l = []
	1866	ladd = l.append
	1867
	1868	if not self._withsparseread:
	1869	slicedchunks = (revs,)
	1870	else:
	1871	slicedchunks = deltautil.slicechunk(
	1872	self, revs, targetsize=targetsize
	1873	)
	1874
	1875	for revschunk in slicedchunks:
	1876	firstrev = revschunk[0]
	1877	# Skip trailing revisions with empty diff
	1878	for lastrev in revschunk[::-1]:
	1879	if length(lastrev) != 0:
	1880	break
	1881
	1882	try:
	1883	offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
	1884	except OverflowError:
	1885	# issue4215 - we can't cache a run of chunks greater than
	1886	# 2G on Windows
	1887	return [self._chunk(rev, df=df) for rev in revschunk]
	1888
	1889	decomp = self.decompress
	1890	# self._decompressor might be None, but will not be used in that case
	1891	def_decomp = self._decompressor
	1892	for rev in revschunk:
	1893	chunkstart = start(rev)
	1894	if inline:
	1895	chunkstart += (rev + 1) * iosize
	1896	chunklength = length(rev)
	1897	comp_mode = self.index[rev][10]
	1898	c = buffer(data, chunkstart - offset, chunklength)
	1899	if comp_mode == COMP_MODE_PLAIN:
	1900	ladd(c)
	1901	elif comp_mode == COMP_MODE_INLINE:
	1902	ladd(decomp(c))
	1903	elif comp_mode == COMP_MODE_DEFAULT:
	1904	ladd(def_decomp(c))
	1905	else:
	1906	msg = 'unknown compression mode %d'
	1907	msg %= comp_mode
	1908	raise error.RevlogError(msg)
	1909
	1910	return l
	1911
	1912	def _chunkclear(self):
	1913	"""Clear the raw chunk cache."""
	1914	self._chunkcache = (0, b'')
	1915
	1916	def deltaparent(self, rev):
	1917	"""return deltaparent of the given revision"""
	1918	base = self.index[rev][3]
	1919	if base == rev:
	1920	return nullrev
	1921	elif self._generaldelta:
	1922	return base
	1923	else:
	1924	return rev - 1
	1925
	1926	def issnapshot(self, rev):
	1927	"""tells whether rev is a snapshot"""
	1928	if not self._sparserevlog:
	1929	return self.deltaparent(rev) == nullrev
	1930	elif util.safehasattr(self.index, b'issnapshot'):
	1931	# directly assign the method to cache the testing and access
	1932	self.issnapshot = self.index.issnapshot
	1933	return self.issnapshot(rev)
	1934	if rev == nullrev:
	1935	return True
	1936	entry = self.index[rev]
	1937	base = entry[3]
	1938	if base == rev:
	1939	return True
	1940	if base == nullrev:
	1941	return True
	1942	p1 = entry[5]
	1943	p2 = entry[6]
	1944	if base == p1 or base == p2:
	1945	return False
	1946	return self.issnapshot(base)
	1947
	1948	def snapshotdepth(self, rev):
	1949	"""number of snapshot in the chain before this one"""
	1950	if not self.issnapshot(rev):
	1951	raise error.ProgrammingError(b'revision %d not a snapshot')
	1952	return len(self._deltachain(rev)[0]) - 1
	1953
	1954	def revdiff(self, rev1, rev2):
	1955	"""return or calculate a delta between two revisions
	1956
	1957	The delta calculated is in binary form and is intended to be written to
	1958	revlog data directly. So this function needs raw revision data.
	1959	"""
	1960	if rev1 != nullrev and self.deltaparent(rev2) == rev1:
	1961	return bytes(self._chunk(rev2))
	1962
	1963	return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
	1964
	1965	def _processflags(self, text, flags, operation, raw=False):
	1966	"""deprecated entry point to access flag processors"""
	1967	msg = b'_processflag(...) use the specialized variant'
	1968	util.nouideprecwarn(msg, b'5.2', stacklevel=2)
	1969	if raw:
	1970	return text, flagutil.processflagsraw(self, text, flags)
	1971	elif operation == b'read':
	1972	return flagutil.processflagsread(self, text, flags)
	1973	else: # write operation
	1974	return flagutil.processflagswrite(self, text, flags)
	1975
	1976	def revision(self, nodeorrev, _df=None, raw=False):
	1977	"""return an uncompressed revision of a given node or revision
	1978	number.
	1979
	1980	_df - an existing file handle to read from. (internal-only)
	1981	raw - an optional argument specifying if the revision data is to be
	1982	treated as raw data when applying flag transforms. 'raw' should be set
	1983	to True when generating changegroups or in debug commands.
	1984	"""
	1985	if raw:
	1986	msg = (
	1987	b'revlog.revision(..., raw=True) is deprecated, '
	1988	b'use revlog.rawdata(...)'
	1989	)
	1990	util.nouideprecwarn(msg, b'5.2', stacklevel=2)
	1991	return self._revisiondata(nodeorrev, _df, raw=raw)
	1992
	1993	def sidedata(self, nodeorrev, _df=None):
	1994	"""a map of extra data related to the changeset but not part of the hash
	1995
	1996	This function currently return a dictionary. However, more advanced
	1997	mapping object will likely be used in the future for a more
	1998	efficient/lazy code.
	1999	"""
	2000	# deal with <nodeorrev> argument type
	2001	if isinstance(nodeorrev, int):
	2002	rev = nodeorrev
	2003	else:
	2004	rev = self.rev(nodeorrev)
	2005	return self._sidedata(rev)
	2006		77
	2007	def _revisiondata(self, nodeorrev, _df=None, raw=False):	78	if rl.iscensored(rev):
	2008	# deal with <nodeorrev> argument type	79	if rl.deltaparent(rev) != nullrev:
	2009	if isinstance(nodeorrev, int):	80	m = _(
	2010	rev = nodeorrev	81	b'cannot censor due to censored '
	2011	node = self.node(rev)	82	b'revision having delta stored'
	2012	else:	83	)
	2013	node = nodeorrev	84	raise error.Abort(m)
	2014	rev = ~~None~~	85	rawtext = rl._chunk(rev)
	2015
	2016	# fast path the special `nullid` rev
	2017	if node == self.nullid:
	2018	return b""
	2019
	2020	# ``rawtext`` is the text as stored inside the revlog. Might be the
	2021	# revision or might need to be processed to retrieve the revision.
	2022	rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
	2023
	2024	if raw and validated:
	2025	# if we don't want to process the raw text and that raw
	2026	# text is cached, we can exit early.
	2027	return rawtext
	2028	if rev is None:
	2029	rev = self.rev(node)
	2030	# the revlog's flag for this revision
	2031	# (usually alter its state or content)
	2032	flags = self.flags(rev)
	2033
	2034	if validated and flags == REVIDX_DEFAULT_FLAGS:
	2035	# no extra flags set, no flag processor runs, text = rawtext
	2036	return rawtext
	2037
	2038	if raw:
	2039	validatehash = flagutil.processflagsraw(self, rawtext, flags)
	2040	text = rawtext
	2041	else:	86	else:
	2042	r = flagutil.processflagsread(self, rawtext, flags)	87	rawtext = rl.rawdata(rev)
	2043	text, validatehash = r
	2044	if validatehash:
	2045	self.checkhash(text, node, rev=rev)
	2046	if not validated:
	2047	self._revisioncache = (node, rev, rawtext)
	2048
	2049	return text
	2050
	2051	def _rawtext(self, node, rev, _df=None):
	2052	"""return the possibly unvalidated rawtext for a revision
	2053
	2054	returns (rev, rawtext, validated)
	2055	"""
	2056
	2057	# revision in the cache (could be useful to apply delta)
	2058	cachedrev = None
	2059	# An intermediate text to apply deltas to
	2060	basetext = None
	2061
	2062	# Check if we have the entry in cache
	2063	# The cache entry looks like (node, rev, rawtext)
	2064	if self._revisioncache:
	2065	if self._revisioncache[0] == node:
	2066	return (rev, self._revisioncache[2], True)
	2067	cachedrev = self._revisioncache[1]
	2068
	2069	if rev is None:
	2070	rev = self.rev(node)
	2071
	2072	chain, stopped = self._deltachain(rev, stoprev=cachedrev)
	2073	if stopped:
	2074	basetext = self._revisioncache[2]
	2075
	2076	# drop cache to save memory, the caller is expected to
	2077	# update self._revisioncache after validating the text
	2078	self._revisioncache = None
	2079
	2080	targetsize = None
	2081	rawsize = self.index[rev][2]
	2082	if 0 <= rawsize:
	2083	targetsize = 4 * rawsize
	2084
	2085	bins = self._chunks(chain, df=_df, targetsize=targetsize)
	2086	if basetext is None:
	2087	basetext = bytes(bins[0])
	2088	bins = bins[1:]
	2089
	2090	rawtext = mdiff.patches(basetext, bins)
	2091	del basetext # let us have a chance to free memory early
	2092	return (rev, rawtext, False)
	2093
	2094	def _sidedata(self, rev):
	2095	"""Return the sidedata for a given revision number."""
	2096	index_entry = self.index[rev]
	2097	sidedata_offset = index_entry[8]
	2098	sidedata_size = index_entry[9]
	2099
	2100	if self._inline:
	2101	sidedata_offset += self.index.entry_size * (1 + rev)
	2102	if sidedata_size == 0:
	2103	return {}
	2104
	2105	# XXX this need caching, as we do for data
	2106	with self._sidedatareadfp() as sdf:
	2107	if self._docket.sidedata_end < sidedata_offset + sidedata_size:
	2108	filename = self._sidedatafile
	2109	end = self._docket.sidedata_end
	2110	offset = sidedata_offset
	2111	length = sidedata_size
	2112	m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
	2113	raise error.RevlogError(m)
	2114
	2115	sdf.seek(sidedata_offset, os.SEEK_SET)
	2116	comp_segment = sdf.read(sidedata_size)
	2117
	2118	if len(comp_segment) < sidedata_size:
	2119	filename = self._sidedatafile
	2120	length = sidedata_size
	2121	offset = sidedata_offset
	2122	got = len(comp_segment)
	2123	m = PARTIAL_READ_MSG % (filename, length, offset, got)
	2124	raise error.RevlogError(m)
	2125
	2126	comp = self.index[rev][11]
	2127	if comp == COMP_MODE_PLAIN:
	2128	segment = comp_segment
	2129	elif comp == COMP_MODE_DEFAULT:
	2130	segment = self._decompressor(comp_segment)
	2131	elif comp == COMP_MODE_INLINE:
	2132	segment = self.decompress(comp_segment)
	2133	else:
	2134	msg = 'unknown compression mode %d'
	2135	msg %= comp
	2136	raise error.RevlogError(msg)
	2137
	2138	sidedata = sidedatautil.deserialize_sidedata(segment)
	2139	return sidedata
	2140
	2141	def rawdata(self, nodeorrev, _df=None):
	2142	"""return an uncompressed raw data of a given node or revision number.
	2143
	2144	_df - an existing file handle to read from. (internal-only)
	2145	"""
	2146	return self._revisiondata(nodeorrev, _df, raw=True)
	2147
	2148	def hash(self, text, p1, p2):
	2149	"""Compute a node hash.
	2150
	2151	Available as a function so that subclasses can replace the hash
	2152	as needed.
	2153	"""
	2154	return storageutil.hashrevisionsha1(text, p1, p2)
	2155
	2156	def checkhash(self, text, node, p1=None, p2=None, rev=None):
	2157	"""Check node hash integrity.
	2158
	2159	Available as a function so that subclasses can extend hash mismatch
	2160	behaviors as needed.
	2161	"""
	2162	try:
	2163	if p1 is None and p2 is None:
	2164	p1, p2 = self.parents(node)
	2165	if node != self.hash(text, p1, p2):
	2166	# Clear the revision cache on hash failure. The revision cache
	2167	# only stores the raw revision and clearing the cache does have
	2168	# the side-effect that we won't have a cache hit when the raw
	2169	# revision data is accessed. But this case should be rare and
	2170	# it is extra work to teach the cache about the hash
	2171	# verification state.
	2172	if self._revisioncache and self._revisioncache[0] == node:
	2173	self._revisioncache = None
	2174
	2175	revornode = rev
	2176	if revornode is None:
	2177	revornode = templatefilters.short(hex(node))
	2178	raise error.RevlogError(
	2179	_(b"integrity check failed on %s:%s")
	2180	% (self.display_id, pycompat.bytestr(revornode))
	2181	)
	2182	except error.RevlogError:
	2183	if self._censorable and storageutil.iscensoredtext(text):
	2184	raise error.CensoredNodeError(self.display_id, node, text)
	2185	raise
	2186
	2187	def _enforceinlinesize(self, tr):
	2188	"""Check if the revlog is too big for inline and convert if so.
	2189
	2190	This should be called after revisions are added to the revlog. If the
	2191	revlog has grown too large to be an inline revlog, it will convert it
	2192	to use multiple index and data files.
	2193	"""
	2194	tiprev = len(self) - 1
	2195	total_size = self.start(tiprev) + self.length(tiprev)
	2196	if not self._inline or total_size < _maxinline:
	2197	return
	2198
	2199	troffset = tr.findoffset(self._indexfile)
	2200	if troffset is None:
	2201	raise error.RevlogError(
	2202	_(b"%s not found in the transaction") % self._indexfile
	2203	)
	2204	trindex = 0
	2205	tr.add(self._datafile, 0)
	2206		88
	2207	existing_handles = False	89	newrl.addrawrevision(
	2208	if self._writinghandles is not None:	90	rawtext, tr, rl.linkrev(rev), p1, p2, node, rl.flags(rev)
	2209	existing_handles = True
	2210	fp = self._writinghandles[0]
	2211	fp.flush()
	2212	fp.close()
	2213	# We can't use the cached file handle after close(). So prevent
	2214	# its usage.
	2215	self._writinghandles = None
	2216
	2217	new_dfh = self._datafp(b'w+')
	2218	new_dfh.truncate(0) # drop any potentially existing data
	2219	try:
	2220	with self._indexfp() as read_ifh:
	2221	for r in self:
	2222	new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
	2223	if troffset <= self.start(r) + r * self.index.entry_size:
	2224	trindex = r
	2225	new_dfh.flush()
	2226
	2227	with self.__index_new_fp() as fp:
	2228	self._format_flags &= ~FLAG_INLINE_DATA
	2229	self._inline = False
	2230	for i in self:
	2231	e = self.index.entry_binary(i)
	2232	if i == 0 and self._docket is None:
	2233	header = self._format_flags \| self._format_version
	2234	header = self.index.pack_header(header)
	2235	e = header + e
	2236	fp.write(e)
	2237	if self._docket is not None:
	2238	self._docket.index_end = fp.tell()
	2239
	2240	# There is a small transactional race here. If the rename of
	2241	# the index fails, we should remove the datafile. It is more
	2242	# important to ensure that the data file is not truncated
	2243	# when the index is replaced as otherwise data is lost.
	2244	tr.replace(self._datafile, self.start(trindex))
	2245
	2246	# the temp file replace the real index when we exit the context
	2247	# manager
	2248
	2249	tr.replace(self._indexfile, trindex * self.index.entry_size)
	2250	nodemaputil.setup_persistent_nodemap(tr, self)
	2251	self._chunkclear()
	2252
	2253	if existing_handles:
	2254	# switched from inline to conventional reopen the index
	2255	ifh = self.__index_write_fp()
	2256	self._writinghandles = (ifh, new_dfh, None)
	2257	new_dfh = None
	2258	finally:
	2259	if new_dfh is not None:
	2260	new_dfh.close()
	2261
    def _nodeduplicatecallback(self, transaction, node):
        """called when trying to add a node already stored.

        This implementation does nothing and returns None; the hook receives
        the current ``transaction`` and the duplicated ``node``.
        """
	2264
	2265	@contextlib.contextmanager
	2266	def _writing(self, transaction):
	2267	if self._trypending:
	2268	msg = b'try to write in a `trypending` revlog: %s'
	2269	msg %= self.display_id
	2270	raise error.ProgrammingError(msg)
	2271	if self._writinghandles is not None:
	2272	yield
	2273	else:
	2274	ifh = dfh = sdfh = None
	2275	try:
	2276	r = len(self)
	2277	# opening the data file.
	2278	dsize = 0
	2279	if r:
	2280	dsize = self.end(r - 1)
	2281	dfh = None
	2282	if not self._inline:
	2283	try:
	2284	dfh = self._datafp(b"r+")
	2285	if self._docket is None:
	2286	dfh.seek(0, os.SEEK_END)
	2287	else:
	2288	dfh.seek(self._docket.data_end, os.SEEK_SET)
	2289	except IOError as inst:
	2290	if inst.errno != errno.ENOENT:
	2291	raise
	2292	dfh = self._datafp(b"w+")
	2293	transaction.add(self._datafile, dsize)
	2294	if self._sidedatafile is not None:
	2295	try:
	2296	sdfh = self.opener(self._sidedatafile, mode=b"r+")
	2297	dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
	2298	except IOError as inst:
	2299	if inst.errno != errno.ENOENT:
	2300	raise
	2301	sdfh = self.opener(self._sidedatafile, mode=b"w+")
	2302	transaction.add(
	2303	self._sidedatafile, self._docket.sidedata_end
	2304	)
	2305
	2306	# opening the index file.
	2307	isize = r * self.index.entry_size
	2308	ifh = self.__index_write_fp()
	2309	if self._inline:
	2310	transaction.add(self._indexfile, dsize + isize)
	2311	else:
	2312	transaction.add(self._indexfile, isize)
	2313	# exposing all file handle for writing.
	2314	self._writinghandles = (ifh, dfh, sdfh)
	2315	yield
	2316	if self._docket is not None:
	2317	self._write_docket(transaction)
	2318	finally:
	2319	self._writinghandles = None
	2320	if dfh is not None:
	2321	dfh.close()
	2322	if sdfh is not None:
	2323	dfh.close()
	2324	# closing the index file last to avoid exposing referent to
	2325	# potential unflushed data content.
	2326	if ifh is not None:
	2327	ifh.close()
	2328
	2329	def _write_docket(self, transaction):
	2330	"""write the current docket on disk
	2331
	2332	Exist as a method to help changelog to implement transaction logic
	2333
	2334	We could also imagine using the same transaction logic for all revlog
	2335	since docket are cheap."""
	2336	self._docket.write(transaction)
	2337
	2338	def addrevision(
	2339	self,
	2340	text,
	2341	transaction,
	2342	link,
	2343	p1,
	2344	p2,
	2345	cachedelta=None,
	2346	node=None,
	2347	flags=REVIDX_DEFAULT_FLAGS,
	2348	deltacomputer=None,
	2349	sidedata=None,
	2350	):
	2351	"""add a revision to the log
	2352
	2353	text - the revision data to add
	2354	transaction - the transaction object used for rollback
	2355	link - the linkrev data to add
	2356	p1, p2 - the parent nodeids of the revision
	2357	cachedelta - an optional precomputed delta
	2358	node - nodeid of revision; typically node is not specified, and it is
	2359	computed by default as hash(text, p1, p2), however subclasses might
	2360	use different hashing method (and override checkhash() in such case)
	2361	flags - the known flags to set on the revision
	2362	deltacomputer - an optional deltacomputer instance shared between
	2363	multiple calls
	2364	"""
	2365	if link == nullrev:
	2366	raise error.RevlogError(
	2367	_(b"attempted to add linkrev -1 to %s") % self.display_id
	2368	)
	2369
	2370	if sidedata is None:
	2371	sidedata = {}
	2372	elif sidedata and not self.hassidedata:
	2373	raise error.ProgrammingError(
	2374	_(b"trying to add sidedata to a revlog who don't support them")
	2375	)
	2376
	2377	if flags:
	2378	node = node or self.hash(text, p1, p2)
	2379
	2380	rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
	2381
	2382	# If the flag processor modifies the revision data, ignore any provided
	2383	# cachedelta.
	2384	if rawtext != text:
	2385	cachedelta = None
	2386
	2387	if len(rawtext) > _maxentrysize:
	2388	raise error.RevlogError(
	2389	_(
	2390	b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
	2391	)
	2392	% (self.display_id, len(rawtext))
	2393	)
	2394
	2395	node = node or self.hash(rawtext, p1, p2)
	2396	rev = self.index.get_rev(node)
	2397	if rev is not None:
	2398	return rev
	2399
	2400	if validatehash:
	2401	self.checkhash(rawtext, node, p1=p1, p2=p2)
	2402
	2403	return self.addrawrevision(
	2404	rawtext,
	2405	transaction,
	2406	link,
	2407	p1,
	2408	p2,
	2409	node,
	2410	flags,
	2411	cachedelta=cachedelta,
	2412	deltacomputer=deltacomputer,
	2413	sidedata=sidedata,
	2414	)	91	)
	2415		92
	2416	def addrawrevision(	93	tr.addbackup(rl._indexfile, location=b'store')
	2417	self,	94	if not rl._inline:
	2418	rawtext,	95	tr.addbackup(rl._datafile, location=b'store')
	2419	transaction,
	2420	link,
	2421	p1,
	2422	p2,
	2423	node,
	2424	flags,
	2425	cachedelta=None,
	2426	deltacomputer=None,
	2427	sidedata=None,
	2428	):
	2429	"""add a raw revision with known flags, node and parents
	2430	useful when reusing a revision not stored in this revlog (ex: received
	2431	over wire, or read from an external bundle).
	2432	"""
	2433	with self._writing(transaction):
	2434	return self._addrevision(
	2435	node,
	2436	rawtext,
	2437	transaction,
	2438	link,
	2439	p1,
	2440	p2,
	2441	flags,
	2442	cachedelta,
	2443	deltacomputer=deltacomputer,
	2444	sidedata=sidedata,
	2445	)
	2446
	2447	def compress(self, data):
	2448	"""Generate a possibly-compressed representation of data."""
	2449	if not data:
	2450	return b'', data
	2451
	2452	compressed = self._compressor.compress(data)
	2453
	2454	if compressed:
	2455	# The revlog compressor added the header in the returned data.
	2456	return b'', compressed
	2457
	2458	if data[0:1] == b'\0':
	2459	return b'', data
	2460	return b'u', data
	2461
	2462	def decompress(self, data):
	2463	"""Decompress a revlog chunk.
	2464
	2465	The chunk is expected to begin with a header identifying the
	2466	format type so it can be routed to an appropriate decompressor.
	2467	"""
	2468	if not data:
	2469	return data
	2470
	2471	# Revlogs are read much more frequently than they are written and many
	2472	# chunks only take microseconds to decompress, so performance is
	2473	# important here.
	2474	#
	2475	# We can make a few assumptions about revlogs:
	2476	#
	2477	# 1) the majority of chunks will be compressed (as opposed to inline
	2478	# raw data).
	2479	# 2) decompressing any data will likely by at least 10x slower than
	2480	# returning raw inline data.
	2481	# 3) we want to prioritize common and officially supported compression
	2482	# engines
	2483	#
	2484	# It follows that we want to optimize for "decompress compressed data
	2485	# when encoded with common and officially supported compression engines"
	2486	# case over "raw data" and "data encoded by less common or non-official
	2487	# compression engines." That is why we have the inline lookup first
	2488	# followed by the compengines lookup.
	2489	#
	2490	# According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
	2491	# compressed chunks. And this matters for changelog and manifest reads.
	2492	t = data[0:1]
	2493
	2494	if t == b'x':
	2495	try:
	2496	return _zlibdecompress(data)
	2497	except zlib.error as e:
	2498	raise error.RevlogError(
	2499	_(b'revlog decompress error: %s')
	2500	% stringutil.forcebytestr(e)
	2501	)
	2502	# '\0' is more common than 'u' so it goes first.
	2503	elif t == b'\0':
	2504	return data
	2505	elif t == b'u':
	2506	return util.buffer(data, 1)
	2507
	2508	compressor = self._get_decompressor(t)
	2509
	2510	return compressor.decompress(data)
	2511
    def _addrevision(
        self,
        node,
        rawtext,
        transaction,
        link,
        p1,
        p2,
        flags,
        cachedelta,
        alwayscache=False,
        deltacomputer=None,
        sidedata=None,
    ):
        """internal function to add revisions to the log

        see addrevision for argument descriptions.

        note: "addrevision" takes non-raw text, "_addrevision" takes raw text.

        if "deltacomputer" is not provided or None, a defaultdeltacomputer will
        be used.

        if "alwayscache" is set and no rawtext was provided, the text is
        rebuilt through the delta computer so it can be cached.

        Returns the revision number of the newly added revision.

        Raises ``error.RevlogError`` when adding the null or a wdir revision
        and ``error.ProgrammingError`` when called outside of the
        ``revlog._writing`` context.

        invariants:
        - rawtext is optional (can be None); if not set, cachedelta must be set.
          if both are set, they must correspond to each other.
        """
        if node == self.nullid:
            raise error.RevlogError(
                _(b"%s: attempt to add null revision") % self.display_id
            )
        if (
            node == self.nodeconstants.wdirid
            or node in self.nodeconstants.wdirfilenodeids
        ):
            raise error.RevlogError(
                _(b"%s: attempt to add wdir revision") % self.display_id
            )
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)

        # handle holding the revision data: the index handle for inline
        # revlogs, the data handle otherwise
        if self._inline:
            fh = self._writinghandles[0]
        else:
            fh = self._writinghandles[1]

        # one-element list: read back below as ``btext[0]`` after the delta
        # computer has been given the chance to fill it
        btext = [rawtext]

        curr = len(self)
        prev = curr - 1

        offset = self._get_data_offset(prev)

        if self._concurrencychecker:
            ifh, dfh, sdfh = self._writinghandles
            # XXX no checking for the sidedata file
            if self._inline:
                # offset is "as if" it were in the .d file, so we need to add on
                # the size of the entry metadata.
                self._concurrencychecker(
                    ifh, self._indexfile, offset + curr * self.index.entry_size
                )
            else:
                # Entries in the .i are a consistent size.
                self._concurrencychecker(
                    ifh, self._indexfile, curr * self.index.entry_size
                )
                self._concurrencychecker(dfh, self._datafile, offset)

        p1r, p2r = self.rev(p1), self.rev(p2)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if rawtext is None:
            # need rawtext size, before changed by flag processors, which is
            # the non-raw size. use revlog explicitly to avoid filelog's extra
            # logic that might remove metadata size.
            textlen = mdiff.patchedsize(
                revlog.size(self, cachedelta[0]), cachedelta[1]
            )
        else:
            textlen = len(rawtext)

        if deltacomputer is None:
            deltacomputer = deltautil.deltacomputer(self)

        revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)

        deltainfo = deltacomputer.finddeltainfo(revinfo, fh)

        # Pick how the compression of the stored data is declared in the
        # index. Without a docket the mode is always COMP_MODE_INLINE.
        compression_mode = COMP_MODE_INLINE
        if self._docket is not None:
            h, d = deltainfo.data
            if not h and not d:
                # not data to store at all... declare them uncompressed
                compression_mode = COMP_MODE_PLAIN
            elif not h:
                t = d[0:1]
                if t == b'\0':
                    compression_mode = COMP_MODE_PLAIN
                elif t == self._docket.default_compression_header:
                    compression_mode = COMP_MODE_DEFAULT
            elif h == b'u':
                # we have a more efficient way to declare uncompressed
                h = b''
                compression_mode = COMP_MODE_PLAIN
                deltainfo = deltautil.drop_u_compression(deltainfo)

        # Serialize the sidedata and keep the compressed form only when it is
        # strictly smaller than the plain serialization.
        sidedata_compression_mode = COMP_MODE_INLINE
        if sidedata and self.hassidedata:
            sidedata_compression_mode = COMP_MODE_PLAIN
            serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
            sidedata_offset = self._docket.sidedata_end
            h, comp_sidedata = self.compress(serialized_sidedata)
            if (
                h != b'u'
                and comp_sidedata[0:1] != b'\0'
                and len(comp_sidedata) < len(serialized_sidedata)
            ):
                assert not h
                if (
                    comp_sidedata[0:1]
                    == self._docket.default_compression_header
                ):
                    sidedata_compression_mode = COMP_MODE_DEFAULT
                    serialized_sidedata = comp_sidedata
                else:
                    sidedata_compression_mode = COMP_MODE_INLINE
                    serialized_sidedata = comp_sidedata
        else:
            serialized_sidedata = b""
            # Don't store the offset if the sidedata is empty, that way
            # we can easily detect empty sidedata and they will be no different
            # than ones we manually add.
            sidedata_offset = 0

        # index entry for the new revision
        e = (
            offset_type(offset, flags),
            deltainfo.deltalen,
            textlen,
            deltainfo.base,
            link,
            p1r,
            p2r,
            node,
            sidedata_offset,
            len(serialized_sidedata),
            compression_mode,
            sidedata_compression_mode,
        )

        self.index.append(e)
        entry = self.index.entry_binary(curr)
        if curr == 0 and self._docket is None:
            # without a docket, the first entry is prefixed by the header
            header = self._format_flags | self._format_version
            header = self.index.pack_header(header)
            entry = header + entry
        self._writeentry(
            transaction,
            entry,
            deltainfo.data,
            link,
            offset,
            serialized_sidedata,
            sidedata_offset,
        )

        rawtext = btext[0]

        if alwayscache and rawtext is None:
            rawtext = deltacomputer.buildtext(revinfo, fh)

        if type(rawtext) == bytes:  # only accept immutable objects
            self._revisioncache = (node, curr, rawtext)
        self._chainbasecache[curr] = deltainfo.chainbase
        return curr
	2689
	2690	def _get_data_offset(self, prev):
	2691	"""Returns the current offset in the (in-transaction) data file.
	2692	Versions < 2 of the revlog can get this 0(1), revlog v2 needs a docket
	2693	file to store that information: since sidedata can be rewritten to the
	2694	end of the data file within a transaction, you can have cases where, for
	2695	example, rev `n` does not have sidedata while rev `n - 1` does, leading
	2696	to `n - 1`'s sidedata being written after `n`'s data.
	2697
	2698	TODO cache this in a docket file before getting out of experimental."""
	2699	if self._docket is None:
	2700	return self.end(prev)
	2701	else:
	2702	return self._docket.data_end
	2703
    def _writeentry(
        self, transaction, entry, data, link, offset, sidedata, sidedata_offset
    ):
        """Write one revision (index entry, data and sidedata) to disk.

        ``entry`` is the binary index entry.  ``data`` is a two-item sequence
        whose items are written one after the other.  ``sidedata`` is the
        serialized sidedata; it must be empty for an inline revlog.

        ``offset`` and ``sidedata_offset`` are the positions registered with
        ``transaction`` for the data file and the sidedata file.  ``link`` is
        accepted but not used by this method.

        Raises ``error.ProgrammingError`` when no writing handles are set,
        i.e. when called outside of a ``revlog._writing`` context.
        """
        # Files opened in a+ mode have inconsistent behavior on various
        # platforms. Windows requires that a file positioning call be made
        # when the file handle transitions between reads and writes. See
        # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
        # platforms, Python or the platform itself can be buggy. Some versions
        # of Solaris have been observed to not append at the end of the file
        # if the file was seeked to before the end. See issue4943 for more.
        #
        # We work around this issue by inserting a seek() before writing.
        # Note: This is likely not necessary on Python 3. However, because
        # the file handle is reused for reads and may be seeked there, we need
        # to be careful before changing this.
        if self._writinghandles is None:
            msg = b'adding revision outside `revlog._writing` context'
            raise error.ProgrammingError(msg)
        ifh, dfh, sdfh = self._writinghandles
        # Without a docket the write position is the physical end of each
        # file; with a docket it is the end position recorded in the docket.
        if self._docket is None:
            ifh.seek(0, os.SEEK_END)
        else:
            ifh.seek(self._docket.index_end, os.SEEK_SET)
        if dfh:
            if self._docket is None:
                dfh.seek(0, os.SEEK_END)
            else:
                dfh.seek(self._docket.data_end, os.SEEK_SET)
        if sdfh:
            sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)

        # The index already contains the new entry, so the revision being
        # written is the last one.
        curr = len(self) - 1
        if not self._inline:
            # Register the pre-write offset of every file with the
            # transaction before touching it.
            transaction.add(self._datafile, offset)
            if self._sidedatafile:
                transaction.add(self._sidedatafile, sidedata_offset)
            transaction.add(self._indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            if sidedata:
                sdfh.write(sidedata)
            ifh.write(entry)
        else:
            # Inline: entry and data both go to the index file, so the
            # registered offset is shifted by the size of ``curr`` entries.
            offset += curr * self.index.entry_size
            transaction.add(self._indexfile, offset)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            assert not sidedata
            self._enforceinlinesize(transaction)
        if self._docket is not None:
            # Record the new end of each file from the handle positions.
            self._docket.index_end = self._writinghandles[0].tell()
            self._docket.data_end = self._writinghandles[1].tell()
            self._docket.sidedata_end = self._writinghandles[2].tell()

        nodemaputil.setup_persistent_nodemap(transaction, self)
	2761
    def addgroup(
        self,
        deltas,
        linkmapper,
        transaction,
        alwayscache=False,
        addrevisioncb=None,
        duplicaterevisioncb=None,
    ):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.

        Each item of ``deltas`` is an 8-tuple ``(node, p1, p2, linknode,
        deltabase, delta, flags, sidedata)``; ``linkmapper`` is called with
        ``linknode`` to obtain the link revision.

        If ``addrevisioncb`` is defined, it will be called with arguments of
        this revlog and the revision that was added.

        If ``duplicaterevisioncb`` is defined, it will be called with this
        revlog and the existing revision whenever an incoming node is already
        stored.

        ``alwayscache`` is forwarded to ``_addrevision()``.

        Returns True if at least one delta was processed (added or found to
        be a duplicate), False if ``deltas`` was empty.

        Raises ``error.ProgrammingError`` on nested calls,
        ``error.LookupError`` when a parent or the delta base is unknown and
        ``error.CensoredBaseError`` when a delta against a censored base is
        not a full replacement.
        """

        if self._adding_group:
            raise error.ProgrammingError(b'cannot nest addgroup() calls')

        self._adding_group = True
        empty = True
        try:
            with self._writing(transaction):
                deltacomputer = deltautil.deltacomputer(self)
                # loop through our set of deltas
                for data in deltas:
                    (
                        node,
                        p1,
                        p2,
                        linknode,
                        deltabase,
                        delta,
                        flags,
                        sidedata,
                    ) = data
                    link = linkmapper(linknode)
                    flags = flags or REVIDX_DEFAULT_FLAGS

                    rev = self.index.get_rev(node)
                    if rev is not None:
                        # this can happen if two branches make the same change
                        self._nodeduplicatecallback(transaction, rev)
                        if duplicaterevisioncb:
                            duplicaterevisioncb(self, rev)
                        empty = False
                        continue

                    for p in (p1, p2):
                        if not self.index.has_node(p):
                            raise error.LookupError(
                                p, self.radix, _(b'unknown parent')
                            )

                    if not self.index.has_node(deltabase):
                        raise error.LookupError(
                            deltabase, self.display_id, _(b'unknown delta base')
                        )

                    baserev = self.rev(deltabase)

                    if baserev != nullrev and self.iscensored(baserev):
                        # if base is censored, delta must be full replacement in a
                        # single patch operation
                        hlen = struct.calcsize(b">lll")
                        oldlen = self.rawsize(baserev)
                        newlen = len(delta) - hlen
                        if delta[:hlen] != mdiff.replacediffheader(
                            oldlen, newlen
                        ):
                            raise error.CensoredBaseError(
                                self.display_id, self.node(baserev)
                            )

                    # Only probe the delta for censorship when no flag was
                    # provided for the revision.
                    if not flags and self._peek_iscensored(baserev, delta):
                        flags |= REVIDX_ISCENSORED

                    # We assume consumers of addrevisioncb will want to retrieve
                    # the added revision, which will require a call to
                    # revision(). revision() will fast path if there is a cache
                    # hit. So, we tell _addrevision() to always cache in this case.
                    # We're only using addgroup() in the context of changegroup
                    # generation so the revision data can always be handled as raw
                    # by the flagprocessor.
                    rev = self._addrevision(
                        node,
                        None,
                        transaction,
                        link,
                        p1,
                        p2,
                        flags,
                        (baserev, delta),
                        alwayscache=alwayscache,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

                    if addrevisioncb:
                        addrevisioncb(self, rev)
                    empty = False
        finally:
            # Always clear the guard so a failed group does not block the
            # next call.
            self._adding_group = False
        return not empty
	2870
	2871	def iscensored(self, rev):
	2872	"""Check if a file revision is censored."""
	2873	if not self._censorable:
	2874	return False
	2875
	2876	return self.flags(rev) & REVIDX_ISCENSORED
	2877
	2878	def _peek_iscensored(self, baserev, delta):
	2879	"""Quickly check if a delta produces a censored revision."""
	2880	if not self._censorable:
	2881	return False
	2882
	2883	return storageutil.deltaiscensored(delta, baserev, self.rawsize)
	2884
	2885	def getstrippoint(self, minlink):
	2886	"""find the minimum rev that must be stripped to strip the linkrev
	2887
	2888	Returns a tuple containing the minimum rev and a set of all revs that
	2889	have linkrevs that will be broken by this strip.
	2890	"""
	2891	return storageutil.resolvestripinfo(
	2892	minlink,
	2893	len(self) - 1,
	2894	self.headrevs(),
	2895	self.linkrev,
	2896	self.parentrevs,
	2897	)
	2898
	2899	def strip(self, minlink, transaction):
	2900	"""truncate the revlog on the first revision with a linkrev >= minlink
	2901
	2902	This function is called when we're stripping revision minlink and
	2903	its descendants from the repository.
	2904
	2905	We have to remove all revisions with linkrev >= minlink, because
	2906	the equivalent changelog revisions will be renumbered after the
	2907	strip.
	2908
	2909	So we truncate the revlog on the first of these revisions, and
	2910	trust that the caller has saved the revisions that shouldn't be
	2911	removed and that it'll re-add them after this truncation.
	2912	"""
	2913	if len(self) == 0:
	2914	return
	2915
	2916	rev, _ = self.getstrippoint(minlink)
	2917	if rev == len(self):
	2918	return
	2919
	2920	# first truncate the files on disk
	2921	data_end = self.start(rev)
	2922	if not self._inline:
	2923	transaction.add(self._datafile, data_end)
	2924	end = rev * self.index.entry_size
	2925	else:
	2926	end = data_end + (rev * self.index.entry_size)
	2927
	2928	if self._sidedatafile:
	2929	sidedata_end = self.sidedata_cut_off(rev)
	2930	transaction.add(self._sidedatafile, sidedata_end)
	2931
	2932	transaction.add(self._indexfile, end)
	2933	if self._docket is not None:
	2934	# XXX we could, leverage the docket while stripping. However it is
	2935	# not powerfull enough at the time of this comment
	2936	self._docket.index_end = end
	2937	self._docket.data_end = data_end
	2938	self._docket.sidedata_end = sidedata_end
	2939	self._docket.write(transaction, stripping=True)
	2940
	2941	# then reset internal state in memory to forget those revisions
	2942	self._revisioncache = None
	2943	self._chaininfocache = util.lrucachedict(500)
	2944	self._chunkclear()
	2945
	2946	del self.index[rev:-1]
	2947
	2948	def checksize(self):
	2949	"""Check size of index and data files
	2950
	2951	return a (dd, di) tuple.
	2952	- dd: extra bytes for the "data" file
	2953	- di: extra bytes for the "index" file
	2954
	2955	A healthy revlog will return (0, 0).
	2956	"""
	2957	expected = 0
	2958	if len(self):
	2959	expected = max(0, self.end(len(self) - 1))
	2960
	2961	try:
	2962	with self._datafp() as f:
	2963	f.seek(0, io.SEEK_END)
	2964	actual = f.tell()
	2965	dd = actual - expected
	2966	except IOError as inst:
	2967	if inst.errno != errno.ENOENT:
	2968	raise
	2969	dd = 0
	2970		96
	2971	try:	97	rl.opener.rename(newrl._indexfile, rl._indexfile)
	2972	f = self.opener(self._indexfile)	98	if not rl._inline:
	2973	f.seek(0, io.SEEK_END)	99	rl.opener.rename(newrl._datafile, rl._datafile)
	2974	actual = f.tell()
	2975	f.close()
	2976	s = self.index.entry_size
	2977	i = max(0, actual // s)
	2978	di = actual - (i * s)
	2979	if self._inline:
	2980	databytes = 0
	2981	for r in self:
	2982	databytes += max(0, self.length(r))
	2983	dd = 0
	2984	di = actual - len(self) * s - databytes
	2985	except IOError as inst:
	2986	if inst.errno != errno.ENOENT:
	2987	raise
	2988	di = 0
	2989
	2990	return (dd, di)
	2991
	2992	def files(self):
	2993	res = [self._indexfile]
	2994	if not self._inline:
	2995	res.append(self._datafile)
	2996	return res
	2997
	2998	def emitrevisions(
	2999	self,
	3000	nodes,
	3001	nodesorder=None,
	3002	revisiondata=False,
	3003	assumehaveparentrevisions=False,
	3004	deltamode=repository.CG_DELTAMODE_STD,
	3005	sidedata_helpers=None,
	3006	):
	3007	if nodesorder not in (b'nodes', b'storage', b'linear', None):
	3008	raise error.ProgrammingError(
	3009	b'unhandled value for nodesorder: %s' % nodesorder
	3010	)
	3011
	3012	if nodesorder is None and not self._generaldelta:
	3013	nodesorder = b'storage'
	3014
	3015	if (
	3016	not self._storedeltachains
	3017	and deltamode != repository.CG_DELTAMODE_PREV
	3018	):
	3019	deltamode = repository.CG_DELTAMODE_FULL
	3020
	3021	return storageutil.emitrevisions(
	3022	self,
	3023	nodes,
	3024	nodesorder,
	3025	revlogrevisiondelta,
	3026	deltaparentfn=self.deltaparent,
	3027	candeltafn=self.candelta,
	3028	rawsizefn=self.rawsize,
	3029	revdifffn=self.revdiff,
	3030	flagsfn=self.flags,
	3031	deltamode=deltamode,
	3032	revisiondata=revisiondata,
	3033	assumehaveparentrevisions=assumehaveparentrevisions,
	3034	sidedata_helpers=sidedata_helpers,
	3035	)
	3036
    # Accepted values for the ``deltareuse`` argument of ``clone()``; see
    # the ``clone()`` docstring for the meaning of each policy.
    DELTAREUSEALWAYS = b'always'
    DELTAREUSESAMEREVS = b'samerevs'
    DELTAREUSENEVER = b'never'

    DELTAREUSEFULLADD = b'fulladd'

    # Set of every valid policy, used by ``clone()`` to validate its argument.
    DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
	3044
	3045	def clone(
	3046	self,
	3047	tr,
	3048	destrevlog,
	3049	addrevisioncb=None,
	3050	deltareuse=DELTAREUSESAMEREVS,
	3051	forcedeltabothparents=None,
	3052	sidedata_helpers=None,
	3053	):
	3054	"""Copy this revlog to another, possibly with format changes.
	3055
	3056	The destination revlog will contain the same revisions and nodes.
	3057	However, it may not be bit-for-bit identical due to e.g. delta encoding
	3058	differences.
	3059
	3060	The ``deltareuse`` argument control how deltas from the existing revlog
	3061	are preserved in the destination revlog. The argument can have the
	3062	following values:
	3063
	3064	DELTAREUSEALWAYS
	3065	Deltas will always be reused (if possible), even if the destination
	3066	revlog would not select the same revisions for the delta. This is the
	3067	fastest mode of operation.
	3068	DELTAREUSESAMEREVS
	3069	Deltas will be reused if the destination revlog would pick the same
	3070	revisions for the delta. This mode strikes a balance between speed
	3071	and optimization.
	3072	DELTAREUSENEVER
	3073	Deltas will never be reused. This is the slowest mode of execution.
	3074	This mode can be used to recompute deltas (e.g. if the diff/delta
	3075	algorithm changes).
	3076	DELTAREUSEFULLADD
	3077	Revision will be re-added as if their were new content. This is
	3078	slower than DELTAREUSEALWAYS but allow more mechanism to kicks in.
	3079	eg: large file detection and handling.
	3080
	3081	Delta computation can be slow, so the choice of delta reuse policy can
	3082	significantly affect run time.
	3083
	3084	The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
	3085	two extremes. Deltas will be reused if they are appropriate. But if the
	3086	delta could choose a better revision, it will do so. This means if you
	3087	are converting a non-generaldelta revlog to a generaldelta revlog,
	3088	deltas will be recomputed if the delta's parent isn't a parent of the
	3089	revision.
	3090
	3091	In addition to the delta policy, the ``forcedeltabothparents``
	3092	argument controls whether to force compute deltas against both parents
	3093	for merges. By default, the current default is used.
	3094
	3095	See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
	3096	`sidedata_helpers`.
	3097	"""
	3098	if deltareuse not in self.DELTAREUSEALL:
	3099	raise ValueError(
	3100	_(b'value for deltareuse invalid: %s') % deltareuse
	3101	)
	3102
	3103	if len(destrevlog):
	3104	raise ValueError(_(b'destination revlog is not empty'))
	3105
	3106	if getattr(self, 'filteredrevs', None):
	3107	raise ValueError(_(b'source revlog has filtered revisions'))
	3108	if getattr(destrevlog, 'filteredrevs', None):
	3109	raise ValueError(_(b'destination revlog has filtered revisions'))
	3110
	3111	# lazydelta and lazydeltabase controls whether to reuse a cached delta,
	3112	# if possible.
	3113	oldlazydelta = destrevlog._lazydelta
	3114	oldlazydeltabase = destrevlog._lazydeltabase
	3115	oldamd = destrevlog._deltabothparents
	3116
	3117	try:
	3118	if deltareuse == self.DELTAREUSEALWAYS:
	3119	destrevlog._lazydeltabase = True
	3120	destrevlog._lazydelta = True
	3121	elif deltareuse == self.DELTAREUSESAMEREVS:
	3122	destrevlog._lazydeltabase = False
	3123	destrevlog._lazydelta = True
	3124	elif deltareuse == self.DELTAREUSENEVER:
	3125	destrevlog._lazydeltabase = False
	3126	destrevlog._lazydelta = False
	3127
	3128	destrevlog._deltabothparents = forcedeltabothparents or oldamd
	3129
	3130	self._clone(
	3131	tr,
	3132	destrevlog,
	3133	addrevisioncb,
	3134	deltareuse,
	3135	forcedeltabothparents,
	3136	sidedata_helpers,
	3137	)
	3138
	3139	finally:
	3140	destrevlog._lazydelta = oldlazydelta
	3141	destrevlog._lazydeltabase = oldlazydeltabase
	3142	destrevlog._deltabothparents = oldamd
	3143
    def _clone(
        self,
        tr,
        destrevlog,
        addrevisioncb,
        deltareuse,
        forcedeltabothparents,
        sidedata_helpers,
    ):
        """perform the core duty of `revlog.clone` after parameter processing

        Every revision of ``self`` is added to ``destrevlog`` in iteration
        order.  With ``DELTAREUSEFULLADD`` the revision goes through
        ``destrevlog.addrevision()``; otherwise it is stored with
        ``destrevlog._addrevision()``, reusing the stored delta when
        ``destrevlog._lazydelta`` is set and the revision has a delta parent.

        When ``sidedata_helpers`` is not None the sidedata (and flags) of
        each revision are passed through
        ``sidedatautil.run_sidedata_helpers``.

        ``addrevisioncb``, when set, is called with ``(self, rev, node)``
        after each revision.  ``forcedeltabothparents`` is accepted but not
        used by this method.
        """
        deltacomputer = deltautil.deltacomputer(destrevlog)
        index = self.index
        for rev in self:
            entry = index[rev]

            # Some classes override linkrev to take filtered revs into
            # account. Use raw entry from index.
            flags = entry[0] & 0xFFFF
            linkrev = entry[4]
            p1 = index[entry[5]][7]
            p2 = index[entry[6]][7]
            node = entry[7]

            # (Possibly) reuse the delta from the revlog if allowed and
            # the revlog chunk is a delta.
            cachedelta = None
            rawtext = None
            if deltareuse == self.DELTAREUSEFULLADD:
                text = self._revisiondata(rev)
                sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    # NOTE(review): `&` binds tighter than `|`, so this is
                    # flags | (new_flags[0] & ~new_flags[1]) -- confirm that
                    # new_flags[1] is not meant to clear bits of `flags`.
                    flags = flags | new_flags[0] & ~new_flags[1]

                destrevlog.addrevision(
                    text,
                    tr,
                    linkrev,
                    p1,
                    p2,
                    cachedelta=cachedelta,
                    node=node,
                    flags=flags,
                    deltacomputer=deltacomputer,
                    sidedata=sidedata,
                )
            else:
                if destrevlog._lazydelta:
                    dp = self.deltaparent(rev)
                    if dp != nullrev:
                        cachedelta = (dp, bytes(self._chunk(rev)))

                sidedata = None
                # Without a reusable delta the raw text has to be provided.
                if not cachedelta:
                    rawtext = self._revisiondata(rev)
                    sidedata = self.sidedata(rev)
                if sidedata is None:
                    sidedata = self.sidedata(rev)

                if sidedata_helpers is not None:
                    (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
                        self, sidedata_helpers, sidedata, rev
                    )
                    # NOTE(review): same operator precedence remark as in the
                    # DELTAREUSEFULLADD branch of this method.
                    flags = flags | new_flags[0] & ~new_flags[1]

                with destrevlog._writing(tr):
                    destrevlog._addrevision(
                        node,
                        rawtext,
                        tr,
                        linkrev,
                        p1,
                        p2,
                        flags,
                        cachedelta,
                        deltacomputer=deltacomputer,
                        sidedata=sidedata,
                    )

            if addrevisioncb:
                addrevisioncb(self, rev, node)
	3228
    def censorrevision(self, tr, censornode, tombstone=b''):
        """Replace the content of ``censornode`` with a censor tombstone.

        The whole revlog is copied to a temporary revlog (postfix
        ``tmpcensored``) in which the censored revision holds the packed
        ``tombstone`` and carries ``REVIDX_ISCENSORED``; the temporary files
        are then renamed over the current ones and the index is reloaded.

        Raises ``error.RevlogError`` for version 0 revlogs and
        ``error.Abort`` when the tombstone is longer than the censored data
        or when a censored revision is stored as a delta.
        """
        if self._format_version == REVLOGV0:
            raise error.RevlogError(
                _(b'cannot censor with version %d revlogs')
                % self._format_version
            )

        censorrev = self.rev(censornode)
        tombstone = storageutil.packmeta({b'censored': tombstone}, b'')

        if len(tombstone) > self.rawsize(censorrev):
            raise error.Abort(
                _(b'censor tombstone must be no longer than censored data')
            )

        # Rewriting the revlog in place is hard. Our strategy for censoring is
        # to create a new revlog, copy all revisions to it, then replace the
        # revlogs on transaction close.
        #
        # This is a bit dangerous. We could easily have a mismatch of state.
        newrl = revlog(
            self.opener,
            target=self.target,
            radix=self.radix,
            postfix=b'tmpcensored',
            censorable=True,
        )
        # Make the temporary revlog use the same format as this one.
        newrl._format_version = self._format_version
        newrl._format_flags = self._format_flags
        newrl._generaldelta = self._generaldelta
        newrl._parse_index = self._parse_index

        for rev in self.revs():
            node = self.node(rev)
            p1, p2 = self.parents(node)

            if rev == censorrev:
                newrl.addrawrevision(
                    tombstone,
                    tr,
                    self.linkrev(censorrev),
                    p1,
                    p2,
                    censornode,
                    REVIDX_ISCENSORED,
                )

                if newrl.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'censored revision stored as delta; '
                            b'cannot censor'
                        ),
                        hint=_(
                            b'censoring of revlogs is not '
                            b'fully implemented; please report '
                            b'this bug'
                        ),
                    )
                continue

            if self.iscensored(rev):
                if self.deltaparent(rev) != nullrev:
                    raise error.Abort(
                        _(
                            b'cannot censor due to censored '
                            b'revision having delta stored'
                        )
                    )
                # An already censored revision is copied from its stored
                # chunk rather than through rawdata().
                rawtext = self._chunk(rev)
            else:
                rawtext = self.rawdata(rev)

            newrl.addrawrevision(
                rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
            )

        # Register backups of the current files with the transaction before
        # they get replaced.
        tr.addbackup(self._indexfile, location=b'store')
        if not self._inline:
            tr.addbackup(self._datafile, location=b'store')

        self.opener.rename(newrl._indexfile, self._indexfile)
        if not self._inline:
            self.opener.rename(newrl._datafile, self._datafile)

        # Drop in-memory state and reload the index from the new files.
        self.clearcaches()
        self._loadindex()
	3316
    def verifyintegrity(self, state):
        """Verifies the integrity of the revlog.

        Yields ``revlogproblem`` instances describing problems that are
        found.

        ``state`` is a dict shared with the verifier.  This method reads
        ``expectedversion``, ``erroroncensored`` and the optional
        ``skipflags`` keys, and (re)initializes ``skipread`` and
        ``safe_renamed`` to empty sets.  Nodes whose content could not be
        checked are added to ``state[b'skipread']``.
        """
        dd, di = self.checksize()
        if dd:
            yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
        if di:
            yield revlogproblem(error=_(b'index contains %d extra bytes') % di)

        version = self._format_version

        # The verifier tells us what version revlog we should be.
        if version != state[b'expectedversion']:
            yield revlogproblem(
                warning=_(b"warning: '%s' uses revlog format %d; expected %d")
                % (self.display_id, version, state[b'expectedversion'])
            )

        state[b'skipread'] = set()
        state[b'safe_renamed'] = set()

        for rev in self:
            node = self.node(rev)

            # Verify contents. 4 cases to care about:
            #
            #   common: the most common case
            #   rename: with a rename
            #   meta: file content starts with b'\1\n', the metadata
            #         header defined in filelog.py, but without a rename
            #   ext: content stored externally
            #
            # More formally, their differences are shown below:
            #
            #                       | common | rename | meta  | ext
            #  -------------------------------------------------------
            #   flags()             | 0      | 0      | 0     | not 0
            #   renamed()           | False  | True   | False | ?
            #   rawtext[0:2]=='\1\n'| False  | True   | True  | ?
            #
            # "rawtext" means the raw text stored in revlog data, which
            # could be retrieved by "rawdata(rev)". "text"
            # mentioned below is "revision(rev)".
            #
            # There are 3 different lengths stored physically:
            #  1. L1: rawsize, stored in revlog index
            #  2. L2: len(rawtext), stored in revlog data
            #  3. L3: len(text), stored in revlog data if flags==0, or
            #     possibly somewhere else if flags!=0
            #
            # L1 should be equal to L2. L3 could be different from them.
            # "text" may or may not affect commit hash depending on flag
            # processors (see flagutil.addflagprocessor).
            #
            #              | common  | rename | meta  | ext
            # -------------------------------------------------
            #    rawsize() | L1      | L1     | L1    | L1
            #       size() | L1      | L2-LM  | L1(*) | L1 (?)
            # len(rawtext) | L2      | L2     | L2    | L2
            #    len(text) | L2      | L2     | L2    | L3
            #  len(read()) | L2      | L2-LM  | L2-LM | L3 (?)
            #
            # LM:  length of metadata, depending on rawtext
            # (*): not ideal, see comment in filelog.size
            # (?): could be "- len(meta)" if the resolved content has
            #      rename metadata
            #
            # Checks needed to be done:
            #  1. length check: L1 == L2, in all cases.
            #  2. hash check: depending on flag processor, we may need to
            #     use either "text" (external), or "rawtext" (in revlog).

            try:
                # Only keep the requested skip flags that this revision
                # actually carries.
                skipflags = state.get(b'skipflags', 0)
                if skipflags:
                    skipflags &= self.flags(rev)

                _verify_revision(self, skipflags, state, node)

                l1 = self.rawsize(rev)
                l2 = len(self.rawdata(node))

                if l1 != l2:
                    yield revlogproblem(
                        error=_(b'unpacked size is %d, %d expected') % (l2, l1),
                        node=node,
                    )

            except error.CensoredNodeError:
                # Censored data is only reported when the verifier asks for it.
                if state[b'erroroncensored']:
                    yield revlogproblem(
                        error=_(b'censored file data'), node=node
                    )
                    state[b'skipread'].add(node)
            except Exception as e:
                # Any other failure is reported as a problem for this node
                # instead of aborting the whole verification.
                yield revlogproblem(
                    error=_(b'unpacking %s: %s')
                    % (short(node), stringutil.forcebytestr(e)),
                    node=node,
                )
                state[b'skipread'].add(node)
	3421
	3422	def storageinfo(
	3423	self,
	3424	exclusivefiles=False,
	3425	sharedfiles=False,
	3426	revisionscount=False,
	3427	trackedsize=False,
	3428	storedsize=False,
	3429	):
	3430	d = {}
	3431
	3432	if exclusivefiles:
	3433	d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
	3434	if not self._inline:
	3435	d[b'exclusivefiles'].append((self.opener, self._datafile))
	3436
	3437	if sharedfiles:
	3438	d[b'sharedfiles'] = []
	3439
	3440	if revisionscount:
	3441	d[b'revisionscount'] = len(self)
	3442
	3443	if trackedsize:
	3444	d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
	3445
	3446	if storedsize:
	3447	d[b'storedsize'] = sum(
	3448	self.opener.stat(path).st_size for path in self.files()
	3449	)
	3450
	3451	return d
	3452
	3453	def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
	3454	if not self.hassidedata:
	3455	return
	3456	# revlog formats with sidedata support does not support inline
	3457	assert not self._inline
	3458	if not helpers[1] and not helpers[2]:
	3459	# Nothing to generate or remove
	3460	return
	3461
	3462	new_entries = []
	3463	# append the new sidedata
	3464	with self._writing(transaction):
	3465	ifh, dfh, sdfh = self._writinghandles
	3466	dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
	3467
	3468	current_offset = sdfh.tell()
	3469	for rev in range(startrev, endrev + 1):
	3470	entry = self.index[rev]
	3471	new_sidedata, flags = sidedatautil.run_sidedata_helpers(
	3472	store=self,
	3473	sidedata_helpers=helpers,
	3474	sidedata={},
	3475	rev=rev,
	3476	)
	3477
	3478	serialized_sidedata = sidedatautil.serialize_sidedata(
	3479	new_sidedata
	3480	)
	3481
	3482	sidedata_compression_mode = COMP_MODE_INLINE
	3483	if serialized_sidedata and self.hassidedata:
	3484	sidedata_compression_mode = COMP_MODE_PLAIN
	3485	h, comp_sidedata = self.compress(serialized_sidedata)
	3486	if (
	3487	h != b'u'
	3488	and comp_sidedata[0] != b'\0'
	3489	and len(comp_sidedata) < len(serialized_sidedata)
	3490	):
	3491	assert not h
	3492	if (
	3493	comp_sidedata[0]
	3494	== self._docket.default_compression_header
	3495	):
	3496	sidedata_compression_mode = COMP_MODE_DEFAULT
	3497	serialized_sidedata = comp_sidedata
	3498	else:
	3499	sidedata_compression_mode = COMP_MODE_INLINE
	3500	serialized_sidedata = comp_sidedata
	3501	if entry[8] != 0 or entry[9] != 0:
	3502	# rewriting entries that already have sidedata is not
	3503	# supported yet, because it introduces garbage data in the
	3504	# revlog.
	3505	msg = b"rewriting existing sidedata is not supported yet"
	3506	raise error.Abort(msg)
	3507
	3508	# Apply (potential) flags to add and to remove after running
	3509	# the sidedata helpers
	3510	new_offset_flags = entry[0] \| flags[0] & ~flags[1]
	3511	entry_update = (
	3512	current_offset,
	3513	len(serialized_sidedata),
	3514	new_offset_flags,
	3515	sidedata_compression_mode,
	3516	)
	3517
	3518	# the sidedata computation might have move the file cursors around
	3519	sdfh.seek(current_offset, os.SEEK_SET)
	3520	sdfh.write(serialized_sidedata)
	3521	new_entries.append(entry_update)
	3522	current_offset += len(serialized_sidedata)
	3523	self._docket.sidedata_end = sdfh.tell()
	3524
	3525	# rewrite the new index entries
	3526	ifh.seek(startrev * self.index.entry_size)
	3527	for i, e in enumerate(new_entries):
	3528	rev = startrev + i
	3529	self.index.replace_sidedata_info(rev, *e)
	3530	packed = self.index.entry_binary(rev)
	3531	if rev == 0 and self._docket is None:
	3532	header = self._format_flags \| self._format_version
	3533	header = self.index.pack_header(header)
	3534	packed = header + packed
	3535	ifh.write(packed)

General Comments 0

Write
Preview

You need to be logged in to leave comments. Login now

No TODOs yet

	Site-wide shortcuts
/	Use quick search box
g h	Goto home page
g g	Goto my private gists page
g G	Goto my public gists page
g 0-9	Goto bookmarked items from 0-9
n r	New repository page
n g	New gist page

	Repositories
g s	Goto summary page
g c	Goto changelog page
g f	Goto files page
g F	Goto files page with file search activated
g p	Goto pull requests page
g o	Goto repository settings
g O	Goto repository access permissions settings
t s	Toggle sidebar on some pages