##// END OF EJS Templates
remotefilelog: stop using the `pycompat.open()` shim
remotefilelog: stop using the `pycompat.open()` shim

File last commit:

r52756:f4733654 default
r53268:035375d6 default
Show More
nodemap.py
671 lines | 21.6 KiB | text/x-python | PythonLexer
revlogutils: move the NodeMap class in a dedicated nodemap module...
r44486 # nodemap.py - nodemap related code and utilities
#
# Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
# Copyright 2019 George Racinet <georges.racinet@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
Matt Harbison
typing: add `from __future__ import annotations` to most files...
r52756 from __future__ import annotations
nodemap: have some python code writing a nodemap in persistent binary form...
r44788
nodemap: delete older raw data file when creating a new ones...
r44793 import re
nodemap: have some python code writing a nodemap in persistent binary form...
r44788 import struct
Joerg Sonnenberger
node: import symbols explicitly...
r46729 from ..node import hex
nodemap: add a new mode option, with an optional "warn" value...
r45292
nodemap: have some python code writing a nodemap in persistent binary form...
r44788 from .. import (
error,
stream-clone: allow to change persistent-nodemap format during stream clone...
r49542 requirements,
nodemap: add a optional `nodemap_add_full` method on indexes...
r44795 util,
nodemap: have some python code writing a nodemap in persistent binary form...
r44788 )
docket: move the uid logic in the `revlogutils.docket` module...
r48093 from . import docket as docket_mod
revlogutils: move the NodeMap class in a dedicated nodemap module...
r44486
class NodeMap(dict):
    """Dictionary that raises a ``RevlogError`` for keys it does not hold."""

    def __missing__(self, x):
        # ``dict`` calls this hook when a subscripted key is absent
        message = b'unknown node: %s' % x
        raise error.RevlogError(message)
nodemap: have some python code writing a nodemap in persistent binary form...
r44788
persistent-nodemap: introduce a test to highlight possible race...
def test_race_hook_1():
    """Hook point for tests; does nothing by itself.

    It lets tests make things happen between the docket reading and the
    data reading.
    """
    return None
stream-clone: allow to change persistent-nodemap format during stream clone...
def post_stream_cleanup(repo):
    """Remove leftover nodemap files after a stream clone, when needed.

    The stream clone might need to remove some files if the persistent
    nodemap was dropped while stream cloning.
    """
    # nothing to clean unless the repository uses revlog-v1 *without* the
    # persistent nodemap requirement
    if (
        requirements.REVLOGV1_REQUIREMENT not in repo.requirements
        or requirements.NODEMAP_REQUIREMENT in repo.requirements
    ):
        return
    unfiltered = repo.unfiltered()
    delete_nodemap(None, unfiltered, unfiltered.changelog)
    root_manifest_revlog = unfiltered.manifestlog._rootstore._revlog
    delete_nodemap(None, repo, root_manifest_revlog)
nodemap: add a function to read the data from disk...
def persisted_data(revlog):
    """Read the persistent nodemap of a revlog from disk.

    Returns a ``(docket, data)`` pair, or ``None`` when the revlog has no
    nodemap file, the docket is empty or of another version, the raw data
    file is missing, or it holds less data than the docket announces.
    """
    docket_path = revlog._nodemap_file
    if docket_path is None:
        return None
    pdata = revlog.opener.tryread(docket_path)
    if not pdata:
        return None

    # the docket starts with a version marker
    cursor = S_VERSION.size
    (version,) = S_VERSION.unpack(pdata[:cursor])
    if version != ONDISK_VERSION:
        return None

    # then a fixed size header
    header_end = cursor + S_HEADER.size
    (
        uid_size,
        tip_rev,
        data_length,
        data_unused,
        tip_node_size,
    ) = S_HEADER.unpack(pdata[cursor:header_end])
    cursor = header_end

    # then the variable size fields: the uid followed by the tip node
    docket = NodeMapDocket(pdata[cursor : cursor + uid_size])
    cursor += uid_size
    docket.tip_rev = tip_rev
    docket.tip_node = pdata[cursor : cursor + tip_node_size]
    docket.data_length = data_length
    docket.data_unused = data_unused

    filename = _rawdata_filepath(revlog, docket)
    use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap")

    test_race_hook_1()
    try:
        with revlog.opener(filename) as fd:
            if use_mmap and revlog.opener.is_mmap_safe(filename):
                try:
                    data = util.buffer(util.mmapread(fd, data_length))
                except ValueError:
                    # raised when the read file is too small
                    data = b''
            else:
                data = fd.read(data_length)
    except FileNotFoundError:
        return None
    if len(data) < data_length:
        # the data file is shorter than what the docket expects
        return None
    return docket, data
nodemap: add a function to read the data from disk...
r44790
nodemap: write nodemap data on disk...
def setup_persistent_nodemap(tr, revlog):
    """Install whatever is needed transaction side to persist a nodemap on disk

    (only actually persist the nodemap if this is relevant for this revlog)
    """
    # inlined revlogs are too small for this to be relevant, and a revlog
    # without a nodemap file does not use the persistent nodemap at all
    if revlog._inline or revlog._nodemap_file is None:
        return

    # We need to run after the changelog finalization, hence an id that
    # does not use the "cl-" prefix.
    # NOTE(review): presumably callbacks run in sorted id order so "nm-"
    # comes after "cl-" -- confirm against the transaction code.
    callback_id = b"nm-revlog-persistent-nodemap-%s" % revlog._nodemap_file
    if tr.hasfinalize(callback_id):
        return  # no need to register again

    def write_pending(tr):
        return persist_nodemap(tr, revlog, pending=True)

    def write_final(tr):
        return persist_nodemap(tr, revlog)

    tr.addpending(callback_id, write_pending)
    tr.addfinalize(callback_id, write_final)
nodemap: deal with the "debugupdatecache" case using a "fake" transaction...
r44986
Gregory Szorc
py3: use class X: instead of class X(object):...
r49801 class _NoTransaction:
Augie Fackler
formating: upgrade to black 20.8b1...
r46554 """transaction like object to update the nodemap outside a transaction"""
nodemap: deal with the "debugupdatecache" case using a "fake" transaction...
r44986
def __init__(self):
self._postclose = {}
def addpostclose(self, callback_id, callback_func):
self._postclose[callback_id] = callback_func
nodemap: write nodemap data on disk...
r44789
nodemap: make sure on disk change get rolled back with the transaction...
r45124 def registertmp(self, *args, **kwargs):
pass
def addbackup(self, *args, **kwargs):
pass
def add(self, *args, **kwargs):
pass
nodemap: display percentage of unused in `hg debugnodemap`...
r45125 def addabort(self, *args, **kwargs):
pass
nodemap: add a new mode option, with an optional "warn" value...
r45292 def _report(self, *args):
pass
nodemap: write nodemap data on disk...
r44789
nodemap: warm the persistent nodemap on disk with debugupdatecache...
def update_persistent_nodemap(revlog):
    """update the persistent nodemap right now

    To be used for updating the nodemap on disk outside of a normal transaction
    setup (eg, `debugupdatecache`).
    """
    # inlined revlogs are too small for this to be relevant, and a revlog
    # without a nodemap file does not use the persistent nodemap
    if revlog._inline or revlog._nodemap_file is None:
        return
    notr = _NoTransaction()
    persist_nodemap(notr, revlog)
    # no real transaction will close, so run the post-close callbacks now
    callbacks = notr._postclose
    for callback_id in sorted(callbacks):
        callbacks[callback_id](None)
nodemap: warm the persistent nodemap on disk with debugupdatecache...
r44932
Pulkit Goyal
upgrade: speed up when we have only nodemap to downgrade...
def delete_nodemap(tr, repo, revlog):
    """Delete nodemap data on disk for a given revlog

    Every entry of the revlog's index directory that is named after the
    revlog radix and looks like a nodemap docket (``.n``, ``.n.a``) or raw
    data file (``-<hex>.nd``) is unlinked through ``repo.svfs``.

    ``tr`` is not used by this function.
    """
    # the radix is a literal file name prefix: escape it so characters such
    # as "." cannot match (and delete) unrelated files
    prefix = re.escape(revlog.radix)
    pattern = re.compile(br"(^|/)%s(-[0-9a-f]+\.nd|\.n(\.a)?)$" % prefix)
    dirpath = revlog.opener.dirname(revlog._indexfile)
    for f in revlog.opener.listdir(dirpath):
        if pattern.match(f):
            repo.svfs.tryunlink(f)
Pulkit Goyal
upgrade: speed up when we have only nodemap to downgrade...
r47276
Pulkit Goyal
upgrade: implement partial upgrade for upgrading persistent-nodemap...
def persist_nodemap(tr, revlog, pending=False, force=False):
    """Write nodemap data on disk for a given revlog

    tr:      transaction(-like) object used to register the written files
    revlog:  the revlog whose nodemap is persisted
    pending: write the docket to a ``.a`` suffixed file registered as a
             temporary file, instead of the regular docket file
    force:   when the revlog has no nodemap file configured, compute one
             instead of raising

    Nothing is written for an empty revlog. A ``ProgrammingError`` is raised
    for a revlog with filtered revisions, or for a revlog without nodemap
    file when ``force`` is not set.
    """
    if len(revlog.index) <= 0:
        return
    if getattr(revlog, 'filteredrevs', ()):
        raise error.ProgrammingError(
            "cannot persist nodemap of a filtered changelog"
        )
    if revlog._nodemap_file is None:
        if force:
            revlog._nodemap_file = get_nodemap_file(revlog)
        else:
            msg = "calling persist nodemap on a revlog without the feature enabled"
            raise error.ProgrammingError(msg)

    can_incremental = hasattr(revlog.index, "nodemap_data_incremental")
    ondisk_docket = revlog._nodemap_docket
    feed_data = hasattr(revlog.index, "update_nodemap_data")
    use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap")

    # `data` staying (or being reset to) None means "do a full export"
    data = None
    # first attempt an incremental update of the data
    if can_incremental and ondisk_docket is not None:
        target_docket = revlog._nodemap_docket.copy()
        (
            src_docket,
            data_changed_count,
            data,
        ) = revlog.index.nodemap_data_incremental()
        new_length = target_docket.data_length + len(data)
        new_unused = target_docket.data_unused + data_changed_count
        if src_docket != target_docket:
            # the index data does not come from the docket we know about
            data = None
        elif new_length <= (new_unused * 10):
            # at least 10% of the file would be unused data: rewrite it
            # from scratch instead of appending
            data = None
        else:
            datafile = _rawdata_filepath(revlog, target_docket)
            # EXP-TODO: if this is a cache, this should use a cache vfs, not a
            # store vfs
            tr.add(datafile, target_docket.data_length)
            with revlog.opener(datafile, b'r+') as fd:
                # append from the expected current end of data
                fd.seek(target_docket.data_length)
                fd.write(data)
                # `new_data` is only needed (and only bound) when the index
                # wants the fresh data back
                if feed_data:
                    if use_mmap and revlog.opener.is_mmap_safe(datafile):
                        fd.flush()
                        new_data = util.buffer(util.mmapread(fd, new_length))
                    else:
                        fd.seek(0)
                        new_data = fd.read(new_length)
            target_docket.data_length = new_length
            target_docket.data_unused = new_unused

    if data is None:
        # otherwise fallback to a full new export
        target_docket = NodeMapDocket()
        datafile = _rawdata_filepath(revlog, target_docket)
        if hasattr(revlog.index, "nodemap_data_all"):
            data = revlog.index.nodemap_data_all()
        else:
            data = persistent_data(revlog.index)
        # EXP-TODO: if this is a cache, this should use a cache vfs, not a
        # store vfs

        tryunlink = revlog.opener.tryunlink

        def abortck(tr):
            tryunlink(datafile)

        callback_id = b"delete-%s" % datafile

        # some flavor of the transaction abort does not cleanup new file, it
        # simply empty them.
        tr.addabort(callback_id, abortck)
        with revlog.opener(datafile, b'w+') as fd:
            fd.write(data)
            if feed_data:
                if use_mmap and revlog.opener.is_mmap_safe(datafile):
                    fd.flush()
                    new_data = util.buffer(util.mmapread(fd, len(data)))
                else:
                    new_data = data
        target_docket.data_length = len(data)
    target_docket.tip_rev = revlog.tiprev()
    target_docket.tip_node = revlog.node(target_docket.tip_rev)
    # EXP-TODO: if this is a cache, this should use a cache vfs, not a
    # store vfs
    file_path = revlog._nodemap_file
    if pending:
        file_path += b'.a'
        tr.registertmp(file_path)
    else:
        tr.addbackup(file_path)

    with revlog.opener(file_path, b'w', atomictemp=True) as fp:
        fp.write(target_docket.serialize())
    revlog._nodemap_docket = target_docket
    if feed_data:
        revlog.index.update_nodemap_data(target_docket, new_data)

    # search for old index file in all cases, some older process might have
    # left one behind.
    olds = _other_rawdata_filepath(revlog, target_docket)
    if olds:
        realvfs = getattr(revlog, '_realopener', revlog.opener)

        def cleanup(tr):
            for oldfile in olds:
                realvfs.tryunlink(oldfile)

        callback_id = b"revlog-cleanup-nodemap-%s" % revlog._nodemap_file
        tr.addpostclose(callback_id, cleanup)
nodemap: use an intermediate "docket" file to carry small metadata...
r44792
### Nodemap docket file
#
# The nodemap data are stored on disk using 2 files:
#
# * a raw data files containing a persistent nodemap
# (see `Nodemap Trie` section)
#
# * a small "docket" file containing metadata
#
# While the nodemap data can be multiple tens of megabytes, the "docket" is
# small, it is easy to update it automatically or to duplicate its content
# during a transaction.
#
# Multiple raw data can exist at the same time (The currently valid one and a
# new one being used by an in progress transaction). To accommodate this, the
# filename hosting the raw data has a variable part. The exact filename is
# specified inside the "docket" file.
#
# The docket file contains information to find, qualify and validate the raw
# data. Its content is currently very light, but it will expand as the on disk
# nodemap gains the necessary features to be used in production.
nodemap: move on disk file to version 1...
# version number written at the start of the docket; a docket carrying any
# other version is ignored when read back
ONDISK_VERSION = 1
# the version marker: one unsigned byte
S_VERSION = struct.Struct(">B")
# fixed size header following the version marker (big endian), in order:
# uid size (1 byte), then tip revision, data length, unused data amount and
# tip node size (8 bytes each)
S_HEADER = struct.Struct(">BQQQQ")
nodemap: use an intermediate "docket" file to carry small metadata...
r44792
persistent-nodemap: add a way to make the picked uid predictable...
r48091
Gregory Szorc
py3: use class X: instead of class X(object):...
class NodeMapDocket:
    """metadata associated with persistent nodemap data

    The persistent data may come from disk or be on their way to disk.
    """

    def __init__(self, uid=None):
        if uid is None:
            uid = docket_mod.make_uid()
        # a unique identifier for the data file:
        #   - When new data are appended, it is preserved.
        #   - When a new data file is created, a new identifier is generated.
        self.uid = uid
        # the tipmost revision stored in the data file. This revision and all
        # revision before it are expected to be encoded in the data file.
        self.tip_rev = None
        # the node of that tipmost revision, if it mismatch the current index
        # data the docket is not valid for the current index and should be
        # discarded.
        #
        # note: this method is not perfect as some destructive operation could
        # preserve the same tip_rev + tip_node while altering lower revision.
        # However multiple other caches have the same vulnerability (eg:
        # branchmap cache).
        self.tip_node = None
        # the size (in bytes) of the persisted data to encode the nodemap valid
        # for `tip_rev`.
        #   - data file shorter than this are corrupted,
        #   - any extra data should be ignored.
        self.data_length = None
        # the amount (in bytes) of "dead" data, still in the data file but no
        # longer used for the nodemap.
        self.data_unused = 0

    def copy(self):
        """Return a new docket carrying the same uid and metadata."""
        new = NodeMapDocket(uid=self.uid)
        new.tip_rev = self.tip_rev
        new.tip_node = self.tip_node
        new.data_length = self.data_length
        new.data_unused = self.data_unused
        return new

    def __cmp__(self, other):
        """Three-way comparison on ``(uid, data_length)``.

        Python 3 never calls this implicitly; it is only reachable through
        an explicit call and is kept for callers that might do so.
        """
        if self.uid < other.uid:
            return -1
        if self.uid > other.uid:
            return 1
        elif self.data_length < other.data_length:
            return -1
        elif self.data_length > other.data_length:
            return 1
        return 0

    def __eq__(self, other):
        """Two dockets are equal when their uid and data length match.

        Objects lacking these attributes (``None`` for instance) are
        reported as not comparable instead of raising ``AttributeError``, so
        ``docket != None`` is simply true.
        """
        try:
            other_uid = other.uid
            other_length = other.data_length
        except AttributeError:
            return NotImplemented
        return self.uid == other_uid and self.data_length == other_length

    def serialize(self):
        """return the serialized bytes for this docket

        Layout: version marker, fixed size header, uid, tip node.
        """
        headers = (
            len(self.uid),
            self.tip_rev,
            self.data_length,
            self.data_unused,
            len(self.tip_node),
        )
        data = [
            S_VERSION.pack(ONDISK_VERSION),
            S_HEADER.pack(*headers),
            self.uid,
            self.tip_node,
        ]
        return b''.join(data)
nodemap: use an intermediate "docket" file to carry small metadata...
r44792
nodemap: introduce an explicit class/object for the docket...
r44803 def _rawdata_filepath(revlog, docket):
nodemap: use an intermediate "docket" file to carry small metadata...
r44792 """The (vfs relative) nodemap's rawdata file for a given uid"""
revlog: also use radix when computing nodemap data file...
r47923 prefix = revlog.radix
nodemap: introduce an explicit class/object for the docket...
r44803 return b"%s-%s.nd" % (prefix, docket.uid)
nodemap: write nodemap data on disk...
r44789
nodemap: introduce an explicit class/object for the docket...
def _other_rawdata_filepath(revlog, docket):
    """List the nodemap raw data files of a revlog not used by `docket`

    Returns the entries of the data file directory whose name looks like a
    raw data file for this revlog radix, minus the file `docket` points to.
    """
    # the radix is a literal file name prefix: escape it so characters such
    # as "." cannot make unrelated files match
    prefix = re.escape(revlog.radix)
    pattern = re.compile(br"(^|/)%s-[0-9a-f]+\.nd$" % prefix)
    new_file_path = _rawdata_filepath(revlog, docket)
    new_file_name = revlog.opener.basename(new_file_path)
    dirpath = revlog.opener.dirname(new_file_path)
    return [
        f
        for f in revlog.opener.listdir(dirpath)
        if pattern.match(f) and f != new_file_name
    ]
nodemap: have some python code writing a nodemap in persistent binary form...
r44788 ### Nodemap Trie
#
# This is a simple reference implementation to compute and persist a nodemap
# trie. This reference implementation is write only. The python version of this
# is not expected to be actually used, since it won't provide performance
# improvement over existing non-persistent C implementation.
#
# The nodemap is persisted as Trie using 4bits-address/16-entries block. Each
# revision can be addressed using its node shortest prefix.
#
# The trie is stored as a sequence of blocks. Each block contains 16 entries
# (signed 32bit integer, big endian). Each entry can be one of the following:
#
# * value >= 0 -> index of sub-block
# * value == -1 -> no value
Joerg Sonnenberger
nodemap: match comment to actual code...
r46815 # * value < -1 -> encoded revision: rev = -(value+2)
#
# See REV_OFFSET and _transform_rev below.
nodemap: have some python code writing a nodemap in persistent binary form...
r44788 #
# The implementation focuses on simplicity, not on performance. A Rust
# implementation should provide an efficient version of the same binary
# persistence. This reference python implementation is never meant to be
# extensively used in production.
def persistent_data(index):
    """return the persistent binary form for a nodemap for a given index"""
    # build the full trie in memory, then serialize it block by block
    return _persist_trie(_build_trie(index))
nodemap: introduce append-only incremental update of the persistent data...
def update_persistent_data(index, root, max_idx, last_rev):
    """return the incremental update for persistent nodemap from a given index

    The result is a pair: the size in bytes of the blocks touched by the
    update, and the binary data for the blocks to append after block
    `max_idx`.
    """
    changed_block, trie = _update_trie(index, root, last_rev)
    changed_bytes = changed_block * S_BLOCK.size
    appended = _persist_trie(trie, existing_idx=max_idx)
    return changed_bytes, appended
nodemap: introduce append-only incremental update of the persistent data...
r44805
nodemap: have some python code writing a nodemap in persistent binary form...
# a block is 16 big endian signed entries of 4 bytes each
S_BLOCK = struct.Struct(">" + "l" * 16)
# value stored in a block slot that holds nothing
NO_ENTRY = -1
# rev 0 need to be -2 because 0 is used by block, -1 is a special value.
REV_OFFSET = 2
def _transform_rev(rev):
    """Return the number used to represent the rev in the tree.

    (or retrieve a rev number from such representation)

    Note that this is an involution, a function equal to its inverse (i.e.
    which gives the identity when applied to itself).
    """
    shifted = rev + REV_OFFSET
    return -shifted
def _to_int(hex_digit):
"""turn an hexadecimal digit into a proper integer"""
return int(hex_digit, 16)
nodemap: use an explicit "Block" object in the reference implementation...
class Block(dict):
    """represent a block of the Trie

    contains up to 16 entry indexed from 0 to 15"""

    def __init__(self):
        super().__init__()
        # If this block exist on disk, here is its ID
        self.ondisk_id = None

    def __iter__(self):
        # iterate over the 16 slots (``None`` for an empty one), not over
        # the keys as a plain dict would
        for slot in range(16):
            yield self.get(slot)
nodemap: use an explicit "Block" object in the reference implementation...
r44796
nodemap: have some python code writing a nodemap in persistent binary form...
def _build_trie(index):
    """build a nodemap trie

    The nodemap stores revision number for each unique prefix.

    Each block is a dictionary with keys in `[0, 15]`. Values are either
    another block or a revision number.
    """
    root = Block()
    rev_count = len(index)
    for rev in range(rev_count):
        # item 7 of an index entry is the binary node
        node_hex = hex(index[rev][7])
        _insert_into_block(index, 0, root, rev, node_hex)
    return root
nodemap: introduce append-only incremental update of the persistent data...
def _update_trie(index, root, last_rev):
    """insert the revisions after `last_rev` into an existing trie

    Returns a pair: the accumulated "changed" count reported by the
    insertions, and `root` itself (updated in place).
    """
    changed = 0
    first_new = last_rev + 1
    for rev in range(first_new, len(index)):
        node_hex = hex(index[rev][7])
        changed = changed + _insert_into_block(index, 0, root, rev, node_hex)
    return changed, root
nodemap: introduce append-only incremental update of the persistent data...
r44805
nodemap: have some python code writing a nodemap in persistent binary form...
def _insert_into_block(index, level, block, current_rev, current_hex):
    """insert a new revision in a block

    index: the index we are adding revision for
    level: the depth of the current block in the trie
    block: the block currently being considered
    current_rev: the revision number we are adding
    current_hex: the hexadecimal representation of the node of that revision

    Returns the number of blocks visited on the way down to the insertion
    point (this block included).
    """
    # this block is about to be modified: forget any on-disk identity
    block.ondisk_id = None
    slot = _to_int(current_hex[level : level + 1])
    entry = block.get(slot)

    if entry is None:
        # no entry, simply store the revision number
        block[slot] = current_rev
        return 1

    if isinstance(entry, dict):
        # need to recurse to an underlying block
        return 1 + _insert_into_block(
            index, level + 1, entry, current_rev, current_hex
        )

    # collision with a previously unique prefix, inserting new
    # vertices to fit both entry.
    other_rev = entry
    other_hex = hex(index[other_rev][7])
    new = Block()
    block[slot] = new
    _insert_into_block(index, level + 1, new, other_rev, other_hex)
    _insert_into_block(index, level + 1, new, current_rev, current_hex)
    return 1
nodemap: have some python code writing a nodemap in persistent binary form...
r44788
nodemap: introduce append-only incremental update of the persistent data...
def _persist_trie(root, existing_idx=None):
    """turn a nodemap trie into persistent binary data

    See `_build_trie` for nodemap trie structure

    Blocks that already have an on-disk id are not serialized again; new
    blocks are numbered after `existing_idx` (from 0 when it is None).
    """
    base_idx = 0 if existing_idx is None else existing_idx + 1
    block_map = {}
    chunks = []
    for tn in _walk_trie(root):
        if tn.ondisk_id is None:
            # new block: it gets the next free index and is written out
            block_map[id(tn)] = base_idx + len(chunks)
            chunks.append(_persist_block(tn, block_map))
        else:
            block_map[id(tn)] = tn.ondisk_id
    return b''.join(chunks)
def _walk_trie(block):
"""yield all the block in a trie
Children blocks are always yield before their parent block.
"""
Raphaël Gomès
black: format the codebase with 23.3.0...
r52596 for __, item in sorted(block.items()):
nodemap: have some python code writing a nodemap in persistent binary form...
r44788 if isinstance(item, dict):
for sub_block in _walk_trie(item):
yield sub_block
yield block
def _persist_block(block_node, block_map):
    """Produce the persistent binary data for a single block.

    Each value obtained by iterating ``block_node`` is converted to an
    integer with `_to_value`, and the integers are packed with `S_BLOCK`.
    Child blocks are expected to be persisted already, i.e. to have an
    entry in ``block_map``.
    """
    values = [_to_value(entry, block_map) for entry in block_node]
    return S_BLOCK.pack(*values)
def _to_value(item, block_map):
    """Convert a trie entry into the integer used to persist it.

    - ``None`` becomes ``NO_ENTRY``;
    - a `dict` (a child block) becomes the id recorded for it in
      ``block_map``, keyed by ``id(item)``;
    - anything else is passed through `_transform_rev`.
    """
    if item is None:
        return NO_ENTRY
    if isinstance(item, dict):
        return block_map[id(item)]
    return _transform_rev(item)
nodemap: code to parse the persistent binary nodemap data...
r44798
def parse_data(data):
    """Parse nodemap data into a nodemap trie.

    Returns a ``(block, index)`` pair: the last block found in ``data``
    and its on-disk index. Empty ``data`` gives ``(Block(), None)``.

    Raises `error.Abort` when the size of ``data`` is not a multiple of
    the block size.
    """
    block_size = S_BLOCK.size
    if len(data) % block_size:
        msg = b"nodemap data size is not a multiple of block size (%d): %d"
        raise error.Abort(msg % (S_BLOCK.size, len(data)))
    if not data:
        return Block(), None

    # Create every block up front so that references between blocks can
    # be resolved whatever their order in the data.
    block_count = len(data) // block_size
    block_map = {}
    for ondisk_id in range(block_count):
        fresh = Block()
        fresh.ondisk_id = ondisk_id
        block_map[ondisk_id] = fresh

    for ondisk_id, current in block_map.items():
        offset = ondisk_id * block_size
        values = S_BLOCK.unpack(data[offset : offset + block_size])
        for slot, value in enumerate(values):
            if value == NO_ENTRY:
                continue
            if value >= 0:
                # non-negative values are references to other blocks
                current[slot] = block_map[value]
            else:
                current[slot] = _transform_rev(value)

    last_id = block_count - 1
    return block_map[last_id], last_id
nodemap: add basic checking of the on disk nodemap content...
r44799
# debug utility
def check_data(ui, index, data):
    """Verify that the provided nodemap data are valid for the given index.

    Counts are reported with ``ui.status`` and each problem found is
    reported with ``ui.write_err``. Returns 0 when no problem was found
    and 1 otherwise.
    """
    status = 0
    ui.status((b"revisions in index: %d\n") % len(index))
    root, __ = parse_data(data)
    unmatched = set(_all_revisions(root))
    ui.status((b"revisions in nodemap: %d\n") % len(unmatched))
    for rev in range(len(index)):
        if rev in unmatched:
            unmatched.remove(rev)
        else:
            ui.write_err(b" revision missing from nodemap: %d\n" % rev)
            status = 1
        # Look the revision up the other way around, from field 7 of its
        # index entry (presumably the node), and check it leads back here.
        found = _find_node(root, hex(index[rev][7]))
        if found is None:
            ui.write_err(
                b" revision node does not match any entries: %d\n" % rev
            )
            status = 1
        elif found != rev:
            ui.write_err(
                b" revision node does not match the expected revision: "
                b"%d != %d\n" % (rev, found)
            )
            status = 1
    # Whatever is left was in the nodemap but not in the index.
    for rev in sorted(unmatched):
        ui.write_err(b" extra revisions in nodemap: %d\n" % rev)
        status = 1
    return status
def _all_revisions(root):
    """Yield all revisions stored in a trie.

    Every block returned by `_walk_trie` is iterated; values that are
    ``None`` or `Block` instances are skipped, everything else is yielded.
    """
    for block in _walk_trie(root):
        for value in block:
            if value is not None and not isinstance(value, Block):
                yield value
nodemap: all check that revision and nodes match in the nodemap...
r44800
def _find_node(block, node):
    """Find the revision associated with a given node.

    The trie is descended one level per leading element of ``node``, each
    converted with `_to_int`. The first entry that is not a `dict` is
    returned; it is ``None`` when the slot is empty.
    """
    depth = 0
    current = block
    while True:
        entry = current.get(_to_int(node[depth : depth + 1]))
        if not isinstance(entry, dict):
            return entry
        current = entry
        depth += 1
Pulkit Goyal
revlog: refactor logic to compute nodemap file in separate function...
r47196
revlog: stop usage of `_indexfile` to computing nodemap path...
def get_nodemap_file(revlog):
    """Return the path of the nodemap file for ``revlog``.

    The path is ``revlog.radix`` followed by ``.n``. When
    ``revlog._trypending`` is set and a ``.n.a`` file exists according to
    ``revlog.opener``, that ``.n.a`` path is returned instead.
    """
    suffix = b".n"
    if revlog._trypending and revlog.opener.exists(revlog.radix + b".n.a"):
        suffix = b".n.a"
    return revlog.radix + suffix