##// END OF EJS Templates
inline-changelog: fix a critical bug in write_pending that delete data...
inline-changelog: fix a critical bug in write_pending that deletes data. Since a93e52f0b6ff we no longer use inline-revlog for the changelog. The goal there was to solve the lack of testing for the two variants (inline vs split) and reduce the complexity of the interaction with "diverted-write" at the changelog level. However, many existing repositories still have an inline changelog and we automatically move them to a normal revlog as soon as we have the chance. Unfortunately, this conversion is buggy and can result in the destruction of the changelog.i if a hook triggers the "write pending" mechanism. The bug comes from the "revlog splitting" logic and the "write_pending" logic stepping over each other. Ironically, the change in a93e52f0b6ff aimed at no longer having this kind of problem. This changeset fixes this issue and adds associated tests. Fixing this reveals that the transaction hooks end up not seeing the pending transaction content, because the name is not right ("changelog.i.s.a" instead of "changelog.i.s"); we fix this in the next changeset.

File last commit:

r51949:f0756d06 default
r52530:3cf9e52f stable
Show More
docket.py
430 lines | 14.1 KiB | text/x-python | PythonLexer
revlogv2: introduce a very basic docket file...
r48008 # docket - code related to revlog "docket"
#
# Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
### Revlog docket file
#
# The revlog is stored on disk using multiple files:
#
# * a small docket file, containing metadata and a pointer,
#
# * an index file, containing fixed width information about revisions,
#
# * a data file, containing variable width data for these revisions,
docket: move the uid logic in the `revlogutils.docket` module...
r48093 import os
import random
revlogv2: introduce a very basic docket file...
r48008 import struct
revlogv2: track pending write in the docket and expose it to hooks...
r48015 from .. import (
docket: move the uid logic in the `revlogutils.docket` module...
r48093 encoding,
revlogv2: track pending write in the docket and expose it to hooks...
r48015 error,
docket: move the uid logic in the `revlogutils.docket` module...
r48093 node,
revlog: implement a "default compression" mode...
r48029 util,
revlogv2: track pending write in the docket and expose it to hooks...
r48015 )
revlogv2: introduce a very basic docket file...
r48008 from . import (
constants,
)
docket: move the uid logic in the `revlogutils.docket` module...
r48093
def make_uid(id_size=8):
    """return a new unique identifier.

    The identifier is random and composed of ascii characters.

    `id_size` is the length of the returned identifier."""
    # since we "hex" the result, every random byte yields two characters, so
    # we only need half as many random bytes as the requested identifier size
    random_bytes = os.urandom(id_size // 2)
    return node.hex(random_bytes)
# some special test logic to avoid annoying random output in the tests: when
# HGTEST_UUIDFILE is set, uids are derived from the content of that file
stable_docket_file = encoding.environ.get(b'HGTEST_UUIDFILE')

if stable_docket_file:

    def make_uid(id_size=8):
        """return a reproducible identifier seeded from HGTEST_UUIDFILE

        The file content is used as the seed of the generator and is then
        overwritten with the identifier that was just produced, so the next
        call is seeded with the previous result."""
        try:
            with open(stable_docket_file, mode='rb') as seed_file:
                seed = seed_file.read().strip()
        except FileNotFoundError:
            seed = b'04'  # chosen by a fair dice roll. guaranteed to be random
        # some basic circular sum hashing on 64 bits
        low_mask = (1 << 35) - 1
        int_seed = 0
        for byte in seed:
            rotated_high = int_seed >> 35
            rotated_low = (int_seed & low_mask) << 28
            int_seed = rotated_high + rotated_low + byte
        rng = random.Random()
        rng.seed(int_seed, version=1)
        # once we drop python 3.8 support we can simply use rng.randbytes
        raw = rng.getrandbits(id_size * 4)
        # the packing below is hard-coded for 32 bits, i.e. 8 hex characters
        assert id_size == 8
        new = node.hex(struct.pack('>L', raw))
        with open(stable_docket_file, 'wb') as seed_file:
            seed_file.write(new)
        return new
revlogv2: introduce a very basic docket file...
r48008 # Docket format
#
# * 4 bytes: revlog version
# | This is mandatory as docket must be compatible with the previous
# | revlog index header.
revlogv2: use a unique filename for index...
r48114 # * 1 bytes: size of index uuid
revlog: add a way to keep track of older uids in the docket...
r48246 # * 1 bytes: number of outdated index uuid
revlogv2: use a unique filename for data...
r48115 # * 1 bytes: size of data uuid
revlog: add a way to keep track of older uids in the docket...
r48246 # * 1 bytes: number of outdated data uuid
revlog: store sidedata in their own file...
r48181 # * 1 bytes: size of sidedata uuid
revlog: add a way to keep track of older uids in the docket...
r48246 # * 1 bytes: number of outdated sidedata uuid
revlogv2: also keep track for the size of the "data" file...
r48016 # * 8 bytes: size of index-data
# * 8 bytes: pending size of index-data
# * 8 bytes: size of data
revlogv2: also keep track for the size of the "data" file...
r48016 # * 8 bytes: pending size of data
revlog: store sidedata in their own file...
r48181 # * 8 bytes: size of sidedata
revlog: store sidedata in their own file...
r48181 # * 8 bytes: pending size of sidedata
revlog: implement a "default compression" mode...
r48029 # * 1 bytes: default compression header
changelog-v2: fix the docket `struct`...
r50609 S_HEADER = struct.Struct(constants.INDEX_HEADER_FMT + b'BBBBBBQQQQQQc')
revlog: add a way to keep track of older uids in the docket...
r48246 # * 1 bytes: size of the uuid
# * 4 bytes: size of file
S_OLD_UID = struct.Struct('>BL')
revlogv2: introduce a very basic docket file...
r48008
Gregory Szorc
py3: use class X: instead of class X(object):...
class RevlogDocket:
    """metadata associated with revlog

    The docket records, for the index, data and sidedata files of a revlog:

    * the uid used to build the current file name,
    * a list of older `(uid, size)` pairs for files that were replaced,
    * the "official" end offset and the "pending" end offset of the file.
    """

    def __init__(
        self,
        revlog,
        use_pending=False,
        version_header=None,
        index_uuid=None,
        older_index_uuids=(),
        data_uuid=None,
        older_data_uuids=(),
        sidedata_uuid=None,
        older_sidedata_uuids=(),
        index_end=0,
        pending_index_end=0,
        data_end=0,
        pending_data_end=0,
        sidedata_end=0,
        pending_sidedata_end=0,
        default_compression_header=None,
    ):
        """initialize a docket for `revlog`

        `revlog` provides `radix`, `_docket_file` and `opener`.

        When `use_pending` is true the docket is read-only and exposes the
        pending end offsets as the current ones; otherwise the official end
        offsets are used.

        Each `older_*_uuids` argument is an iterable of `(uid, size)` pairs.
        """
        self._version_header = version_header
        self._read_only = bool(use_pending)
        self._dirty = False
        self._radix = revlog.radix
        self._path = revlog._docket_file
        self._opener = revlog.opener
        self._index_uuid = index_uuid
        self._data_uuid = data_uuid
        self._sidedata_uuid = sidedata_uuid
        # The `new_*_file` methods insert into these collections, so they must
        # be mutable lists even when the (immutable) default `()` is used.
        # Copying also avoids mutating a list owned by the caller.
        self._older_index_uuids = list(older_index_uuids)
        self._older_data_uuids = list(older_data_uuids)
        self._older_sidedata_uuids = list(older_sidedata_uuids)
        assert not set(older_index_uuids) & set(older_data_uuids)
        assert not set(older_data_uuids) & set(older_sidedata_uuids)
        assert not set(older_index_uuids) & set(older_sidedata_uuids)
        # these asserts should be True as long as we have a single index
        # filename
        assert index_end <= pending_index_end
        assert data_end <= pending_data_end
        assert sidedata_end <= pending_sidedata_end
        self._initial_index_end = index_end
        self._pending_index_end = pending_index_end
        self._initial_data_end = data_end
        self._pending_data_end = pending_data_end
        self._initial_sidedata_end = sidedata_end
        self._pending_sidedata_end = pending_sidedata_end
        if use_pending:
            self._index_end = self._pending_index_end
            self._data_end = self._pending_data_end
            self._sidedata_end = self._pending_sidedata_end
        else:
            self._index_end = self._initial_index_end
            self._data_end = self._initial_data_end
            self._sidedata_end = self._initial_sidedata_end
        self.default_compression_header = default_compression_header

    def index_filepath(self):
        """file path to the current index file associated to this docket

        A uid is generated on first use if none is set yet."""
        # very simplistic version at first
        if self._index_uuid is None:
            self._index_uuid = make_uid()
        return b"%s-%s.idx" % (self._radix, self._index_uuid)

    def new_index_file(self):
        """switch index file to a new UID

        The previous index UID is moved to the "older" list.

        Return the path of the new index file."""
        old = (self._index_uuid, self._index_end)
        self._older_index_uuids.insert(0, old)
        self._index_uuid = make_uid()
        return self.index_filepath()

    def old_index_filepaths(self, include_empty=True):
        """yield file path to older index files associated to this docket

        Entries with a recorded size of 0 are skipped unless `include_empty`
        is true."""
        # very simplistic version at first
        for uuid, size in self._older_index_uuids:
            if include_empty or size > 0:
                yield b"%s-%s.idx" % (self._radix, uuid)

    def data_filepath(self):
        """file path to the current data file associated to this docket

        A uid is generated on first use if none is set yet."""
        # very simplistic version at first
        if self._data_uuid is None:
            self._data_uuid = make_uid()
        return b"%s-%s.dat" % (self._radix, self._data_uuid)

    def new_data_file(self):
        """switch data file to a new UID

        The previous data UID is moved to the "older" list.

        Return the path of the new data file."""
        old = (self._data_uuid, self._data_end)
        self._older_data_uuids.insert(0, old)
        self._data_uuid = make_uid()
        return self.data_filepath()

    def old_data_filepaths(self, include_empty=True):
        """yield file path to older data files associated to this docket

        Entries with a recorded size of 0 are skipped unless `include_empty`
        is true."""
        # very simplistic version at first
        for uuid, size in self._older_data_uuids:
            if include_empty or size > 0:
                yield b"%s-%s.dat" % (self._radix, uuid)

    def sidedata_filepath(self):
        """file path to the current sidedata file associated to this docket

        A uid is generated on first use if none is set yet."""
        # very simplistic version at first
        if self._sidedata_uuid is None:
            self._sidedata_uuid = make_uid()
        return b"%s-%s.sda" % (self._radix, self._sidedata_uuid)

    def new_sidedata_file(self):
        """switch sidedata file to a new UID

        The previous sidedata UID is moved to the "older" list.

        Return the path of the new sidedata file."""
        old = (self._sidedata_uuid, self._sidedata_end)
        self._older_sidedata_uuids.insert(0, old)
        self._sidedata_uuid = make_uid()
        return self.sidedata_filepath()

    def old_sidedata_filepaths(self, include_empty=True):
        """yield file path to older sidedata files associated to this docket

        Entries with a recorded size of 0 are skipped unless `include_empty`
        is true."""
        # very simplistic version at first
        for uuid, size in self._older_sidedata_uuids:
            if include_empty or size > 0:
                yield b"%s-%s.sda" % (self._radix, uuid)

    @property
    def index_end(self):
        """current end offset of the index file"""
        return self._index_end

    @index_end.setter
    def index_end(self, new_size):
        # only flag the docket as dirty when the value actually changes
        if new_size != self._index_end:
            self._index_end = new_size
            self._dirty = True

    @property
    def data_end(self):
        """current end offset of the data file"""
        return self._data_end

    @data_end.setter
    def data_end(self, new_size):
        # only flag the docket as dirty when the value actually changes
        if new_size != self._data_end:
            self._data_end = new_size
            self._dirty = True

    @property
    def sidedata_end(self):
        """current end offset of the sidedata file"""
        return self._sidedata_end

    @sidedata_end.setter
    def sidedata_end(self, new_size):
        # only flag the docket as dirty when the value actually changes
        if new_size != self._sidedata_end:
            self._sidedata_end = new_size
            self._dirty = True

    def write(self, transaction, pending=False, stripping=False):
        """write the modification of disk if any

        This make the new content visible to all process

        Return False when there was nothing to write, True otherwise.
        Raise `error.ProgrammingError` when a dirty read-only docket (one
        created with `use_pending`) is written."""
        if not self._dirty:
            return False
        if self._read_only:
            msg = b'writing read-only docket: %s'
            msg %= self._path
            raise error.ProgrammingError(msg)
        if not stripping:
            # XXX we could, leverage the docket while stripping. However it
            # is not powerful enough at the time of this comment
            transaction.addbackup(self._path, location=b'store')
        with self._opener(self._path, mode=b'w', atomictemp=True) as f:
            f.write(self._serialize(pending=pending))
        # if pending we still need to write the final data eventually
        self._dirty = pending
        return True

    def _serialize(self, pending=False):
        """return the binary form of this docket

        When `pending` is true, the "official" end offsets written are the
        ones the docket was created with, while the current offsets are
        stored in the "pending" slots. Otherwise both slots hold the current
        offsets."""
        if pending:
            official_index_end = self._initial_index_end
            official_data_end = self._initial_data_end
            official_sidedata_end = self._initial_sidedata_end
        else:
            official_index_end = self._index_end
            official_data_end = self._data_end
            official_sidedata_end = self._sidedata_end

        # this assert should be True as long as we have a single index filename
        assert official_data_end <= self._data_end
        assert official_sidedata_end <= self._sidedata_end
        data = (
            self._version_header,
            len(self._index_uuid),
            len(self._older_index_uuids),
            len(self._data_uuid),
            len(self._older_data_uuids),
            len(self._sidedata_uuid),
            len(self._older_sidedata_uuids),
            official_index_end,
            self._index_end,
            official_data_end,
            self._data_end,
            official_sidedata_end,
            self._sidedata_end,
            self.default_compression_header,
        )
        s = []
        s.append(S_HEADER.pack(*data))

        # For each kind of file: the current uid, then the fixed-size
        # (uid length, file size) records of all older uids, then the older
        # uids themselves.
        s.append(self._index_uuid)
        for u, size in self._older_index_uuids:
            s.append(S_OLD_UID.pack(len(u), size))
        for u, size in self._older_index_uuids:
            s.append(u)

        s.append(self._data_uuid)
        for u, size in self._older_data_uuids:
            s.append(S_OLD_UID.pack(len(u), size))
        for u, size in self._older_data_uuids:
            s.append(u)

        s.append(self._sidedata_uuid)
        for u, size in self._older_sidedata_uuids:
            s.append(S_OLD_UID.pack(len(u), size))
        for u, size in self._older_sidedata_uuids:
            s.append(u)
        return b''.join(s)
revlogv2: introduce a very basic docket file...
r48008
def default_docket(revlog, version_header):
    """given a revlog version, return a new docket object for the given revlog

    Return None when the version encoded in the low 16 bits of
    `version_header` is neither REVLOGV2 nor CHANGELOGV2."""
    docket_versions = (constants.REVLOGV2, constants.CHANGELOGV2)
    if (version_header & 0xFFFF) not in docket_versions:
        return None
    engine = util.compengines[revlog.feature_config.compression_engine]
    new_docket = RevlogDocket(
        revlog,
        version_header=version_header,
        default_compression_header=engine.revlogheader(),
    )
    # flag the docket as dirty so that `write` does not skip it
    new_docket._dirty = True
    return new_docket
revlog: add a way to keep track of older uids in the docket...
def _parse_old_uids(get_data, count):
    """read `count` older uid entries using `get_data`

    The `count` fixed-size (uid length, file size) records come first,
    followed by the uids themselves in the same order.

    Return a list of `(uid, file_size)` pairs."""
    records = [
        S_OLD_UID.unpack(get_data(S_OLD_UID.size)) for _ in range(count)
    ]
    return [(get_data(uid_size), file_size) for uid_size, file_size in records]
revlogv2: track pending write in the docket and expose it to hooks...
def parse_docket(revlog, data, use_pending=False):
    """given some docket data return a docket object for the given revlog

    `data` is the raw content of the docket: a fixed-size header followed by
    the variable-size uids. `use_pending` is forwarded to `RevlogDocket`.

    Raise `error.Abort` when `data` is shorter than the fixed-size header or
    than the uid section announced by that header."""
    if len(data) < S_HEADER.size:
        # Report a truncated header the same way as a truncated uid section,
        # instead of leaking a raw `struct.error`.
        # XXX better class
        msg = b"docket is too short, expected %d got %d"
        msg %= (S_HEADER.size, len(data))
        raise error.Abort(msg)
    (
        version_header,
        index_uuid_size,
        older_index_uuid_count,
        data_uuid_size,
        older_data_uuid_count,
        sidedata_uuid_size,
        older_sidedata_uuid_count,
        index_size,
        pending_index_size,
        data_size,
        pending_data_size,
        sidedata_size,
        pending_sidedata_size,
        default_compression_header,
    ) = S_HEADER.unpack(data[: S_HEADER.size])

    # read position inside `data`, advanced by `get_data`
    offset = S_HEADER.size

    def get_data(size):
        """utility closure to access the `size` next bytes"""
        nonlocal offset
        end = offset + size
        if end > len(data):
            # XXX better class
            msg = b"docket is too short, expected %d got %d"
            msg %= (end, len(data))
            raise error.Abort(msg)
        raw = data[offset:end]
        offset = end
        return raw

    # The variable-size section must be consumed in this exact order: for
    # each kind of file, the current uid followed by the older uids.
    index_uuid = get_data(index_uuid_size)
    older_index_uuids = _parse_old_uids(get_data, older_index_uuid_count)

    data_uuid = get_data(data_uuid_size)
    older_data_uuids = _parse_old_uids(get_data, older_data_uuid_count)

    sidedata_uuid = get_data(sidedata_uuid_size)
    older_sidedata_uuids = _parse_old_uids(get_data, older_sidedata_uuid_count)

    docket = RevlogDocket(
        revlog,
        use_pending=use_pending,
        version_header=version_header,
        index_uuid=index_uuid,
        older_index_uuids=older_index_uuids,
        data_uuid=data_uuid,
        older_data_uuids=older_data_uuids,
        sidedata_uuid=sidedata_uuid,
        older_sidedata_uuids=older_sidedata_uuids,
        index_end=index_size,
        pending_index_end=pending_index_size,
        data_end=data_size,
        pending_data_end=pending_data_size,
        sidedata_end=sidedata_size,
        pending_sidedata_end=pending_sidedata_size,
        default_compression_header=default_compression_header,
    )
    return docket