# docket - code related to revlog "docket"
#
# Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

### Revlog docket file
#
# The revlog is stored on disk using multiple files:
#
# * a small docket file, containing metadata and a pointer,
#
# * an index file, containing fixed width information about revisions,
#
# * a data file, containing variable width data for these revisions,
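#
# For example (an illustrative sketch; actual radix and uuids vary), the
# index, data and sidedata files of a revlog with radix b'data/foo' could
# be spread over:
#
#   data/foo-1a2b3c4d.idx
#   data/foo-5e6f7a8b.dat
#   data/foo-9c0d1e2f.sda
#
# (see ``index_filepath`` and friends below for the naming scheme)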

from __future__ import absolute_import

import errno
import os
import random
import struct

from .. import (
    encoding,
    error,
    node,
    pycompat,
    util,
)

from . import (
    constants,
)


def make_uid(id_size=8):
    """return a new unique identifier.

    The identifier is random and composed of ascii characters."""
    # since we "hex" the result, we need half the number of bits to have a
    # final uuid of size ID_SIZE
    return node.hex(os.urandom(id_size // 2))
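
# For instance, the default ``id_size=8`` hex-encodes 4 random bytes into
# 8 ascii characters (an illustrative check; any even size works):
#
#   >>> len(make_uid())
#   8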


# some special test logic to avoid annoying random output in the tests
stable_docket_file = encoding.environ.get(b'HGTEST_UUIDFILE')

if stable_docket_file:

    def make_uid(id_size=8):
        try:
            with open(stable_docket_file, mode='rb') as f:
                seed = f.read().strip()
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            seed = b'04'  # chosen by a fair dice roll. guaranteed to be random
        if pycompat.ispy3:
            iter_seed = iter(seed)
        else:
            # pytype: disable=wrong-arg-types
            iter_seed = (ord(c) for c in seed)
            # pytype: enable=wrong-arg-types
        # some basic circular sum hashing on 64 bits
        int_seed = 0
        low_mask = int('1' * 35, 2)
        for i in iter_seed:
            high_part = int_seed >> 35
            low_part = (int_seed & low_mask) << 28
            int_seed = high_part + low_part + i
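        # e.g. for seed b'04' (byte values 48, 52): the first pass leaves
        # int_seed == 48, the second folds it as 48 << 28, giving
        # 48 * 2**28 + 52 == 12884901940 (an illustrative trace)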
        r = random.Random()
        if pycompat.ispy3:
            r.seed(int_seed, version=1)
        else:
            r.seed(int_seed)
        # once we drop python 3.8 support we can simply use r.randbytes
        raw = r.getrandbits(id_size * 4)
        assert id_size == 8
        p = struct.pack('>L', raw)
        new = node.hex(p)
        with open(stable_docket_file, 'wb') as f:
            f.write(new)
        return new


# Docket format
#
# * 4 bytes: revlog version
#            |   This is mandatory as docket must be compatible with the
#            |   previous revlog index header.
# * 1 byte:  size of index uuid
# * 1 byte:  number of outdated index uuids
# * 1 byte:  size of data uuid
# * 1 byte:  number of outdated data uuids
# * 1 byte:  size of sidedata uuid
# * 1 byte:  number of outdated sidedata uuids
# * 4 bytes: size of index-data
# * 4 bytes: pending size of index-data
# * 4 bytes: size of data
# * 4 bytes: pending size of data
# * 4 bytes: size of sidedata
# * 4 bytes: pending size of sidedata
# * 1 byte:  default compression header
S_HEADER = struct.Struct(constants.INDEX_HEADER_FMT + b'BBBBBBLLLLLLc')

# * 1 byte:  size of the uuid
# * 4 bytes: size of the file
S_OLD_UID = struct.Struct('>BL')
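
# For example (illustrative), an old-uid table entry for an 8-character
# uuid of a file holding 100 bytes packs as:
#
#   >>> S_OLD_UID.pack(8, 100)
#   b'\x08\x00\x00\x00d'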


class RevlogDocket(object):
    """metadata associated with revlog"""

    def __init__(
        self,
        revlog,
        use_pending=False,
        version_header=None,
        index_uuid=None,
        older_index_uuids=(),
        data_uuid=None,
        older_data_uuids=(),
        sidedata_uuid=None,
        older_sidedata_uuids=(),
        index_end=0,
        pending_index_end=0,
        data_end=0,
        pending_data_end=0,
        sidedata_end=0,
        pending_sidedata_end=0,
        default_compression_header=None,
    ):
        self._version_header = version_header
        self._read_only = bool(use_pending)
        self._dirty = False
        self._radix = revlog.radix
        self._path = revlog._docket_file
        self._opener = revlog.opener
        self._index_uuid = index_uuid
        self._older_index_uuids = older_index_uuids
        self._data_uuid = data_uuid
        self._older_data_uuids = older_data_uuids
        self._sidedata_uuid = sidedata_uuid
        self._older_sidedata_uuids = older_sidedata_uuids
        assert not set(older_index_uuids) & set(older_data_uuids)
        assert not set(older_data_uuids) & set(older_sidedata_uuids)
        assert not set(older_index_uuids) & set(older_sidedata_uuids)
        # these asserts should hold as long as we have a single index filename
        assert index_end <= pending_index_end
        assert data_end <= pending_data_end
        assert sidedata_end <= pending_sidedata_end
        self._initial_index_end = index_end
        self._pending_index_end = pending_index_end
        self._initial_data_end = data_end
        self._pending_data_end = pending_data_end
        self._initial_sidedata_end = sidedata_end
        self._pending_sidedata_end = pending_sidedata_end
        if use_pending:
            self._index_end = self._pending_index_end
            self._data_end = self._pending_data_end
            self._sidedata_end = self._pending_sidedata_end
        else:
            self._index_end = self._initial_index_end
            self._data_end = self._initial_data_end
            self._sidedata_end = self._initial_sidedata_end
        self.default_compression_header = default_compression_header

    def index_filepath(self):
        """file path to the current index file associated with this docket"""
        # very simplistic version at first
        if self._index_uuid is None:
            self._index_uuid = make_uid()
        return b"%s-%s.idx" % (self._radix, self._index_uuid)

    def new_index_file(self):
        """switch index file to a new UID

        The previous index UID is moved to the "older" list."""
        old = (self._index_uuid, self._index_end)
        self._older_index_uuids.insert(0, old)
        self._index_uuid = make_uid()
        return self.index_filepath()
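
    # A typical rollover (an illustrative trace): with current uuid
    # b'deadbeef' and 120 bytes of index written, ``new_index_file()``
    # pushes (b'deadbeef', 120) onto the "older" list and returns a path
    # built from a freshly generated uuid.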

    def old_index_filepaths(self, include_empty=True):
        """yield file paths to older index files associated with this docket"""
        # very simplistic version at first
        for uuid, size in self._older_index_uuids:
            if include_empty or size > 0:
                yield b"%s-%s.idx" % (self._radix, uuid)

    def data_filepath(self):
        """file path to the current data file associated with this docket"""
        # very simplistic version at first
        if self._data_uuid is None:
            self._data_uuid = make_uid()
        return b"%s-%s.dat" % (self._radix, self._data_uuid)

    def new_data_file(self):
        """switch data file to a new UID

        The previous data UID is moved to the "older" list."""
        old = (self._data_uuid, self._data_end)
        self._older_data_uuids.insert(0, old)
        self._data_uuid = make_uid()
        return self.data_filepath()

    def old_data_filepaths(self, include_empty=True):
        """yield file paths to older data files associated with this docket"""
        # very simplistic version at first
        for uuid, size in self._older_data_uuids:
            if include_empty or size > 0:
                yield b"%s-%s.dat" % (self._radix, uuid)

    def sidedata_filepath(self):
        """file path to the current sidedata file associated with this docket"""
        # very simplistic version at first
        if self._sidedata_uuid is None:
            self._sidedata_uuid = make_uid()
        return b"%s-%s.sda" % (self._radix, self._sidedata_uuid)

    def new_sidedata_file(self):
        """switch sidedata file to a new UID

        The previous sidedata UID is moved to the "older" list."""
        old = (self._sidedata_uuid, self._sidedata_end)
        self._older_sidedata_uuids.insert(0, old)
        self._sidedata_uuid = make_uid()
        return self.sidedata_filepath()

    def old_sidedata_filepaths(self, include_empty=True):
        """yield file paths to older sidedata files associated with this docket"""
        # very simplistic version at first
        for uuid, size in self._older_sidedata_uuids:
            if include_empty or size > 0:
                yield b"%s-%s.sda" % (self._radix, uuid)

    @property
    def index_end(self):
        return self._index_end

    @index_end.setter
    def index_end(self, new_size):
        if new_size != self._index_end:
            self._index_end = new_size
            self._dirty = True

    @property
    def data_end(self):
        return self._data_end

    @data_end.setter
    def data_end(self, new_size):
        if new_size != self._data_end:
            self._data_end = new_size
            self._dirty = True

    @property
    def sidedata_end(self):
        return self._sidedata_end

    @sidedata_end.setter
    def sidedata_end(self, new_size):
        if new_size != self._sidedata_end:
            self._sidedata_end = new_size
            self._dirty = True

    def write(self, transaction, pending=False, stripping=False):
        """write the modifications to disk, if any

        This makes the new content visible to all processes."""
        if not self._dirty:
            return False
        else:
            if self._read_only:
                msg = b'writing read-only docket: %s'
                msg %= self._path
                raise error.ProgrammingError(msg)
            if not stripping:
                # XXX we could leverage the docket while stripping. However it
                # is not powerful enough at the time of this comment.
                transaction.addbackup(self._path, location=b'store')
            with self._opener(self._path, mode=b'w', atomictemp=True) as f:
                f.write(self._serialize(pending=pending))
            # if pending, we still need to write the final data eventually
            self._dirty = pending
            return True
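
    # Note on ``pending`` (a behaviour sketch, inferred from ``_serialize``
    # below): a pending write keeps the "official" end offsets at their
    # pre-transaction values, so non-pending readers see nothing new, and
    # the docket stays dirty until a final non-pending write lands.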

    def _serialize(self, pending=False):
        if pending:
            official_index_end = self._initial_index_end
            official_data_end = self._initial_data_end
            official_sidedata_end = self._initial_sidedata_end
        else:
            official_index_end = self._index_end
            official_data_end = self._data_end
            official_sidedata_end = self._sidedata_end

        # these asserts should hold as long as we have a single index filename
        assert official_data_end <= self._data_end
        assert official_sidedata_end <= self._sidedata_end
        data = (
            self._version_header,
            len(self._index_uuid),
            len(self._older_index_uuids),
            len(self._data_uuid),
            len(self._older_data_uuids),
            len(self._sidedata_uuid),
            len(self._older_sidedata_uuids),
            official_index_end,
            self._index_end,
            official_data_end,
            self._data_end,
            official_sidedata_end,
            self._sidedata_end,
            self.default_compression_header,
        )
        s = []
        s.append(S_HEADER.pack(*data))

        s.append(self._index_uuid)
        for u, size in self._older_index_uuids:
            s.append(S_OLD_UID.pack(len(u), size))
        for u, size in self._older_index_uuids:
            s.append(u)

        s.append(self._data_uuid)
        for u, size in self._older_data_uuids:
            s.append(S_OLD_UID.pack(len(u), size))
        for u, size in self._older_data_uuids:
            s.append(u)

        s.append(self._sidedata_uuid)
        for u, size in self._older_sidedata_uuids:
            s.append(S_OLD_UID.pack(len(u), size))
        for u, size in self._older_sidedata_uuids:
            s.append(u)

        return b''.join(s)
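
    # The resulting byte layout (an illustrative summary of the code above):
    #
    #   [S_HEADER][index uuid][n * S_OLD_UID][n old index uuids]
    #   [data uuid][m * S_OLD_UID][m old data uuids]
    #   [sidedata uuid][k * S_OLD_UID][k old sidedata uuids]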
r48008 | ||||
def default_docket(revlog, version_header): | ||||
"""given a revlog version a new docket object for the given revlog""" | ||||
r48040 | rl_version = version_header & 0xFFFF | |||
if rl_version not in (constants.REVLOGV2, constants.CHANGELOGV2): | ||||
r48008 | return None | |||
r48029 | comp = util.compengines[revlog._compengine].revlogheader() | |||
docket = RevlogDocket( | ||||
revlog, | ||||
version_header=version_header, | ||||
default_compression_header=comp, | ||||
) | ||||
r48008 | docket._dirty = True | |||
return docket | ||||


def _parse_old_uids(get_data, count):
    all_sizes = []
    all_uids = []
    for i in range(0, count):
        raw = get_data(S_OLD_UID.size)
        all_sizes.append(S_OLD_UID.unpack(raw))
    for uid_size, file_size in all_sizes:
        uid = get_data(uid_size)
        all_uids.append((uid, file_size))
    return all_uids
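
# For instance (illustrative), for two older files the serialized form is
#
#   S_OLD_UID.pack(2, 10) + S_OLD_UID.pack(2, 0) + b'aa' + b'bb'
#
# and ``_parse_old_uids(get_data, 2)`` returns [(b'aa', 10), (b'bb', 0)].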


def parse_docket(revlog, data, use_pending=False):
    """given some docket data return a docket object for the given revlog"""
    header = S_HEADER.unpack(data[: S_HEADER.size])

    # this is a mutable closure capture used in `get_data`
    offset = [S_HEADER.size]

    def get_data(size):
        """utility closure to access the next `size` bytes"""
        if offset[0] + size > len(data):
            # XXX better class
            msg = b"docket is too short, expected %d got %d"
            msg %= (offset[0] + size, len(data))
            raise error.Abort(msg)
        raw = data[offset[0] : offset[0] + size]
        offset[0] += size
        return raw

    iheader = iter(header)

    version_header = next(iheader)

    index_uuid_size = next(iheader)
    index_uuid = get_data(index_uuid_size)

    older_index_uuid_count = next(iheader)
    older_index_uuids = _parse_old_uids(get_data, older_index_uuid_count)

    data_uuid_size = next(iheader)
    data_uuid = get_data(data_uuid_size)

    older_data_uuid_count = next(iheader)
    older_data_uuids = _parse_old_uids(get_data, older_data_uuid_count)

    sidedata_uuid_size = next(iheader)
    sidedata_uuid = get_data(sidedata_uuid_size)

    older_sidedata_uuid_count = next(iheader)
    older_sidedata_uuids = _parse_old_uids(get_data, older_sidedata_uuid_count)

    index_size = next(iheader)
    pending_index_size = next(iheader)
    data_size = next(iheader)
    pending_data_size = next(iheader)
    sidedata_size = next(iheader)
    pending_sidedata_size = next(iheader)
    default_compression_header = next(iheader)

    docket = RevlogDocket(
        revlog,
        use_pending=use_pending,
        version_header=version_header,
        index_uuid=index_uuid,
        older_index_uuids=older_index_uuids,
        data_uuid=data_uuid,
        older_data_uuids=older_data_uuids,
        sidedata_uuid=sidedata_uuid,
        older_sidedata_uuids=older_sidedata_uuids,
        index_end=index_size,
        pending_index_end=pending_index_size,
        data_end=data_size,
        pending_data_end=pending_data_size,
        sidedata_end=sidedata_size,
        pending_sidedata_end=pending_sidedata_size,
        default_compression_header=default_compression_header,
    )
    return docket
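
# Minimal round-trip sketch (illustrative; ``revlog`` and ``transaction``
# are hypothetical stand-ins, and a v2 revlog version header is assumed so
# ``default_docket`` does not return None; the real callers live in the
# revlog implementation):
#
#   docket = default_docket(revlog, version_header)
#   docket.data_end += 42          # record freshly appended data
#   docket.write(transaction)      # persist; visible to other readers
#   raw = revlog.opener.read(revlog._docket_file)
#   assert parse_docket(revlog, raw).data_end == docket.data_end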