##// END OF EJS Templates
revlogv2: use a unique filename for index...
revlogv2: use a unique filename for index Having a unique index will allow for ambiguity less rewriting of revlog content, something useful to clarify handling of some operation like censoring or stripping. Differential Revision: https://phab.mercurial-scm.org/D10771

File last commit:

r48104:ce0e8971 default
r48104:ce0e8971 default
Show More
docket.py
249 lines | 7.8 KiB | text/x-python | PythonLexer
revlogv2: introduce a very basic docket file...
r48008 # docket - code related to revlog "docket"
#
# Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
### Revlog docket file
#
# The revlog is stored on disk using multiple files:
#
# * a small docket file, containing metadata and a pointer,
#
# * an index file, containing fixed width information about revisions,
#
# * a data file, containing variable width data for these revisions,
from __future__ import absolute_import
docket: move the uid logic in the `revlogutils.docket` module...
r48093 import errno
import os
import random
revlogv2: introduce a very basic docket file...
r48008 import struct
revlogv2: track pending write in the docket and expose it to hooks...
r48015 from .. import (
docket: move the uid logic in the `revlogutils.docket` module...
r48093 encoding,
revlogv2: track pending write in the docket and expose it to hooks...
r48015 error,
docket: move the uid logic in the `revlogutils.docket` module...
r48093 node,
pycompat,
revlog: implement a "default compression" mode...
r48029 util,
revlogv2: track pending write in the docket and expose it to hooks...
r48015 )
revlogv2: introduce a very basic docket file...
r48008 from . import (
constants,
)
docket: move the uid logic in the `revlogutils.docket` module...
r48093
def make_uid(id_size=8):
"""return a new unique identifier.
The identifier is random and composed of ascii characters."""
# size we "hex" the result we need half the number of bits to have a final
# uuid of size ID_SIZE
return node.hex(os.urandom(id_size // 2))
# some special test logic to avoid anoying random output in the test
stable_docket_file = encoding.environ.get(b'HGTEST_UUIDFILE')
if stable_docket_file:
def make_uid(id_size=8):
try:
with open(stable_docket_file, mode='rb') as f:
seed = f.read().strip()
except IOError as inst:
if inst.errno != errno.ENOENT:
raise
seed = b'04' # chosen by a fair dice roll. garanteed to be random
if pycompat.ispy3:
iter_seed = iter(seed)
else:
iter_seed = (ord(c) for c in seed)
# some basic circular sum hashing on 64 bits
int_seed = 0
low_mask = int('1' * 35, 2)
for i in iter_seed:
high_part = int_seed >> 35
low_part = (int_seed & low_mask) << 28
int_seed = high_part + low_part + i
r = random.Random()
if pycompat.ispy3:
r.seed(int_seed, version=1)
else:
r.seed(int_seed)
# once we drop python 3.8 support we can simply use r.randbytes
raw = r.getrandbits(id_size * 4)
assert id_size == 8
p = struct.pack('>L', raw)
new = node.hex(p)
with open(stable_docket_file, 'wb') as f:
f.write(new)
return new
revlogv2: introduce a very basic docket file...
r48008 # Docket format
#
# * 4 bytes: revlog version
# | This is mandatory as docket must be compatible with the previous
# | revlog index header.
revlogv2: use a unique filename for index...
r48104 # * 1 bytes: size of index uuid
revlogv2: also keep track for the size of the "data" file...
r48016 # * 8 bytes: size of index-data
# * 8 bytes: pending size of index-data
# * 8 bytes: size of data
# * 8 bytes: pending size of data
revlog: implement a "default compression" mode...
r48029 # * 1 bytes: default compression header
revlogv2: use a unique filename for index...
r48104 S_HEADER = struct.Struct(constants.INDEX_HEADER.format + 'BLLLLc')
revlogv2: introduce a very basic docket file...
r48008
class RevlogDocket(object):
"""metadata associated with revlog"""
revlogv2: track pending write in the docket and expose it to hooks...
r48015 def __init__(
self,
revlog,
use_pending=False,
version_header=None,
revlogv2: use a unique filename for index...
r48104 index_uuid=None,
revlogv2: track pending write in the docket and expose it to hooks...
r48015 index_end=0,
pending_index_end=0,
revlogv2: also keep track for the size of the "data" file...
r48016 data_end=0,
pending_data_end=0,
revlog: implement a "default compression" mode...
r48029 default_compression_header=None,
revlogv2: track pending write in the docket and expose it to hooks...
r48015 ):
revlogv2: introduce a very basic docket file...
r48008 self._version_header = version_header
revlogv2: track pending write in the docket and expose it to hooks...
r48015 self._read_only = bool(use_pending)
revlogv2: introduce a very basic docket file...
r48008 self._dirty = False
self._radix = revlog.radix
self._path = revlog._docket_file
self._opener = revlog.opener
revlogv2: use a unique filename for index...
r48104 self._index_uuid = index_uuid
revlogv2: also keep track for the size of the "data" file...
r48016 # thes asserts should be True as long as we have a single index filename
revlogv2: track pending write in the docket and expose it to hooks...
r48015 assert index_end <= pending_index_end
revlogv2: also keep track for the size of the "data" file...
r48016 assert data_end <= pending_data_end
revlogv2: track pending write in the docket and expose it to hooks...
r48015 self._initial_index_end = index_end
self._pending_index_end = pending_index_end
revlogv2: also keep track for the size of the "data" file...
r48016 self._initial_data_end = data_end
self._pending_data_end = pending_data_end
revlogv2: track pending write in the docket and expose it to hooks...
r48015 if use_pending:
self._index_end = self._pending_index_end
revlogv2: also keep track for the size of the "data" file...
r48016 self._data_end = self._pending_data_end
revlogv2: track pending write in the docket and expose it to hooks...
r48015 else:
self._index_end = self._initial_index_end
revlogv2: also keep track for the size of the "data" file...
r48016 self._data_end = self._initial_data_end
revlog: implement a "default compression" mode...
r48029 self.default_compression_header = default_compression_header
revlogv2: introduce a very basic docket file...
r48008
def index_filepath(self):
"""file path to the current index file associated to this docket"""
# very simplistic version at first
revlogv2: use a unique filename for index...
r48104 if self._index_uuid is None:
self._index_uuid = make_uid()
return b"%s-%s.idx" % (self._radix, self._index_uuid)
revlogv2: introduce a very basic docket file...
r48008
revlogv2: track current index size in the docket...
r48012 @property
def index_end(self):
return self._index_end
@index_end.setter
def index_end(self, new_size):
if new_size != self._index_end:
self._index_end = new_size
self._dirty = True
revlogv2: also keep track for the size of the "data" file...
r48016 @property
def data_end(self):
return self._data_end
@data_end.setter
def data_end(self, new_size):
if new_size != self._data_end:
self._data_end = new_size
self._dirty = True
revlogv2: track pending write in the docket and expose it to hooks...
r48015 def write(self, transaction, pending=False, stripping=False):
revlogv2: introduce a very basic docket file...
r48008 """write the modification of disk if any
This make the new content visible to all process"""
revlogv2: track pending write in the docket and expose it to hooks...
r48015 if not self._dirty:
return False
else:
if self._read_only:
msg = b'writing read-only docket: %s'
msg %= self._path
raise error.ProgrammingError(msg)
revlogv2: track current index size in the docket...
r48012 if not stripping:
# XXX we could, leverage the docket while stripping. However it
# is not powerfull enough at the time of this comment
transaction.addbackup(self._path, location=b'store')
revlogv2: introduce a very basic docket file...
r48008 with self._opener(self._path, mode=b'w', atomictemp=True) as f:
revlogv2: track pending write in the docket and expose it to hooks...
r48015 f.write(self._serialize(pending=pending))
# if pending we still need to the write final data eventually
self._dirty = pending
return True
revlogv2: introduce a very basic docket file...
r48008
revlogv2: track pending write in the docket and expose it to hooks...
r48015 def _serialize(self, pending=False):
if pending:
official_index_end = self._initial_index_end
revlogv2: also keep track for the size of the "data" file...
r48016 official_data_end = self._initial_data_end
revlogv2: track pending write in the docket and expose it to hooks...
r48015 else:
official_index_end = self._index_end
revlogv2: also keep track for the size of the "data" file...
r48016 official_data_end = self._data_end
revlogv2: track pending write in the docket and expose it to hooks...
r48015
# this assert should be True as long as we have a single index filename
revlogv2: also keep track for the size of the "data" file...
r48016 assert official_data_end <= self._data_end
revlogv2: track current index size in the docket...
r48012 data = (
self._version_header,
revlogv2: use a unique filename for index...
r48104 len(self._index_uuid),
revlogv2: track pending write in the docket and expose it to hooks...
r48015 official_index_end,
revlogv2: track current index size in the docket...
r48012 self._index_end,
revlogv2: also keep track for the size of the "data" file...
r48016 official_data_end,
self._data_end,
revlog: implement a "default compression" mode...
r48029 self.default_compression_header,
revlogv2: track current index size in the docket...
r48012 )
revlogv2: use a unique filename for index...
r48104 s = []
s.append(S_HEADER.pack(*data))
s.append(self._index_uuid)
return b''.join(s)
revlogv2: introduce a very basic docket file...
r48008
def default_docket(revlog, version_header):
"""given a revlog version a new docket object for the given revlog"""
changelogv2: use a dedicated version number...
r48040 rl_version = version_header & 0xFFFF
if rl_version not in (constants.REVLOGV2, constants.CHANGELOGV2):
revlogv2: introduce a very basic docket file...
r48008 return None
revlog: implement a "default compression" mode...
r48029 comp = util.compengines[revlog._compengine].revlogheader()
docket = RevlogDocket(
revlog,
version_header=version_header,
default_compression_header=comp,
)
revlogv2: introduce a very basic docket file...
r48008 docket._dirty = True
return docket
revlogv2: track pending write in the docket and expose it to hooks...
r48015 def parse_docket(revlog, data, use_pending=False):
revlogv2: introduce a very basic docket file...
r48008 """given some docket data return a docket object for the given revlog"""
header = S_HEADER.unpack(data[: S_HEADER.size])
revlogv2: use a unique filename for index...
r48104 offset = S_HEADER.size
revlogv2: also keep track for the size of the "data" file...
r48016 version_header = header[0]
revlogv2: use a unique filename for index...
r48104 index_uuid_size = header[1]
index_uuid = data[offset : offset + index_uuid_size]
offset += index_uuid_size
index_size = header[2]
pending_index_size = header[3]
data_size = header[4]
pending_data_size = header[5]
default_compression_header = header[6]
revlogv2: introduce a very basic docket file...
r48008 docket = RevlogDocket(
revlog,
revlogv2: track pending write in the docket and expose it to hooks...
r48015 use_pending=use_pending,
revlogv2: introduce a very basic docket file...
r48008 version_header=version_header,
revlogv2: use a unique filename for index...
r48104 index_uuid=index_uuid,
revlogv2: track current index size in the docket...
r48012 index_end=index_size,
revlogv2: track pending write in the docket and expose it to hooks...
r48015 pending_index_end=pending_index_size,
revlogv2: also keep track for the size of the "data" file...
r48016 data_end=data_size,
pending_data_end=pending_data_size,
revlog: implement a "default compression" mode...
r48029 default_compression_header=default_compression_header,
revlogv2: introduce a very basic docket file...
r48008 )
return docket