##// END OF EJS Templates
revlogv2: introduce a very basic docket file...
marmoute -
r48008:616b8f41 default
parent child Browse files
Show More
@@ -0,0 +1,80 b''
1 # docket - code related to revlog "docket"
2 #
3 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
4 #
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
7
8 ### Revlog docket file
9 #
10 # The revlog is stored on disk using multiple files:
11 #
12 # * a small docket file, containing metadata and a pointer,
13 #
14 # * an index file, containing fixed width information about revisions,
15 #
16 # * a data file, containing variable width data for these revisions,
17
18 from __future__ import absolute_import
19
20 import struct
21
22 from . import (
23 constants,
24 )
25
26 # Docket format
27 #
28 # * 4 bytes: revlog version
29 # | This is mandatory as docket must be compatible with the previous
30 # | revlog index header.
31 S_HEADER = struct.Struct(constants.INDEX_HEADER.format)
32
33
class RevlogDocket(object):
    """Metadata attached to a revlog, persisted through its docket file."""

    def __init__(self, revlog, version_header=None):
        # Only these three attributes are copied from the revlog; the revlog
        # object itself is not kept.
        self._radix = revlog.radix
        self._path = revlog._docket_file
        self._opener = revlog.opener
        self._version_header = version_header
        # A docket starts clean; callers flip `_dirty` when a write is needed.
        self._dirty = False

    def index_filepath(self):
        """Return the path of the index file this docket currently refers to."""
        # Very simplistic for now: the radix followed by a fixed extension.
        index_path = b"%s.idx" % self._radix
        return index_path

    def write(self, transaction):
        """Write pending modifications to disk, if there are any.

        This makes the new content visible to all processes. Nothing happens
        when the docket is not dirty.
        """
        if not self._dirty:
            return
        # Register the backup with the transaction before touching the file.
        transaction.addbackup(self._path, location=b'store')
        docket_file = self._opener(self._path, mode=b'w', atomictemp=True)
        with docket_file as fp:
            fp.write(self._serialize())
        self._dirty = False

    def _serialize(self):
        """Return the docket content packed with ``S_HEADER``."""
        packed = S_HEADER.pack(self._version_header)
        return packed
61
62
def default_docket(revlog, version_header):
    """Build a new docket object for ``revlog`` from a revlog version header.

    Returns None when the low 16 bits of ``version_header`` differ from
    ``constants.REVLOGV2``. Otherwise returns a new ``RevlogDocket`` that is
    already flagged dirty.
    """
    format_version = version_header & 0xFFFF
    if format_version == constants.REVLOGV2:
        fresh = RevlogDocket(revlog, version_header=version_header)
        fresh._dirty = True
        return fresh
    return None
70
71
def parse_docket(revlog, data):
    """Decode raw docket ``data`` into a docket object for ``revlog``.

    Only the leading ``S_HEADER.size`` bytes of ``data`` are read; they must
    unpack to exactly one value, the version header.
    """
    raw_header = data[: S_HEADER.size]
    (version_header,) = S_HEADER.unpack(raw_header)
    return RevlogDocket(revlog, version_header=version_header)
@@ -445,6 +445,8 b' class changelog(revlog.revlog):'
445 445
446 446 def delayupdate(self, tr):
447 447 """delay visibility of index updates to other readers"""
448 if self._docket is not None:
449 return
448 450
449 451 if not self._delayed:
450 452 if len(self) == 0:
@@ -1150,14 +1150,27 b' coreconfigitem('
1150 1150 )
1151 1151 # "out of experimental" todo list.
1152 1152 #
1153 # * to grow a docket file to at least store the last offset of the data
1154 # file when rewriting sidedata.
1155 # * need a way of dealing with garbage data if we allow rewriting
1156 # *existing* sidedata.
1153 # * stop storing version information in the index (it is already in the docket)
1154 # * properly hide uncommitted content to other process
1155 # * expose transaction content hooks during pre-commit validation
1156 # * include management of a persistent nodemap in the main docket
1157 # * enforce a "no-truncate" policy for mmap safety
1158 # - for censoring operation
1159 # - for stripping operation
1160 # - for rollback operation
1161 # * store the data size in the docket to simplify sidedata rewrite.
1162 # * track garbage data to eventually allow rewriting -existing- sidedata.
1157 1163 # * Exchange-wise, we will also need to do something more efficient than
1158 1164 # keeping references to the affected revlogs, especially memory-wise when
1159 1165 # rewriting sidedata.
1160 # * Also... compress the sidedata? (this should be coming very soon)
1166 # * sidedata compression
1167 # * introduce a proper solution to reduce the number of filelog related files.
1168 # * Improvement to consider
1169 # - track compression mode in the index entries instead of the chunks
1170 # - split the data offset and flag field (the 2 bytes saved are mostly trouble)
1171 # - keep track of uncompressed -chunk- size (to preallocate memory better)
1172 # - keep track of chain base or size (probably not that useful anymore)
1173 # - store data and sidedata in different files
1161 1174 coreconfigitem(
1162 1175 b'experimental',
1163 1176 b'revlogv2',
@@ -75,6 +75,7 b' from .interfaces import ('
75 75 )
76 76 from .revlogutils import (
77 77 deltas as deltautil,
78 docket as docketutil,
78 79 flagutil,
79 80 nodemap as nodemaputil,
80 81 revlogv0,
@@ -317,6 +318,7 b' class revlog(object):'
317 318
318 319 self.radix = radix
319 320
321 self._docket_file = None
320 322 self._indexfile = None
321 323 self._datafile = None
322 324 self._nodemap_file = None
@@ -344,6 +346,7 b' class revlog(object):'
344 346 self._maxchainlen = None
345 347 self._deltabothparents = True
346 348 self.index = None
349 self._docket = None
347 350 self._nodemap_docket = None
348 351 # Mapping of partial identifiers to full nodes.
349 352 self._pcache = {}
@@ -505,8 +508,23 b' class revlog(object):'
505 508 self._generaldelta = features[b'generaldelta'](self._format_flags)
506 509 self.hassidedata = features[b'sidedata']
507 510
511 if not features[b'docket']:
512 self._indexfile = entry_point
508 513 index_data = entry_data
509 self._indexfile = entry_point
514 else:
515 self._docket_file = entry_point
516 if self._initempty:
517 self._docket = docketutil.default_docket(self, header)
518 else:
519 self._docket = docketutil.parse_docket(self, entry_data)
520 self._indexfile = self._docket.index_filepath()
521 index_data = self._get_data(self._indexfile, mmapindexthreshold)
522 self._inline = False
523 # generaldelta implied by version 2 revlogs.
524 self._generaldelta = True
525 # the logic for persistent nodemap will be dealt with within the
526 # main docket, so disable it for now.
527 self._nodemap_file = None
510 528
511 529 if self.postfix is None or self.postfix == b'a':
512 530 self._datafile = b'%s.d' % self.radix
@@ -2053,6 +2071,8 b' class revlog(object):'
2053 2071 self._writinghandles = (ifh, dfh)
2054 2072 try:
2055 2073 yield
2074 if self._docket is not None:
2075 self._docket.write(transaction)
2056 2076 finally:
2057 2077 self._writinghandles = None
2058 2078 finally:
@@ -3126,9 +3146,7 b' class revlog(object):'
3126 3146 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
3127 3147 if not self.hassidedata:
3128 3148 return
3129 # inline are not yet supported because they suffer from an issue when
3130 # rewriting them (since it's not an append-only operation).
3131 # See issue6485.
3149 # revlog formats with sidedata support do not support inline
3132 3150 assert not self._inline
3133 3151 if not helpers[1] and not helpers[2]:
3134 3152 # Nothing to generate or remove
@@ -133,20 +133,22 b' FEATURES_BY_VERSION = {'
133 133 b'inline': _no,
134 134 b'generaldelta': _no,
135 135 b'sidedata': False,
136 b'docket': False,
136 137 },
137 138 REVLOGV1: {
138 139 b'inline': _from_flag(FLAG_INLINE_DATA),
139 140 b'generaldelta': _from_flag(FLAG_GENERALDELTA),
140 141 b'sidedata': False,
142 b'docket': False,
141 143 },
142 144 REVLOGV2: {
143 # There is a bug in the transaction handling when going from an
144 # inline revlog to a separate index and data file. Turn it off until
145 # it's fixed, since v2 revlogs sometimes get rewritten on exchange.
146 # See issue6485
145 # The point of inline-revlog is to reduce the number of files used in
146 # the store. Using a docket defeats this purpose, so we need other
147 # means to reduce the number of files for revlogv2.
147 148 b'inline': _no,
148 149 b'generaldelta': _yes,
149 150 b'sidedata': True,
151 b'docket': True,
150 152 },
151 153 }
152 154
@@ -389,7 +389,7 b' def _calcmode(vfs):'
389 389 ]
390 390
391 391 REVLOG_FILES_MAIN_EXT = (b'.i', b'i.tmpcensored')
392 REVLOG_FILES_OTHER_EXT = (b'.d', b'.n', b'.nd', b'd.tmpcensored')
392 REVLOG_FILES_OTHER_EXT = (b'.idx', b'.d', b'.n', b'.nd', b'd.tmpcensored')
393 393 # files that are "volatile" and might change between listing and streaming
394 394 #
395 395 # note: the ".nd" file are nodemap data and won't "change" but they might be
@@ -397,7 +397,7 b" REVLOG_FILES_OTHER_EXT = (b'.d', b'.n', "
397 397 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
398 398
399 399 # some exception to the above matching
400 EXCLUDED = re.compile(b'.*undo\.[^/]+\.nd?$')
400 EXCLUDED = re.compile(b'.*undo\.[^/]+\.(nd?|i)$')
401 401
402 402
403 403 def is_revlog(f, kind, st):
@@ -407,7 +407,7 b' def is_revlog(f, kind, st):'
407 407
408 408
409 409 def revlog_type(f):
410 if f.endswith(REVLOG_FILES_MAIN_EXT):
410 if f.endswith(REVLOG_FILES_MAIN_EXT) and EXCLUDED.match(f) is None:
411 411 return FILEFLAGS_REVLOG_MAIN
412 412 elif f.endswith(REVLOG_FILES_OTHER_EXT) and EXCLUDED.match(f) is None:
413 413 t = FILETYPE_FILELOG_OTHER
General Comments 0
You need to be logged in to leave comments. Login now