Show More
@@ -0,0 +1,80 b'' | |||
|
1 | # docket - code related to revlog "docket" | |
|
2 | # | |
|
3 | # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net> | |
|
4 | # | |
|
5 | # This software may be used and distributed according to the terms of the | |
|
6 | # GNU General Public License version 2 or any later version. | |
|
7 | ||
|
8 | ### Revlog docket file | |
|
9 | # | |
|
10 | # The revlog is stored on disk using multiple files: | |
|
11 | # | |
|
12 | # * a small docket file, containing metadata and a pointer, | |
|
13 | # | |
|
14 | # * an index file, containing fixed width information about revisions, | |
|
15 | # | |
|
16 | # * a data file, containing variable width data for these revisions, | |
|
17 | ||
|
18 | from __future__ import absolute_import | |
|
19 | ||
|
20 | import struct | |
|
21 | ||
|
22 | from . import ( | |
|
23 | constants, | |
|
24 | ) | |
|
25 | ||
|
26 | # Docket format | |
|
27 | # | |
|
28 | # * 4 bytes: revlog version | |
|
29 | # | This is mandatory as docket must be compatible with the previous | |
|
30 | # | revlog index header. | |
|
31 | S_HEADER = struct.Struct(constants.INDEX_HEADER.format) | |
|
32 | ||
|
33 | ||
|
class RevlogDocket(object):
    """Metadata attached to a revlog and stored in its docket file.

    The docket keeps the revlog version header and knows which index file
    currently belongs to the revlog.
    """

    def __init__(self, revlog, version_header=None):
        """Create the docket of ``revlog``.

        ``version_header`` is the value packed into the docket header when
        the docket is serialized.
        """
        # Storage details are borrowed from the owning revlog.
        self._opener = revlog.opener
        self._path = revlog._docket_file
        self._radix = revlog.radix
        self._version_header = version_header
        # A docket starts clean; callers flag it dirty when it needs to be
        # written out.
        self._dirty = False

    def index_filepath(self):
        """Return the path of the index file this docket refers to."""
        # Very simplistic for now: the name only depends on the radix.
        index_path = b"%s.idx" % self._radix
        return index_path

    def write(self, transaction):
        """Write pending modifications to disk, if there are any.

        Once written, the new content is visible to all processes. Nothing
        happens when the docket is not dirty.
        """
        if not self._dirty:
            return
        # Register a backup of the docket with the transaction before
        # replacing the file.
        transaction.addbackup(self._path, location=b'store')
        with self._opener(self._path, mode=b'w', atomictemp=True) as fp:
            payload = self._serialize()
            fp.write(payload)
        self._dirty = False

    def _serialize(self):
        """Return the docket content as bytes (currently the header only)."""
        return S_HEADER.pack(self._version_header)
|
61 | ||
|
62 | ||
|
def default_docket(revlog, version_header):
    """Return a new docket for ``revlog`` built from ``version_header``.

    Only the low 16 bits of ``version_header`` are compared against
    ``constants.REVLOGV2``; for any other value no docket is created and
    None is returned. The returned docket is flagged dirty so that it gets
    written on the next ``write`` call.
    """
    revlog_version = version_header & 0xFFFF
    if revlog_version == constants.REVLOGV2:
        new_docket = RevlogDocket(revlog, version_header=version_header)
        new_docket._dirty = True
        return new_docket
    return None
|
70 | ||
|
71 | ||
|
def parse_docket(revlog, data):
    """Return the docket of ``revlog`` decoded from raw docket ``data``.

    Only the leading ``S_HEADER.size`` bytes of ``data`` are read; they hold
    the version header.
    """
    raw_header = data[: S_HEADER.size]
    (version_header,) = S_HEADER.unpack(raw_header)
    return RevlogDocket(revlog, version_header=version_header)
@@ -445,6 +445,8 b' class changelog(revlog.revlog):' | |||
|
445 | 445 | |
|
446 | 446 | def delayupdate(self, tr): |
|
447 | 447 | """delay visibility of index updates to other readers""" |
|
448 | if self._docket is not None: | |
|
449 | return | |
|
448 | 450 | |
|
449 | 451 | if not self._delayed: |
|
450 | 452 | if len(self) == 0: |
@@ -1150,14 +1150,27 b' coreconfigitem(' | |||
|
1150 | 1150 | ) |
|
1151 | 1151 | # "out of experimental" todo list. |
|
1152 | 1152 | # |
|
1153 | # * to grow a docket file to at least store the last offset of the data | |
|
1154 | # file when rewriting sidedata. | |
|
1155 | # * need a way of dealing with garbage data if we allow rewriting | |
|
1156 | # *existing* sidedata. | |
|
1153 | # * stop storing version information in the index (it is already in the docket) | |
|
1154 | # * properly hide uncommitted content from other processes | |
|
1155 | # * expose transaction content hooks during pre-commit validation | |
|
1156 | # * include management of a persistent nodemap in the main docket | |
|
1157 | # * enforce a "no-truncate" policy for mmap safety | |
|
1158 | # - for censoring operation | |
|
1159 | # - for stripping operation | |
|
1160 | # - for rollback operation | |
|
1161 | # * store the data size in the docket to simplify sidedata rewrite. | |
|
1162 | # * track garbage data to eventually allow rewriting -existing- sidedata. | |
|
1157 | 1163 | # * Exchange-wise, we will also need to do something more efficient than |
|
1158 | 1164 | # keeping references to the affected revlogs, especially memory-wise when |
|
1159 | 1165 | # rewriting sidedata. |
|
1160 | # * Also... compress the sidedata? (this should be coming very soon) | |
|
1166 | # * sidedata compression | |
|
1167 | # * introduce a proper solution to reduce the number of filelog related files. | |
|
1168 | # * Improvement to consider | |
|
1169 | # - track compression mode in the index entries instead of the chunks | |
|
1170 | # - split the data offset and flag field (the 2 bytes saved are mostly trouble) | |
|
1171 | # - keep track of uncompressed -chunk- size (to preallocate memory better) | |
|
1172 | # - keep track of chain base or size (probably not that useful anymore) | |
|
1173 | # - store data and sidedata in different files | |
|
1161 | 1174 | coreconfigitem( |
|
1162 | 1175 | b'experimental', |
|
1163 | 1176 | b'revlogv2', |
@@ -75,6 +75,7 b' from .interfaces import (' | |||
|
75 | 75 | ) |
|
76 | 76 | from .revlogutils import ( |
|
77 | 77 | deltas as deltautil, |
|
78 | docket as docketutil, | |
|
78 | 79 | flagutil, |
|
79 | 80 | nodemap as nodemaputil, |
|
80 | 81 | revlogv0, |
@@ -317,6 +318,7 b' class revlog(object):' | |||
|
317 | 318 | |
|
318 | 319 | self.radix = radix |
|
319 | 320 | |
|
321 | self._docket_file = None | |
|
320 | 322 | self._indexfile = None |
|
321 | 323 | self._datafile = None |
|
322 | 324 | self._nodemap_file = None |
@@ -344,6 +346,7 b' class revlog(object):' | |||
|
344 | 346 | self._maxchainlen = None |
|
345 | 347 | self._deltabothparents = True |
|
346 | 348 | self.index = None |
|
349 | self._docket = None | |
|
347 | 350 | self._nodemap_docket = None |
|
348 | 351 | # Mapping of partial identifiers to full nodes. |
|
349 | 352 | self._pcache = {} |
@@ -505,8 +508,23 b' class revlog(object):' | |||
|
505 | 508 | self._generaldelta = features[b'generaldelta'](self._format_flags) |
|
506 | 509 | self.hassidedata = features[b'sidedata'] |
|
507 | 510 | |
|
508 | index_data = entry_data | |
|
509 | self._indexfile = entry_point | |
|
511 | if not features[b'docket']: | |
|
512 | self._indexfile = entry_point | |
|
513 | index_data = entry_data | |
|
514 | else: | |
|
515 | self._docket_file = entry_point | |
|
516 | if self._initempty: | |
|
517 | self._docket = docketutil.default_docket(self, header) | |
|
518 | else: | |
|
519 | self._docket = docketutil.parse_docket(self, entry_data) | |
|
520 | self._indexfile = self._docket.index_filepath() | |
|
521 | index_data = self._get_data(self._indexfile, mmapindexthreshold) | |
|
522 | self._inline = False | |
|
523 | # generaldelta implied by version 2 revlogs. | |
|
524 | self._generaldelta = True | |
|
525 | # the logic for persistent nodemap will be dealt with within the | |
|
526 | # main docket, so disable it for now. | |
|
527 | self._nodemap_file = None | |
|
510 | 528 | |
|
511 | 529 | if self.postfix is None or self.postfix == b'a': |
|
512 | 530 | self._datafile = b'%s.d' % self.radix |
@@ -2053,6 +2071,8 b' class revlog(object):' | |||
|
2053 | 2071 | self._writinghandles = (ifh, dfh) |
|
2054 | 2072 | try: |
|
2055 | 2073 | yield |
|
2074 | if self._docket is not None: | |
|
2075 | self._docket.write(transaction) | |
|
2056 | 2076 | finally: |
|
2057 | 2077 | self._writinghandles = None |
|
2058 | 2078 | finally: |
@@ -3126,9 +3146,7 b' class revlog(object):' | |||
|
3126 | 3146 | def rewrite_sidedata(self, transaction, helpers, startrev, endrev): |
|
3127 | 3147 | if not self.hassidedata: |
|
3128 | 3148 | return |
|
3129 | # inline are not yet supported because they suffer from an issue when | |
|
3130 | # rewriting them (since it's not an append-only operation). | |
|
3131 | # See issue6485. | |
|
3149 | # revlog formats with sidedata support do not support inline | |
|
3132 | 3150 | assert not self._inline |
|
3133 | 3151 | if not helpers[1] and not helpers[2]: |
|
3134 | 3152 | # Nothing to generate or remove |
@@ -133,20 +133,22 b' FEATURES_BY_VERSION = {' | |||
|
133 | 133 | b'inline': _no, |
|
134 | 134 | b'generaldelta': _no, |
|
135 | 135 | b'sidedata': False, |
|
136 | b'docket': False, | |
|
136 | 137 | }, |
|
137 | 138 | REVLOGV1: { |
|
138 | 139 | b'inline': _from_flag(FLAG_INLINE_DATA), |
|
139 | 140 | b'generaldelta': _from_flag(FLAG_GENERALDELTA), |
|
140 | 141 | b'sidedata': False, |
|
142 | b'docket': False, | |
|
141 | 143 | }, |
|
142 | 144 | REVLOGV2: { |
|
143 | # There is a bug in the transaction handling when going from an | |
|
144 | # inline revlog to a separate index and data file. Turn it off until | |
|
145 | # it's fixed, since v2 revlogs sometimes get rewritten on exchange. | |
|
146 | # See issue6485 | |
|
145 | # The point of inline-revlog is to reduce the number of files used in | |
|
146 | # the store. Using a docket defeats this purpose. So we need other | |
|
147 | # means to reduce the number of files for revlogv2. | |
|
147 | 148 | b'inline': _no, |
|
148 | 149 | b'generaldelta': _yes, |
|
149 | 150 | b'sidedata': True, |
|
151 | b'docket': True, | |
|
150 | 152 | }, |
|
151 | 153 | } |
|
152 | 154 |
@@ -389,7 +389,7 b' def _calcmode(vfs):' | |||
|
389 | 389 | ] |
|
390 | 390 | |
|
391 | 391 | REVLOG_FILES_MAIN_EXT = (b'.i', b'i.tmpcensored') |
|
392 | REVLOG_FILES_OTHER_EXT = (b'.d', b'.n', b'.nd', b'd.tmpcensored') | |
|
392 | REVLOG_FILES_OTHER_EXT = (b'.idx', b'.d', b'.n', b'.nd', b'd.tmpcensored') | |
|
393 | 393 | # files that are "volatile" and might change between listing and streaming |
|
394 | 394 | # |
|
395 | 395 | # note: the ".nd" file are nodemap data and won't "change" but they might be |
@@ -397,7 +397,7 b" REVLOG_FILES_OTHER_EXT = (b'.d', b'.n', " | |||
|
397 | 397 | REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd') |
|
398 | 398 | |
|
399 | 399 | # some exception to the above matching |
|
400 | EXCLUDED = re.compile(b'.*undo\.[^/]+\.nd?$') | |
|
400 | EXCLUDED = re.compile(b'.*undo\.[^/]+\.(nd?|i)$') | |
|
401 | 401 | |
|
402 | 402 | |
|
403 | 403 | def is_revlog(f, kind, st): |
@@ -407,7 +407,7 b' def is_revlog(f, kind, st):' | |||
|
407 | 407 | |
|
408 | 408 | |
|
409 | 409 | def revlog_type(f): |
|
410 | if f.endswith(REVLOG_FILES_MAIN_EXT): | |
|
410 | if f.endswith(REVLOG_FILES_MAIN_EXT) and EXCLUDED.match(f) is None: | |
|
411 | 411 | return FILEFLAGS_REVLOG_MAIN |
|
412 | 412 | elif f.endswith(REVLOG_FILES_OTHER_EXT) and EXCLUDED.match(f) is None: |
|
413 | 413 | t = FILETYPE_FILELOG_OTHER |
General Comments 0
You need to be logged in to leave comments.
Login now