Show More
@@ -0,0 +1,80 b'' | |||||
|
1 | # docket - code related to revlog "docket" | |||
|
2 | # | |||
|
3 | # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net> | |||
|
4 | # | |||
|
5 | # This software may be used and distributed according to the terms of the | |||
|
6 | # GNU General Public License version 2 or any later version. | |||
|
7 | ||||
|
8 | ### Revlog docket file | |||
|
9 | # | |||
|
10 | # The revlog is stored on disk using multiple files: | |||
|
11 | # | |||
|
12 | # * a small docket file, containing metadata and a pointer, | |||
|
13 | # | |||
|
14 | # * an index file, containing fixed width information about revisions, | |||
|
15 | # | |||
|
16 | # * a data file, containing variable width data for these revisions, | |||
|
17 | ||||
|
18 | from __future__ import absolute_import | |||
|
19 | ||||
|
20 | import struct | |||
|
21 | ||||
|
22 | from . import ( | |||
|
23 | constants, | |||
|
24 | ) | |||
|
25 | ||||
|
# Docket format
#
# * 4 bytes: revlog version
#          | This is mandatory as docket must be compatible with the previous
#          | revlog index header.
#
# S_HEADER is built from the struct format of constants.INDEX_HEADER, so the
# docket header is packed and unpacked with the same layout as that header.
S_HEADER = struct.Struct(constants.INDEX_HEADER.format)
|
32 | ||||
|
33 | ||||
|
class RevlogDocket(object):
    """Metadata associated with a revlog.

    A docket remembers the revlog version header, the path it is stored at,
    and whether it holds modifications that still have to be written to disk.
    """

    def __init__(self, revlog, version_header=None):
        # Storage details are borrowed from the owning revlog.
        self._radix = revlog.radix
        self._path = revlog._docket_file
        self._opener = revlog.opener
        self._version_header = version_header
        # Nothing to flush until someone explicitly flags the docket dirty.
        self._dirty = False

    def index_filepath(self):
        """Return the path of the index file currently tied to this docket."""
        # Very simplistic for now: the name is derived from the radix alone.
        return b"%s.idx" % self._radix

    def write(self, transaction):
        """Write the pending modifications to disk, if there are any.

        This makes the new content visible to all processes. The docket file
        is registered as a backup with ``transaction`` before being replaced.
        """
        if not self._dirty:
            return
        transaction.addbackup(self._path, location=b'store')
        with self._opener(self._path, mode=b'w', atomictemp=True) as fp:
            fp.write(self._serialize())
        self._dirty = False

    def _serialize(self):
        """Return the docket content as bytes: the packed version header."""
        return S_HEADER.pack(self._version_header)
|
61 | ||||
|
62 | ||||
|
def default_docket(revlog, version_header):
    """Given a revlog version header, return a new docket for ``revlog``.

    Return None when the format version (the low 16 bits of
    ``version_header``) is not REVLOGV2. Otherwise the new docket is flagged
    dirty before being returned.
    """
    format_version = version_header & 0xFFFF
    if format_version == constants.REVLOGV2:
        new_docket = RevlogDocket(revlog, version_header=version_header)
        new_docket._dirty = True
        return new_docket
    return None
|
70 | ||||
|
71 | ||||
|
def parse_docket(revlog, data):
    """Given some docket ``data``, return a docket object for ``revlog``.

    Only the leading ``S_HEADER.size`` bytes of ``data`` are decoded; they
    carry the version header.
    """
    header_bytes = data[: S_HEADER.size]
    (version_header,) = S_HEADER.unpack(header_bytes)
    return RevlogDocket(revlog, version_header=version_header)
@@ -445,6 +445,8 b' class changelog(revlog.revlog):' | |||||
445 |
|
445 | |||
446 | def delayupdate(self, tr): |
|
446 | def delayupdate(self, tr): | |
447 | """delay visibility of index updates to other readers""" |
|
447 | """delay visibility of index updates to other readers""" | |
|
448 | if self._docket is not None: | |||
|
449 | return | |||
448 |
|
450 | |||
449 | if not self._delayed: |
|
451 | if not self._delayed: | |
450 | if len(self) == 0: |
|
452 | if len(self) == 0: |
@@ -1150,14 +1150,27 b' coreconfigitem(' | |||||
1150 | ) |
|
1150 | ) | |
1151 | # "out of experimental" todo list. |
|
1151 | # "out of experimental" todo list. | |
1152 | # |
|
1152 | # | |
1153 | # * to grow a docket file to at least store the last offset of the data |
|
1153 | # * stop storing version information in the index (it is already in the docket) | |
1154 | # file when rewriting sidedata. |
|
1154 | # * properly hide uncommitted content to other process | |
1155 | # * need a way of dealing with garbage data if we allow rewriting |
|
1155 | # * expose transaction content hooks during pre-commit validation | |
1156 | # *existing* sidedata. |
|
1156 | # * include management of a persistent nodemap in the main docket | |
|
1157 | # * enforce a "no-truncate" policy for mmap safety | |||
|
1158 | # - for censoring operation | |||
|
1159 | # - for stripping operation | |||
|
1160 | # - for rollback operation | |||
|
1161 | # * store the data size in the docket to simplify sidedata rewrite. | |||
|
1162 | # * track garbage data to eventually allow rewriting -existing- sidedata. | |||
1157 | # * Exchange-wise, we will also need to do something more efficient than |
|
1163 | # * Exchange-wise, we will also need to do something more efficient than | |
1158 | # keeping references to the affected revlogs, especially memory-wise when |
|
1164 | # keeping references to the affected revlogs, especially memory-wise when | |
1159 | # rewriting sidedata. |
|
1165 | # rewriting sidedata. | |
1160 | # * Also... compress the sidedata? (this should be coming very soon) |
|
1166 | # * sidedata compression | |
|
1167 | # * introduce a proper solution to reduce the number of filelog related files. | |||
|
1168 | # * Improvement to consider | |||
|
1169 | # - track compression mode in the index entries instead of the chunks | |||
|
1170 | # - split the data offset and flag field (the 2 bytes saved are mostly trouble) | |||
|
1171 | # - keep track of uncompressed -chunk- size (to preallocate memory better) | |||
|
1172 | # - keep track of chain base or size (probably not that useful anymore) | |||
|
1173 | # - store data and sidedata in different files | |||
1161 | coreconfigitem( |
|
1174 | coreconfigitem( | |
1162 | b'experimental', |
|
1175 | b'experimental', | |
1163 | b'revlogv2', |
|
1176 | b'revlogv2', |
@@ -75,6 +75,7 b' from .interfaces import (' | |||||
75 | ) |
|
75 | ) | |
76 | from .revlogutils import ( |
|
76 | from .revlogutils import ( | |
77 | deltas as deltautil, |
|
77 | deltas as deltautil, | |
|
78 | docket as docketutil, | |||
78 | flagutil, |
|
79 | flagutil, | |
79 | nodemap as nodemaputil, |
|
80 | nodemap as nodemaputil, | |
80 | revlogv0, |
|
81 | revlogv0, | |
@@ -317,6 +318,7 b' class revlog(object):' | |||||
317 |
|
318 | |||
318 | self.radix = radix |
|
319 | self.radix = radix | |
319 |
|
320 | |||
|
321 | self._docket_file = None | |||
320 | self._indexfile = None |
|
322 | self._indexfile = None | |
321 | self._datafile = None |
|
323 | self._datafile = None | |
322 | self._nodemap_file = None |
|
324 | self._nodemap_file = None | |
@@ -344,6 +346,7 b' class revlog(object):' | |||||
344 | self._maxchainlen = None |
|
346 | self._maxchainlen = None | |
345 | self._deltabothparents = True |
|
347 | self._deltabothparents = True | |
346 | self.index = None |
|
348 | self.index = None | |
|
349 | self._docket = None | |||
347 | self._nodemap_docket = None |
|
350 | self._nodemap_docket = None | |
348 | # Mapping of partial identifiers to full nodes. |
|
351 | # Mapping of partial identifiers to full nodes. | |
349 | self._pcache = {} |
|
352 | self._pcache = {} | |
@@ -505,8 +508,23 b' class revlog(object):' | |||||
505 | self._generaldelta = features[b'generaldelta'](self._format_flags) |
|
508 | self._generaldelta = features[b'generaldelta'](self._format_flags) | |
506 | self.hassidedata = features[b'sidedata'] |
|
509 | self.hassidedata = features[b'sidedata'] | |
507 |
|
510 | |||
508 | index_data = entry_data |
|
511 | if not features[b'docket']: | |
509 | self._indexfile = entry_point |
|
512 | self._indexfile = entry_point | |
|
513 | index_data = entry_data | |||
|
514 | else: | |||
|
515 | self._docket_file = entry_point | |||
|
516 | if self._initempty: | |||
|
517 | self._docket = docketutil.default_docket(self, header) | |||
|
518 | else: | |||
|
519 | self._docket = docketutil.parse_docket(self, entry_data) | |||
|
520 | self._indexfile = self._docket.index_filepath() | |||
|
521 | index_data = self._get_data(self._indexfile, mmapindexthreshold) | |||
|
522 | self._inline = False | |||
|
523 | # generaldelta implied by version 2 revlogs. | |||
|
524 | self._generaldelta = True | |||
|
525 | # the logic for persistent nodemap will be dealt with within the | |||
|
526 | # main docket, so disable it for now. | |||
|
527 | self._nodemap_file = None | |||
510 |
|
528 | |||
511 | if self.postfix is None or self.postfix == b'a': |
|
529 | if self.postfix is None or self.postfix == b'a': | |
512 | self._datafile = b'%s.d' % self.radix |
|
530 | self._datafile = b'%s.d' % self.radix | |
@@ -2053,6 +2071,8 b' class revlog(object):' | |||||
2053 | self._writinghandles = (ifh, dfh) |
|
2071 | self._writinghandles = (ifh, dfh) | |
2054 | try: |
|
2072 | try: | |
2055 | yield |
|
2073 | yield | |
|
2074 | if self._docket is not None: | |||
|
2075 | self._docket.write(transaction) | |||
2056 | finally: |
|
2076 | finally: | |
2057 | self._writinghandles = None |
|
2077 | self._writinghandles = None | |
2058 | finally: |
|
2078 | finally: | |
@@ -3126,9 +3146,7 b' class revlog(object):' | |||||
3126 | def rewrite_sidedata(self, transaction, helpers, startrev, endrev): |
|
3146 | def rewrite_sidedata(self, transaction, helpers, startrev, endrev): | |
3127 | if not self.hassidedata: |
|
3147 | if not self.hassidedata: | |
3128 | return |
|
3148 | return | |
3129 | # inline are not yet supported because they suffer from an issue when |
|
3149 | # revlog formats with sidedata support do not support inline | |||
3130 | # rewriting them (since it's not an append-only operation). |
|
|||
3131 | # See issue6485. |
|
|||
3132 | assert not self._inline |
|
3150 | assert not self._inline | |
3133 | if not helpers[1] and not helpers[2]: |
|
3151 | if not helpers[1] and not helpers[2]: | |
3134 | # Nothing to generate or remove |
|
3152 | # Nothing to generate or remove |
@@ -133,20 +133,22 b' FEATURES_BY_VERSION = {' | |||||
133 | b'inline': _no, |
|
133 | b'inline': _no, | |
134 | b'generaldelta': _no, |
|
134 | b'generaldelta': _no, | |
135 | b'sidedata': False, |
|
135 | b'sidedata': False, | |
|
136 | b'docket': False, | |||
136 | }, |
|
137 | }, | |
137 | REVLOGV1: { |
|
138 | REVLOGV1: { | |
138 | b'inline': _from_flag(FLAG_INLINE_DATA), |
|
139 | b'inline': _from_flag(FLAG_INLINE_DATA), | |
139 | b'generaldelta': _from_flag(FLAG_GENERALDELTA), |
|
140 | b'generaldelta': _from_flag(FLAG_GENERALDELTA), | |
140 | b'sidedata': False, |
|
141 | b'sidedata': False, | |
|
142 | b'docket': False, | |||
141 | }, |
|
143 | }, | |
142 | REVLOGV2: { |
|
144 | REVLOGV2: { | |
143 | # There is a bug in the transaction handling when going from an |
|
145 | # The point of inline-revlog is to reduce the number of files used in | |
144 | # inline revlog to a separate index and data file. Turn it off until |
|
146 | # the store. Using a docket defeats this purpose. So we need other | |||
145 | # it's fixed, since v2 revlogs sometimes get rewritten on exchange. |
|
147 | # means to reduce the number of files for revlogv2. | |
146 | # See issue6485 |
|
|||
147 | b'inline': _no, |
|
148 | b'inline': _no, | |
148 | b'generaldelta': _yes, |
|
149 | b'generaldelta': _yes, | |
149 | b'sidedata': True, |
|
150 | b'sidedata': True, | |
|
151 | b'docket': True, | |||
150 | }, |
|
152 | }, | |
151 | } |
|
153 | } | |
152 |
|
154 |
@@ -389,7 +389,7 b' def _calcmode(vfs):' | |||||
389 | ] |
|
389 | ] | |
390 |
|
390 | |||
391 | REVLOG_FILES_MAIN_EXT = (b'.i', b'i.tmpcensored') |
|
391 | REVLOG_FILES_MAIN_EXT = (b'.i', b'i.tmpcensored') | |
392 | REVLOG_FILES_OTHER_EXT = (b'.d', b'.n', b'.nd', b'd.tmpcensored') |
|
392 | REVLOG_FILES_OTHER_EXT = (b'.idx', b'.d', b'.n', b'.nd', b'd.tmpcensored') | |
393 | # files that are "volatile" and might change between listing and streaming |
|
393 | # files that are "volatile" and might change between listing and streaming | |
394 | # |
|
394 | # | |
395 | # note: the ".nd" file are nodemap data and won't "change" but they might be |
|
395 | # note: the ".nd" file are nodemap data and won't "change" but they might be | |
@@ -397,7 +397,7 b" REVLOG_FILES_OTHER_EXT = (b'.d', b'.n', " | |||||
397 | REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd') |
|
397 | REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd') | |
398 |
|
398 | |||
399 | # some exception to the above matching |
|
399 | # some exception to the above matching | |
400 | EXCLUDED = re.compile(b'.*undo\.[^/]+\.nd?$') |
|
400 | EXCLUDED = re.compile(b'.*undo\.[^/]+\.(nd?|i)$') | |
401 |
|
401 | |||
402 |
|
402 | |||
403 | def is_revlog(f, kind, st): |
|
403 | def is_revlog(f, kind, st): | |
@@ -407,7 +407,7 b' def is_revlog(f, kind, st):' | |||||
407 |
|
407 | |||
408 |
|
408 | |||
409 | def revlog_type(f): |
|
409 | def revlog_type(f): | |
410 | if f.endswith(REVLOG_FILES_MAIN_EXT): |
|
410 | if f.endswith(REVLOG_FILES_MAIN_EXT) and EXCLUDED.match(f) is None: | |
411 | return FILEFLAGS_REVLOG_MAIN |
|
411 | return FILEFLAGS_REVLOG_MAIN | |
412 | elif f.endswith(REVLOG_FILES_OTHER_EXT) and EXCLUDED.match(f) is None: |
|
412 | elif f.endswith(REVLOG_FILES_OTHER_EXT) and EXCLUDED.match(f) is None: | |
413 | t = FILETYPE_FILELOG_OTHER |
|
413 | t = FILETYPE_FILELOG_OTHER |
General Comments 0
You need to be logged in to leave comments.
Login now