##// END OF EJS Templates
revlogv2: use a unique filename for index...
marmoute -
r48114:f612db76 default
parent child Browse files
Show More
@@ -1,235 +1,249 b''
1 1 # docket - code related to revlog "docket"
2 2 #
3 3 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 ### Revlog docket file
9 9 #
10 10 # The revlog is stored on disk using multiple files:
11 11 #
12 12 # * a small docket file, containing metadata and a pointer,
13 13 #
14 14 # * an index file, containing fixed width information about revisions,
15 15 #
16 16 # * a data file, containing variable width data for these revisions,
17 17
18 18 from __future__ import absolute_import
19 19
20 20 import errno
21 21 import os
22 22 import random
23 23 import struct
24 24
25 25 from .. import (
26 26 encoding,
27 27 error,
28 28 node,
29 29 pycompat,
30 30 util,
31 31 )
32 32
33 33 from . import (
34 34 constants,
35 35 )
36 36
37 37
def make_uid(id_size=8):
    """return a new unique identifier.

    The identifier is random and composed of ascii characters. It is exactly
    `id_size` characters long."""
    # since we "hex" the result, every random byte yields two characters: we
    # only need half as many bytes as the final identifier size. Round up and
    # truncate so that an odd `id_size` still gives exactly `id_size`
    # characters instead of `id_size - 1`.
    return node.hex(os.urandom((id_size + 1) // 2))[:id_size]
45 45
46 46
# some special test logic to avoid annoying random output in the tests
stable_docket_file = encoding.environ.get(b'HGTEST_UUIDFILE')

if stable_docket_file:

    def make_uid(id_size=8):
        """return a deterministic identifier, for use by the test suite.

        The content of the `HGTEST_UUIDFILE` file is used as a seed. The
        identifier produced is written back to that file before being
        returned, so it becomes the seed of the next call."""
        try:
            with open(stable_docket_file, mode='rb') as fh:
                seed = fh.read().strip()
        except IOError as exc:
            # a missing file only means no identifier was produced yet
            if exc.errno != errno.ENOENT:
                raise
            seed = b'04'  # chosen by a fair dice roll. guaranteed to be random
        # some basic circular sum hashing on 64 bits
        #
        # iterating a bytearray yields integers on both Python 2 and 3
        int_seed = 0
        low_mask = (1 << 35) - 1
        for value in bytearray(seed):
            rotated_high = int_seed >> 35
            rotated_low = (int_seed & low_mask) << 28
            int_seed = rotated_high + rotated_low + value
        rng = random.Random()
        if pycompat.ispy3:
            rng.seed(int_seed, version=1)
        else:
            rng.seed(int_seed)
        # once we drop python 3.8 support we can simply use r.randbytes
        raw = rng.getrandbits(id_size * 4)
        # the packing below is hard-coded for a 32 bits value
        assert id_size == 8
        new = node.hex(struct.pack('>L', raw))
        with open(stable_docket_file, 'wb') as fh:
            fh.write(new)
        return new
84 84
85 85
# Docket format
#
# * 4 bytes: revlog version
#          |   This is mandatory as docket must be compatible with the previous
#          |   revlog index header.
# * 1 bytes: size of index uuid
# * 8 bytes: size of index-data
# * 8 bytes: pending size of index-data
# * 8 bytes: size of data
# * 8 bytes: pending size of data
# * 1 bytes: default compression header
#
# The fixed-size header above is immediately followed by the index uuid
# itself (its length is the "size of index uuid" field).
#
# NOTE(review): with `struct` standard sizing (any explicit byte-order prefix)
# an 'L' field is 4 bytes wide, not the 8 bytes listed above -- confirm
# against the prefix used by `constants.INDEX_HEADER` and fix whichever of the
# comment or the format is wrong.
S_HEADER = struct.Struct(constants.INDEX_HEADER.format + 'BLLLLc')
97 98
98 99
class RevlogDocket(object):
    """metadata associated with revlog

    The docket records the revlog version header, the unique identifier used
    to build the index file name, the "official" and "pending" end offsets of
    the index and data files, and the default compression header.

    When `use_pending` is set, the pending sizes are exposed through
    `index_end` / `data_end` and the docket is read-only."""

    def __init__(
        self,
        revlog,
        use_pending=False,
        version_header=None,
        index_uuid=None,
        index_end=0,
        pending_index_end=0,
        data_end=0,
        pending_data_end=0,
        default_compression_header=None,
    ):
        self._version_header = version_header
        self._read_only = bool(use_pending)
        self._dirty = False
        self._radix = revlog.radix
        self._path = revlog._docket_file
        self._opener = revlog.opener
        # None until an identifier is needed, see `_ensure_index_uuid`
        self._index_uuid = index_uuid
        # these asserts should be True as long as we have a single index
        # filename
        assert index_end <= pending_index_end
        assert data_end <= pending_data_end
        self._initial_index_end = index_end
        self._pending_index_end = pending_index_end
        self._initial_data_end = data_end
        self._pending_data_end = pending_data_end
        if use_pending:
            self._index_end = self._pending_index_end
            self._data_end = self._pending_data_end
        else:
            self._index_end = self._initial_index_end
            self._data_end = self._initial_data_end
        self.default_compression_header = default_compression_header

    def _ensure_index_uuid(self):
        """return the index uuid, generating one first if none is set yet"""
        if self._index_uuid is None:
            self._index_uuid = make_uid()
        return self._index_uuid

    def index_filepath(self):
        """file path to the current index file associated to this docket"""
        # very simplistic version at first
        return b"%s-%s.idx" % (self._radix, self._ensure_index_uuid())

    @property
    def index_end(self):
        return self._index_end

    @index_end.setter
    def index_end(self, new_size):
        # only an actual change makes the docket dirty
        if new_size != self._index_end:
            self._index_end = new_size
            self._dirty = True

    @property
    def data_end(self):
        return self._data_end

    @data_end.setter
    def data_end(self, new_size):
        # only an actual change makes the docket dirty
        if new_size != self._data_end:
            self._data_end = new_size
            self._dirty = True

    def write(self, transaction, pending=False, stripping=False):
        """write the modification to disk, if any

        This makes the new content visible to all processes.

        Return False when there was nothing to write, True otherwise. Raise
        `error.ProgrammingError` when the docket is read-only."""
        if not self._dirty:
            return False
        if self._read_only:
            msg = b'writing read-only docket: %s'
            msg %= self._path
            raise error.ProgrammingError(msg)
        if not stripping:
            # XXX we could leverage the docket while stripping. However it
            # is not powerful enough at the time of this comment
            transaction.addbackup(self._path, location=b'store')
        with self._opener(self._path, mode=b'w', atomictemp=True) as f:
            f.write(self._serialize(pending=pending))
        # if pending, we still need to write the final data eventually
        self._dirty = pending
        return True

    def _serialize(self, pending=False):
        """return the binary form of the docket

        The fixed-size header is followed by the index uuid. When `pending`
        is set, the "official" sizes written are the ones the docket was
        created with, and the current sizes are only stored as pending."""
        if pending:
            official_index_end = self._initial_index_end
            official_data_end = self._initial_data_end
        else:
            official_index_end = self._index_end
            official_data_end = self._data_end

        # this assert should be True as long as we have a single index filename
        assert official_data_end <= self._data_end
        # a docket may be serialized before `index_filepath` was ever called;
        # make sure there is an identifier instead of failing on `len(None)`
        index_uuid = self._ensure_index_uuid()
        data = (
            self._version_header,
            len(index_uuid),
            official_index_end,
            self._index_end,
            official_data_end,
            self._data_end,
            self.default_compression_header,
        )
        s = []
        s.append(S_HEADER.pack(*data))
        s.append(index_uuid)
        return b''.join(s)
199 208
200 209
def default_docket(revlog, version_header):
    """given a revlog version header, return a new docket for the revlog

    Return None when the revlog version encoded in the low 16 bits of
    `version_header` is neither REVLOGV2 nor CHANGELOGV2. The docket returned
    is flagged as dirty."""
    if (version_header & 0xFFFF) not in (
        constants.REVLOGV2,
        constants.CHANGELOGV2,
    ):
        return None
    engine = util.compengines[revlog._compengine]
    new_docket = RevlogDocket(
        revlog,
        version_header=version_header,
        default_compression_header=engine.revlogheader(),
    )
    new_docket._dirty = True
    return new_docket
214 223
215 224
def parse_docket(revlog, data, use_pending=False):
    """given some docket data return a docket object for the given revlog

    `data` starts with the fixed-size header described by `S_HEADER`,
    immediately followed by the index uuid, whose length is stored in the
    header."""
    (
        version_header,
        index_uuid_size,
        index_size,
        pending_index_size,
        data_size,
        pending_data_size,
        default_compression_header,
    ) = S_HEADER.unpack(data[: S_HEADER.size])
    # the variable-size part starts right after the fixed-size header
    offset = S_HEADER.size
    index_uuid = data[offset : offset + index_uuid_size]
    docket = RevlogDocket(
        revlog,
        use_pending=use_pending,
        version_header=version_header,
        index_uuid=index_uuid,
        index_end=index_size,
        pending_index_end=pending_index_size,
        data_end=data_size,
        pending_data_end=pending_data_size,
        default_compression_header=default_compression_header,
    )
    return docket
@@ -1,69 +1,85 b''
1 1 #require reporevlogstore
2 2
3 3 A repo with unknown revlogv2 requirement string cannot be opened
4 4
5 5 $ hg init invalidreq
6 6 $ cd invalidreq
7 7 $ echo exp-revlogv2.unknown >> .hg/requires
8 8 $ hg log
9 9 abort: repository requires features unknown to this Mercurial: exp-revlogv2.unknown
10 10 (see https://mercurial-scm.org/wiki/MissingRequirement for more information)
11 11 [255]
12 12 $ cd ..
13 13
14 14 Can create and open repo with revlog v2 requirement
15 15
16 16 $ cat >> $HGRCPATH << EOF
17 17 > [experimental]
18 18 > revlogv2 = enable-unstable-format-and-corrupt-my-data
19 19 > EOF
20 20
21 21 $ hg init empty-repo
22 22 $ cd empty-repo
23 23 $ cat .hg/requires
24 24 dotencode
25 25 exp-dirstate-v2 (dirstate-v2 !)
26 26 exp-revlogv2.2
27 27 fncache
28 28 generaldelta
29 29 persistent-nodemap (rust !)
30 30 revlog-compression-zstd (zstd !)
31 31 sparserevlog
32 32 store
33 33
34 34 $ hg log
35 35
36 36 Unknown flags to revlog are rejected
37 37
38 38 >>> with open('.hg/store/00changelog.i', 'wb') as fh:
39 39 ... fh.write(b'\xff\x00\xde\xad') and None
40 40
41 41 $ hg log
42 42 abort: unknown flags (0xff00) in version 57005 revlog 00changelog
43 43 [50]
44 44
45 45 $ cd ..
46 46
47 47 Writing a simple revlog v2 works
48 48
49 49 $ hg init simple
50 50 $ cd simple
51 51 $ touch foo
52 52 $ hg -q commit -A -m initial
53 53
54 54 $ hg log
55 55 changeset: 0:96ee1d7354c4
56 56 tag: tip
57 57 user: test
58 58 date: Thu Jan 01 00:00:00 1970 +0000
59 59 summary: initial
60 60
61 61 Header written as expected
62 62
63 63 $ f --hexdump --bytes 4 .hg/store/00changelog.i
64 64 .hg/store/00changelog.i:
65 65 0000: 00 00 de ad |....|
66 66
67 67 $ f --hexdump --bytes 4 .hg/store/data/foo.i
68 68 .hg/store/data/foo.i:
69 69 0000: 00 00 de ad |....|
70
71 The expected files are generated
72 --------------------------------
73
74 We should have:
75 - a docket
76 - an index file with a unique name
77 - a data file
78
79 $ ls .hg/store/00changelog* .hg/store/00manifest*
80 .hg/store/00changelog-b870a51b.idx
81 .hg/store/00changelog.d
82 .hg/store/00changelog.i
83 .hg/store/00manifest-88698448.idx
84 .hg/store/00manifest.d
85 .hg/store/00manifest.i
General Comments 0
You need to be logged in to leave comments. Login now