revlog: add a way to keep track of older uids in the docket...
marmoute -
r48246:c2526315 default
@@ -1,335 +1,393 b''
1 1 # docket - code related to revlog "docket"
2 2 #
3 3 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 ### Revlog docket file
9 9 #
10 10 # The revlog is stored on disk using multiple files:
11 11 #
12 12 # * a small docket file, containing metadata and a pointer,
13 13 #
14 14 # * an index file, containing fixed width information about revisions,
15 15 #
16 16 # * a data file, containing variable width data for these revisions,
17 17
18 18 from __future__ import absolute_import
19 19
20 20 import errno
21 21 import os
22 22 import random
23 23 import struct
24 24
25 25 from .. import (
26 26 encoding,
27 27 error,
28 28 node,
29 29 pycompat,
30 30 util,
31 31 )
32 32
33 33 from . import (
34 34 constants,
35 35 )
36 36
37 37
38 38 def make_uid(id_size=8):
39 39 """return a new unique identifier.
40 40
41 41 The identifier is random and composed of ascii characters."""
42 42 # since we "hex" the result, we only need half as many random bytes to
43 43 # produce a final uuid of length id_size
44 44 return node.hex(os.urandom(id_size // 2))
45 45
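Since node.hex doubles the length of its input, id_size // 2 random bytes yield exactly id_size ascii characters. A minimal standalone sketch, substituting binascii.hexlify for node.hex (an assumption about its behavior here):

    import binascii
    import os

    def make_uid_sketch(id_size=8):
        # id_size // 2 raw bytes, doubled in length by hex encoding
        return binascii.hexlify(os.urandom(id_size // 2))

    assert len(make_uid_sketch()) == 8  # e.g. b'9f86d081'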
46 46
47 47 # some special test logic to avoid annoying random output in the tests
48 48 stable_docket_file = encoding.environ.get(b'HGTEST_UUIDFILE')
49 49
50 50 if stable_docket_file:
51 51
52 52 def make_uid(id_size=8):
53 53 try:
54 54 with open(stable_docket_file, mode='rb') as f:
55 55 seed = f.read().strip()
56 56 except IOError as inst:
57 57 if inst.errno != errno.ENOENT:
58 58 raise
59 59 seed = b'04' # chosen by a fair dice roll. guaranteed to be random
60 60 if pycompat.ispy3:
61 61 iter_seed = iter(seed)
62 62 else:
63 63 # pytype: disable=wrong-arg-types
64 64 iter_seed = (ord(c) for c in seed)
65 65 # pytype: enable=wrong-arg-types
66 66 # some basic circular sum hashing on 64 bits
67 67 int_seed = 0
68 68 low_mask = int('1' * 35, 2)
69 69 for i in iter_seed:
70 70 high_part = int_seed >> 35
71 71 low_part = (int_seed & low_mask) << 28
72 72 int_seed = high_part + low_part + i
73 73 r = random.Random()
74 74 if pycompat.ispy3:
75 75 r.seed(int_seed, version=1)
76 76 else:
77 77 r.seed(int_seed)
78 78 # once we drop python 3.8 support we can simply use r.randbytes
79 79 raw = r.getrandbits(id_size * 4)
80 80 assert id_size == 8
81 81 p = struct.pack('>L', raw)
82 82 new = node.hex(p)
83 83 with open(stable_docket_file, 'wb') as f:
84 84 f.write(new)
85 85 return new
86 86
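The folding loop above can be exercised on its own. This sketch reproduces it under the assumption of Python 3, where iterating over bytes yields integers, and shows that a fixed seed always produces the same uid:

    import binascii
    import random
    import struct

    def circular_sum(seed_bytes):
        # fold each byte into the accumulator, mixing the high and
        # low halves on every step (same loop as above)
        int_seed = 0
        low_mask = int('1' * 35, 2)
        for i in seed_bytes:
            high_part = int_seed >> 35
            low_part = (int_seed & low_mask) << 28
            int_seed = high_part + low_part + i
        return int_seed

    r = random.Random()
    r.seed(circular_sum(b'04'), version=1)
    raw = struct.pack('>L', r.getrandbits(8 * 4))  # id_size * 4 bits
    print(binascii.hexlify(raw))  # identical output on every run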
87 87
88 88 # Docket format
89 89 #
90 90 # * 4 bytes: revlog version
91 91 # | This is mandatory as docket must be compatible with the previous
92 92 # | revlog index header.
93 93 # * 1 byte: size of index uuid
94 # * 1 byte: number of outdated index uuids
94 95 # * 1 byte: size of data uuid
96 # * 1 byte: number of outdated data uuids
95 97 # * 1 byte: size of sidedata uuid
98 # * 1 byte: number of outdated sidedata uuids
96 99 # * 8 bytes: size of index-data
97 100 # * 8 bytes: pending size of index-data
98 101 # * 8 bytes: size of data
99 102 # * 8 bytes: size of sidedata
100 103 # * 8 bytes: pending size of data
101 104 # * 8 bytes: pending size of sidedata
102 105 # * 1 byte: default compression header
103 S_HEADER = struct.Struct(constants.INDEX_HEADER_FMT + b'BBBLLLLLLc')
106 S_HEADER = struct.Struct(constants.INDEX_HEADER_FMT + b'BBBBBBLLLLLLc')
107 # * 1 byte: size of the old uid
108 # * 4 bytes: size of the file
109 S_OLD_UID = struct.Struct('>BL')
104 110
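To make the per-entry layout concrete, here is a round trip through the S_OLD_UID record format, with a hypothetical uid (the real records are produced by _serialize below):

    import struct

    S_OLD_UID = struct.Struct('>BL')  # 1-byte uid length + 4-byte file size

    uid = b'deadbeef'  # hypothetical old uid
    record = S_OLD_UID.pack(len(uid), 1024) + uid
    uid_size, file_size = S_OLD_UID.unpack(record[:S_OLD_UID.size])
    assert (uid_size, file_size) == (8, 1024)
    assert record[S_OLD_UID.size:S_OLD_UID.size + uid_size] == uid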
105 111
106 112 class RevlogDocket(object):
107 113 """metadata associated with revlog"""
108 114
109 115 def __init__(
110 116 self,
111 117 revlog,
112 118 use_pending=False,
113 119 version_header=None,
114 120 index_uuid=None,
121 older_index_uuids=(),
115 122 data_uuid=None,
123 older_data_uuids=(),
116 124 sidedata_uuid=None,
125 older_sidedata_uuids=(),
117 126 index_end=0,
118 127 pending_index_end=0,
119 128 data_end=0,
120 129 pending_data_end=0,
121 130 sidedata_end=0,
122 131 pending_sidedata_end=0,
123 132 default_compression_header=None,
124 133 ):
125 134 self._version_header = version_header
126 135 self._read_only = bool(use_pending)
127 136 self._dirty = False
128 137 self._radix = revlog.radix
129 138 self._path = revlog._docket_file
130 139 self._opener = revlog.opener
131 140 self._index_uuid = index_uuid
141 self._older_index_uuids = older_index_uuids
132 142 self._data_uuid = data_uuid
143 self._older_data_uuids = older_data_uuids
133 144 self._sidedata_uuid = sidedata_uuid
145 self._older_sidedata_uuids = older_sidedata_uuids
146 assert not set(older_index_uuids) & set(older_data_uuids)
147 assert not set(older_data_uuids) & set(older_sidedata_uuids)
148 assert not set(older_index_uuids) & set(older_sidedata_uuids)
134 149 # these asserts should be True as long as we have a single index filename
135 150 assert index_end <= pending_index_end
136 151 assert data_end <= pending_data_end
137 152 assert sidedata_end <= pending_sidedata_end
138 153 self._initial_index_end = index_end
139 154 self._pending_index_end = pending_index_end
140 155 self._initial_data_end = data_end
141 156 self._pending_data_end = pending_data_end
142 157 self._initial_sidedata_end = sidedata_end
143 158 self._pending_sidedata_end = pending_sidedata_end
144 159 if use_pending:
145 160 self._index_end = self._pending_index_end
146 161 self._data_end = self._pending_data_end
147 162 self._sidedata_end = self._pending_sidedata_end
148 163 else:
149 164 self._index_end = self._initial_index_end
150 165 self._data_end = self._initial_data_end
151 166 self._sidedata_end = self._initial_sidedata_end
152 167 self.default_compression_header = default_compression_header
153 168
154 169 def index_filepath(self):
155 170 """file path to the current index file associated to this docket"""
156 171 # very simplistic version at first
157 172 if self._index_uuid is None:
158 173 self._index_uuid = make_uid()
159 174 return b"%s-%s.idx" % (self._radix, self._index_uuid)
160 175
161 176 def data_filepath(self):
162 177 """file path to the current data file associated to this docket"""
163 178 # very simplistic version at first
164 179 if self._data_uuid is None:
165 180 self._data_uuid = make_uid()
166 181 return b"%s-%s.dat" % (self._radix, self._data_uuid)
167 182
168 183 def sidedata_filepath(self):
169 184 """file path to the current sidedata file associated to this docket"""
170 185 # very simplistic version at first
171 186 if self._sidedata_uuid is None:
172 187 self._sidedata_uuid = make_uid()
173 188 return b"%s-%s.sda" % (self._radix, self._sidedata_uuid)
174 189
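For illustration, with a hypothetical radix and uuids, the three methods produce sibling files that differ only in uuid and suffix:

    radix = b'00changelog'  # hypothetical value
    for uuid, suffix in [(b'a1b2c3d4', b'idx'),   # index_filepath
                         (b'e5f6a7b8', b'dat'),   # data_filepath
                         (b'c9d0e1f2', b'sda')]:  # sidedata_filepath
        print(b"%s-%s.%s" % (radix, uuid, suffix))
    # b'00changelog-a1b2c3d4.idx', b'00changelog-e5f6a7b8.dat', ...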
175 190 @property
176 191 def index_end(self):
177 192 return self._index_end
178 193
179 194 @index_end.setter
180 195 def index_end(self, new_size):
181 196 if new_size != self._index_end:
182 197 self._index_end = new_size
183 198 self._dirty = True
184 199
185 200 @property
186 201 def data_end(self):
187 202 return self._data_end
188 203
189 204 @data_end.setter
190 205 def data_end(self, new_size):
191 206 if new_size != self._data_end:
192 207 self._data_end = new_size
193 208 self._dirty = True
194 209
195 210 @property
196 211 def sidedata_end(self):
197 212 return self._sidedata_end
198 213
199 214 @sidedata_end.setter
200 215 def sidedata_end(self, new_size):
201 216 if new_size != self._sidedata_end:
202 217 self._sidedata_end = new_size
203 218 self._dirty = True
204 219
205 220 def write(self, transaction, pending=False, stripping=False):
206 221 """write the modification of disk if any
207 222
208 223 This make the new content visible to all process"""
209 224 if not self._dirty:
210 225 return False
211 226 else:
212 227 if self._read_only:
213 228 msg = b'writing read-only docket: %s'
214 229 msg %= self._path
215 230 raise error.ProgrammingError(msg)
216 231 if not stripping:
217 232 # XXX we could leverage the docket while stripping. However, it
218 233 # is not powerful enough at the time of this comment
219 234 transaction.addbackup(self._path, location=b'store')
220 235 with self._opener(self._path, mode=b'w', atomictemp=True) as f:
221 236 f.write(self._serialize(pending=pending))
222 237 # if pending, we still need to write the final data eventually
223 238 self._dirty = pending
224 239 return True
225 240
226 241 def _serialize(self, pending=False):
227 242 if pending:
228 243 official_index_end = self._initial_index_end
229 244 official_data_end = self._initial_data_end
230 245 official_sidedata_end = self._initial_sidedata_end
231 246 else:
232 247 official_index_end = self._index_end
233 248 official_data_end = self._data_end
234 249 official_sidedata_end = self._sidedata_end
235 250
236 251 # these asserts should be True as long as we have a single index filename
237 252 assert official_data_end <= self._data_end
238 253 assert official_sidedata_end <= self._sidedata_end
239 254 data = (
240 255 self._version_header,
241 256 len(self._index_uuid),
257 len(self._older_index_uuids),
242 258 len(self._data_uuid),
259 len(self._older_data_uuids),
243 260 len(self._sidedata_uuid),
261 len(self._older_sidedata_uuids),
244 262 official_index_end,
245 263 self._index_end,
246 264 official_data_end,
247 265 self._data_end,
248 266 official_sidedata_end,
249 267 self._sidedata_end,
250 268 self.default_compression_header,
251 269 )
252 270 s = []
253 271 s.append(S_HEADER.pack(*data))
272
254 273 s.append(self._index_uuid)
274 for u, size in self._older_index_uuids:
275 s.append(S_OLD_UID.pack(len(u), size))
276 for u, size in self._older_index_uuids:
277 s.append(u)
278
255 279 s.append(self._data_uuid)
280 for u, size in self._older_data_uuids:
281 s.append(S_OLD_UID.pack(len(u), size))
282 for u, size in self._older_data_uuids:
283 s.append(u)
284
256 285 s.append(self._sidedata_uuid)
286 for u, size in self._older_sidedata_uuids:
287 s.append(S_OLD_UID.pack(len(u), size))
288 for u, size in self._older_sidedata_uuids:
289 s.append(u)
257 290 return b''.join(s)
258 291
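Note the two-pass ordering in each section: all fixed-size (length, file size) records are emitted before any of the variable-width uid bytes, which is what lets _parse_old_uids below consume the records first. A sketch with hypothetical entries:

    import struct

    S_OLD_UID = struct.Struct('>BL')

    # hypothetical older entries: (uid, size of the file it named)
    older = [(b'11112222', 512), (b'33334444', 2048)]
    s = []
    for u, size in older:
        s.append(S_OLD_UID.pack(len(u), size))
    for u, size in older:
        s.append(u)
    # two 5-byte fixed records first, then the two 8-byte uids
    assert len(b''.join(s)) == 2 * S_OLD_UID.size + 8 + 8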
259 292
260 293 def default_docket(revlog, version_header):
261 294 """given a revlog version a new docket object for the given revlog"""
262 295 rl_version = version_header & 0xFFFF
263 296 if rl_version not in (constants.REVLOGV2, constants.CHANGELOGV2):
264 297 return None
265 298 comp = util.compengines[revlog._compengine].revlogheader()
266 299 docket = RevlogDocket(
267 300 revlog,
268 301 version_header=version_header,
269 302 default_compression_header=comp,
270 303 )
271 304 docket._dirty = True
272 305 return docket
273 306
274 307
308 def _parse_old_uids(get_data, count):
309 all_sizes = []
310 all_uids = []
311 for i in range(0, count):
312 raw = get_data(S_OLD_UID.size)
313 all_sizes.append(S_OLD_UID.unpack(raw))
314
315 for uid_size, file_size in all_sizes:
316 uid = get_data(uid_size)
317 all_uids.append((uid, file_size))
318 return all_uids
319
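A round trip through the same layout, using an io.BytesIO-backed reader in place of the get_data closure defined in parse_docket (hypothetical uids again):

    import io
    import struct

    S_OLD_UID = struct.Struct('>BL')

    payload = (S_OLD_UID.pack(8, 512) + S_OLD_UID.pack(8, 2048)
               + b'11112222' + b'33334444')
    get_data = io.BytesIO(payload).read  # same signature as the closure

    # first pass reads the fixed records, second pass the uid bytes
    sizes = [S_OLD_UID.unpack(get_data(S_OLD_UID.size)) for _ in range(2)]
    uids = [(get_data(us), fs) for us, fs in sizes]
    assert uids == [(b'11112222', 512), (b'33334444', 2048)]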
320
275 321 def parse_docket(revlog, data, use_pending=False):
276 322 """given some docket data return a docket object for the given revlog"""
277 323 header = S_HEADER.unpack(data[: S_HEADER.size])
278 324
279 325 # this is a mutable closure capture used in `get_data`
280 326 offset = [S_HEADER.size]
281 327
282 328 def get_data(size):
283 329 """utility closure to access the `size` next bytes"""
284 330 if offset[0] + size > len(data):
285 331 # XXX better class
286 332 msg = b"docket is too short, expected %d got %d"
287 333 msg %= (offset[0] + size, len(data))
288 334 raise error.Abort(msg)
289 335 raw = data[offset[0] : offset[0] + size]
290 336 offset[0] += size
291 337 return raw
292 338
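The one-element list is a py2/py3-portable stand-in for nonlocal: rebinding a plain integer inside get_data would create a new local, while mutating a captured list is visible across calls. A standalone sketch of the pattern:

    def make_reader(data):
        offset = [0]  # mutable cell; Python 2 has no `nonlocal`

        def get_data(size):
            raw = data[offset[0]:offset[0] + size]
            offset[0] += size
            return raw

        return get_data

    read = make_reader(b'abcdef')
    assert read(2) == b'ab'
    assert read(2) == b'cd'  # the captured offset advanced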
293 339 iheader = iter(header)
294 340
295 341 version_header = next(iheader)
296 342
297 343 index_uuid_size = next(iheader)
298 344 index_uuid = get_data(index_uuid_size)
299 345
346 older_index_uuid_count = next(iheader)
347 older_index_uuids = _parse_old_uids(get_data, older_index_uuid_count)
348
300 349 data_uuid_size = next(iheader)
301 350 data_uuid = get_data(data_uuid_size)
302 351
352 older_data_uuid_count = next(iheader)
353 older_data_uuids = _parse_old_uids(get_data, older_data_uuid_count)
354
303 355 sidedata_uuid_size = next(iheader)
304 356 sidedata_uuid = get_data(sidedata_uuid_size)
305 357
358 older_sidedata_uuid_count = next(iheader)
359 older_sidedata_uuids = _parse_old_uids(get_data, older_sidedata_uuid_count)
360
306 361 index_size = next(iheader)
307 362
308 363 pending_index_size = next(iheader)
309 364
310 365 data_size = next(iheader)
311 366
312 367 pending_data_size = next(iheader)
313 368
314 369 sidedata_size = next(iheader)
315 370
316 371 pending_sidedata_size = next(iheader)
317 372
318 373 default_compression_header = next(iheader)
319 374
320 375 docket = RevlogDocket(
321 376 revlog,
322 377 use_pending=use_pending,
323 378 version_header=version_header,
324 379 index_uuid=index_uuid,
380 older_index_uuids=older_index_uuids,
325 381 data_uuid=data_uuid,
382 older_data_uuids=older_data_uuids,
326 383 sidedata_uuid=sidedata_uuid,
384 older_sidedata_uuids=older_sidedata_uuids,
327 385 index_end=index_size,
328 386 pending_index_end=pending_index_size,
329 387 data_end=data_size,
330 388 pending_data_end=pending_data_size,
331 389 sidedata_end=sidedata_size,
332 390 pending_sidedata_end=pending_sidedata_size,
333 391 default_compression_header=default_compression_header,
334 392 )
335 393 return docket