##// END OF EJS Templates
revlogutils: unconditionally pass version to random seed...
Gregory Szorc -
r49764:0aae0e2e default
parent child Browse files
Show More
@@ -1,435 +1,431 b''
1 1 # docket - code related to revlog "docket"
2 2 #
3 3 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 ### Revlog docket file
9 9 #
10 10 # The revlog is stored on disk using multiple files:
11 11 #
12 12 # * a small docket file, containing metadata and a pointer,
13 13 #
14 14 # * an index file, containing fixed width information about revisions,
15 15 #
16 16 # * a data file, containing variable width data for these revisions,
17 17
18 18
19 19 import errno
20 20 import os
21 21 import random
22 22 import struct
23 23
24 24 from .. import (
25 25 encoding,
26 26 error,
27 27 node,
28 pycompat,
29 28 util,
30 29 )
31 30
32 31 from . import (
33 32 constants,
34 33 )
35 34
36 35
def make_uid(id_size=8):
    """Return a new random unique identifier of ``id_size`` ascii characters.

    Hex-encoding doubles the length of the underlying data, so only
    ``id_size // 2`` random bytes are drawn to end up with an identifier
    of the requested final size.
    """
    raw_bytes = os.urandom(id_size // 2)
    return node.hex(raw_bytes)
44 43
45 44
# some special test logic to avoid annoying random output in the tests
#
# When the HGTEST_UUIDFILE environment variable points at a file, the
# random `make_uid` above is replaced by a deterministic variant that
# derives each new uid from the previous one persisted in that file.
stable_docket_file = encoding.environ.get(b'HGTEST_UUIDFILE')

if stable_docket_file:

    def make_uid(id_size=8):
        """Deterministic test-only replacement for `make_uid`.

        Reads the previous uid from `stable_docket_file` (falling back to a
        fixed seed when the file does not exist yet), hashes it into an
        integer seed, and uses a seeded PRNG to produce the next uid, which
        is written back to the file for the following call.
        """
        try:
            with open(stable_docket_file, mode='rb') as f:
                seed = f.read().strip()
        except IOError as inst:
            # only a missing file is expected; re-raise anything else
            if inst.errno != errno.ENOENT:
                raise
            seed = b'04'  # chosen by a fair dice roll. guaranteed to be random
        iter_seed = iter(seed)
        # some basic circular sum hashing on 64 bits
        int_seed = 0
        low_mask = int('1' * 35, 2)
        for i in iter_seed:
            high_part = int_seed >> 35
            low_part = (int_seed & low_mask) << 28
            int_seed = high_part + low_part + i
        r = random.Random()
        # version=1 keeps the seeding scheme stable across Python releases
        r.seed(int_seed, version=1)
        # once we drop python 3.8 support we can simply use r.randbytes
        raw = r.getrandbits(id_size * 4)
        # the packing below is hard-coded for a 4-byte (8 hex chars) uid
        assert id_size == 8
        p = struct.pack('>L', raw)
        new = node.hex(p)
        with open(stable_docket_file, 'wb') as f:
            f.write(new)
        return new
80 76
81 77
# Docket format
#
# * 4 bytes: revlog version
# | This is mandatory as docket must be compatible with the previous
# | revlog index header.
# * 1 byte:  size of index uuid
# * 1 byte:  number of outdated index uuids
# * 1 byte:  size of data uuid
# * 1 byte:  number of outdated data uuids
# * 1 byte:  size of sidedata uuid
# * 1 byte:  number of outdated sidedata uuids
# * 4 bytes: size of index-data
# * 4 bytes: pending size of index-data
# * 4 bytes: size of data
# * 4 bytes: pending size of data
# * 4 bytes: size of sidedata
# * 4 bytes: pending size of sidedata
# * 1 byte:  default compression header
#
# NOTE(review): field order above matches the pack order in
# `RevlogDocket._serialize` (official/pending pairs per file); the 'L'
# fields are 4 bytes assuming INDEX_HEADER_FMT carries a '>' byte-order
# prefix — confirm against `constants.INDEX_HEADER_FMT`.
S_HEADER = struct.Struct(constants.INDEX_HEADER_FMT + b'BBBBBBLLLLLLc')
# Per outdated-uid record:
# * 1 byte:  size of the uid
# * 4 bytes: size of the file that uid refers to
S_OLD_UID = struct.Struct('>BL')
104 100
105 101
class RevlogDocket(object):
    """Metadata associated with a revlog.

    Tracks the uid and size of the index, data and sidedata files backing a
    revlog, together with "pending" sizes used while a transaction is in
    flight, and lists of (uid, size) pairs for superseded ("older") files.
    Persisted on disk through `write`/`_serialize` in the binary layout
    documented above `S_HEADER`.
    """

    def __init__(
        self,
        revlog,
        use_pending=False,
        version_header=None,
        index_uuid=None,
        older_index_uuids=(),
        data_uuid=None,
        older_data_uuids=(),
        sidedata_uuid=None,
        older_sidedata_uuids=(),
        index_end=0,
        pending_index_end=0,
        data_end=0,
        pending_data_end=0,
        sidedata_end=0,
        pending_sidedata_end=0,
        default_compression_header=None,
    ):
        # raw version header (shares its first bytes with a classic
        # revlog index header)
        self._version_header = version_header
        # a docket opened with the pending sizes must never be written back
        self._read_only = bool(use_pending)
        self._dirty = False
        self._radix = revlog.radix
        self._path = revlog._docket_file
        self._opener = revlog.opener
        self._index_uuid = index_uuid
        # lists of (uid, size) pairs for files superseded by the current ones
        self._older_index_uuids = older_index_uuids
        self._data_uuid = data_uuid
        self._older_data_uuids = older_data_uuids
        self._sidedata_uuid = sidedata_uuid
        self._older_sidedata_uuids = older_sidedata_uuids
        # a uid must never be reused across the three file families
        assert not set(older_index_uuids) & set(older_data_uuids)
        assert not set(older_data_uuids) & set(older_sidedata_uuids)
        assert not set(older_index_uuids) & set(older_sidedata_uuids)
        # these asserts should be True as long as we have a single index filename
        assert index_end <= pending_index_end
        assert data_end <= pending_data_end
        assert sidedata_end <= pending_sidedata_end
        self._initial_index_end = index_end
        self._pending_index_end = pending_index_end
        self._initial_data_end = data_end
        self._pending_data_end = pending_data_end
        self._initial_sidedata_end = sidedata_end
        self._pending_sidedata_end = pending_sidedata_end
        # the "effective" sizes depend on whether the caller wants to see
        # the in-transaction (pending) state or the committed one
        if use_pending:
            self._index_end = self._pending_index_end
            self._data_end = self._pending_data_end
            self._sidedata_end = self._pending_sidedata_end
        else:
            self._index_end = self._initial_index_end
            self._data_end = self._initial_data_end
            self._sidedata_end = self._initial_sidedata_end
        self.default_compression_header = default_compression_header

    def index_filepath(self):
        """file path to the current index file associated to this docket"""
        # very simplistic version at first
        # (a uid is lazily allocated on first use)
        if self._index_uuid is None:
            self._index_uuid = make_uid()
        return b"%s-%s.idx" % (self._radix, self._index_uuid)

    def new_index_file(self):
        """switch index file to a new UID

        The previous index UID is moved to the "older" list."""
        old = (self._index_uuid, self._index_end)
        self._older_index_uuids.insert(0, old)
        self._index_uuid = make_uid()
        return self.index_filepath()

    def old_index_filepaths(self, include_empty=True):
        """yield file path to older index files associated to this docket"""
        # very simplistic version at first
        for uuid, size in self._older_index_uuids:
            if include_empty or size > 0:
                yield b"%s-%s.idx" % (self._radix, uuid)

    def data_filepath(self):
        """file path to the current data file associated to this docket"""
        # very simplistic version at first
        # (a uid is lazily allocated on first use)
        if self._data_uuid is None:
            self._data_uuid = make_uid()
        return b"%s-%s.dat" % (self._radix, self._data_uuid)

    def new_data_file(self):
        """switch data file to a new UID

        The previous data UID is moved to the "older" list."""
        old = (self._data_uuid, self._data_end)
        self._older_data_uuids.insert(0, old)
        self._data_uuid = make_uid()
        return self.data_filepath()

    def old_data_filepaths(self, include_empty=True):
        """yield file path to older data files associated to this docket"""
        # very simplistic version at first
        for uuid, size in self._older_data_uuids:
            if include_empty or size > 0:
                yield b"%s-%s.dat" % (self._radix, uuid)

    def sidedata_filepath(self):
        """file path to the current sidedata file associated to this docket"""
        # very simplistic version at first
        # (a uid is lazily allocated on first use)
        if self._sidedata_uuid is None:
            self._sidedata_uuid = make_uid()
        return b"%s-%s.sda" % (self._radix, self._sidedata_uuid)

    def new_sidedata_file(self):
        """switch sidedata file to a new UID

        The previous sidedata UID is moved to the "older" list."""
        old = (self._sidedata_uuid, self._sidedata_end)
        self._older_sidedata_uuids.insert(0, old)
        self._sidedata_uuid = make_uid()
        return self.sidedata_filepath()

    def old_sidedata_filepaths(self, include_empty=True):
        """yield file path to older sidedata files associated to this docket"""
        # very simplistic version at first
        for uuid, size in self._older_sidedata_uuids:
            if include_empty or size > 0:
                yield b"%s-%s.sda" % (self._radix, uuid)

    @property
    def index_end(self):
        # effective size of the index file (pending or committed, depending
        # on how the docket was opened)
        return self._index_end

    @index_end.setter
    def index_end(self, new_size):
        if new_size != self._index_end:
            self._index_end = new_size
            # flag the docket so the change is persisted on the next write()
            self._dirty = True

    @property
    def data_end(self):
        # effective size of the data file (pending or committed)
        return self._data_end

    @data_end.setter
    def data_end(self, new_size):
        if new_size != self._data_end:
            self._data_end = new_size
            # flag the docket so the change is persisted on the next write()
            self._dirty = True

    @property
    def sidedata_end(self):
        # effective size of the sidedata file (pending or committed)
        return self._sidedata_end

    @sidedata_end.setter
    def sidedata_end(self, new_size):
        if new_size != self._sidedata_end:
            self._sidedata_end = new_size
            # flag the docket so the change is persisted on the next write()
            self._dirty = True

    def write(self, transaction, pending=False, stripping=False):
        """write the modifications to disk if any

        This makes the new content visible to all processes.

        Returns True when something was actually written, False when the
        docket was not dirty.  Raises ProgrammingError on a read-only
        (pending-opened) docket."""
        if not self._dirty:
            return False
        else:
            if self._read_only:
                msg = b'writing read-only docket: %s'
                msg %= self._path
                raise error.ProgrammingError(msg)
            if not stripping:
                # XXX we could leverage the docket while stripping. However it
                # is not powerful enough at the time of this comment
                transaction.addbackup(self._path, location=b'store')
            # atomictemp makes the update all-or-nothing for readers
            with self._opener(self._path, mode=b'w', atomictemp=True) as f:
                f.write(self._serialize(pending=pending))
            # if pending, we still need to write the final data eventually
            self._dirty = pending
            return True

    def _serialize(self, pending=False):
        """Return the docket serialized as bytes.

        When `pending` is True the "official" sizes written out are the
        initial (pre-transaction) ones, so other readers do not see the
        in-flight data yet; the pending sizes are always written alongside.
        The field order here must stay in sync with `parse_docket`."""
        if pending:
            official_index_end = self._initial_index_end
            official_data_end = self._initial_data_end
            official_sidedata_end = self._initial_sidedata_end
        else:
            official_index_end = self._index_end
            official_data_end = self._data_end
            official_sidedata_end = self._sidedata_end

        # this assert should be True as long as we have a single index filename
        assert official_data_end <= self._data_end
        assert official_sidedata_end <= self._sidedata_end
        data = (
            self._version_header,
            len(self._index_uuid),
            len(self._older_index_uuids),
            len(self._data_uuid),
            len(self._older_data_uuids),
            len(self._sidedata_uuid),
            len(self._older_sidedata_uuids),
            official_index_end,
            self._index_end,
            official_data_end,
            self._data_end,
            official_sidedata_end,
            self._sidedata_end,
            self.default_compression_header,
        )
        s = []
        s.append(S_HEADER.pack(*data))

        # for each file family: current uid, then all the fixed-width
        # (uid_size, file_size) records, then all the uids themselves
        # (mirrored by `_parse_old_uids` on the reading side)
        s.append(self._index_uuid)
        for u, size in self._older_index_uuids:
            s.append(S_OLD_UID.pack(len(u), size))
        for u, size in self._older_index_uuids:
            s.append(u)

        s.append(self._data_uuid)
        for u, size in self._older_data_uuids:
            s.append(S_OLD_UID.pack(len(u), size))
        for u, size in self._older_data_uuids:
            s.append(u)

        s.append(self._sidedata_uuid)
        for u, size in self._older_sidedata_uuids:
            s.append(S_OLD_UID.pack(len(u), size))
        for u, size in self._older_sidedata_uuids:
            s.append(u)
        return b''.join(s)
333 329
334 330
def default_docket(revlog, version_header):
    """Return a fresh docket for ``revlog``, or None when the revlog
    version encoded in ``version_header`` does not use a docket."""
    # only the low 16 bits of the header carry the revlog version
    version = version_header & 0xFFFF
    docketed_versions = (constants.REVLOGV2, constants.CHANGELOGV2)
    if version not in docketed_versions:
        return None
    compression = util.compengines[revlog._compengine].revlogheader()
    new_docket = RevlogDocket(
        revlog,
        version_header=version_header,
        default_compression_header=compression,
    )
    # brand new docket: mark it dirty so it gets written out
    new_docket._dirty = True
    return new_docket
348 344
349 345
def _parse_old_uids(get_data, count):
    """Read ``count`` (uid, file_size) entries through the ``get_data`` closure.

    The on-disk layout stores all the fixed-width (uid_size, file_size)
    records first, followed by all the variable-width uids, hence the two
    successive passes."""
    sizes = [
        S_OLD_UID.unpack(get_data(S_OLD_UID.size)) for _ in range(count)
    ]
    return [
        (get_data(uid_size), file_size) for uid_size, file_size in sizes
    ]
361 357
362 358
def parse_docket(revlog, data, use_pending=False):
    """given some docket data return a docket object for the given revlog

    The field order below must stay in sync with `S_HEADER` and with
    `RevlogDocket._serialize`.  Raises error.Abort when `data` is shorter
    than the sizes recorded in its own header."""
    header = S_HEADER.unpack(data[: S_HEADER.size])

    # this is a mutable closure capture used in `get_data`
    offset = [S_HEADER.size]

    def get_data(size):
        """utility closure to access the `size` next bytes"""
        if offset[0] + size > len(data):
            # XXX better class
            msg = b"docket is too short, expected %d got %d"
            msg %= (offset[0] + size, len(data))
            raise error.Abort(msg)
        raw = data[offset[0] : offset[0] + size]
        offset[0] += size
        return raw

    iheader = iter(header)

    version_header = next(iheader)

    # each file family: uid size (header) -> uid bytes (trailing data),
    # then the count and records of superseded uids
    index_uuid_size = next(iheader)
    index_uuid = get_data(index_uuid_size)

    older_index_uuid_count = next(iheader)
    older_index_uuids = _parse_old_uids(get_data, older_index_uuid_count)

    data_uuid_size = next(iheader)
    data_uuid = get_data(data_uuid_size)

    older_data_uuid_count = next(iheader)
    older_data_uuids = _parse_old_uids(get_data, older_data_uuid_count)

    sidedata_uuid_size = next(iheader)
    sidedata_uuid = get_data(sidedata_uuid_size)

    older_sidedata_uuid_count = next(iheader)
    older_sidedata_uuids = _parse_old_uids(get_data, older_sidedata_uuid_count)

    # official and pending size pairs, in serialization order
    index_size = next(iheader)

    pending_index_size = next(iheader)

    data_size = next(iheader)

    pending_data_size = next(iheader)

    sidedata_size = next(iheader)

    pending_sidedata_size = next(iheader)

    default_compression_header = next(iheader)

    docket = RevlogDocket(
        revlog,
        use_pending=use_pending,
        version_header=version_header,
        index_uuid=index_uuid,
        older_index_uuids=older_index_uuids,
        data_uuid=data_uuid,
        older_data_uuids=older_data_uuids,
        sidedata_uuid=sidedata_uuid,
        older_sidedata_uuids=older_sidedata_uuids,
        index_end=index_size,
        pending_index_end=pending_index_size,
        data_end=data_size,
        pending_data_end=pending_data_size,
        sidedata_end=sidedata_size,
        pending_sidedata_end=pending_sidedata_size,
        default_compression_header=default_compression_header,
    )
    return docket
General Comments 0
You need to be logged in to leave comments. Login now