revlogutils: remove Python 2 variant for iter_seed...
Gregory Szorc
r49763:ed2af456 default
@@ -1,440 +1,435 b''
1 1 # docket - code related to revlog "docket"
2 2 #
3 3 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 ### Revlog docket file
9 9 #
10 10 # The revlog is stored on disk using multiple files:
11 11 #
12 12 # * a small docket file, containing metadata and a pointer,
13 13 #
14 14 # * an index file, containing fixed width information about revisions,
15 15 #
16 16 # * a data file, containing variable width data for these revisions,
17 17
18 18
19 19 import errno
20 20 import os
21 21 import random
22 22 import struct
23 23
24 24 from .. import (
25 25 encoding,
26 26 error,
27 27 node,
28 28 pycompat,
29 29 util,
30 30 )
31 31
32 32 from . import (
33 33 constants,
34 34 )
35 35
36 36
37 37 def make_uid(id_size=8):
38 38 """return a new unique identifier.
39 39
40 40 The identifier is random and composed of ascii characters."""
41 41 # since we "hex" the result, we need half the number of bytes to have a
42 42 # final uuid of size ID_SIZE
43 43 return node.hex(os.urandom(id_size // 2))
44 44
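# Illustration (editorial sketch, not part of the module): hex-encoding
# doubles the length, so the default id_size of 8 consumes 4 random bytes:
#
#   make_uid()    # -> e.g. b'9f3adc01' (8 ascii hex characters;
#                 #    the actual value is random)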
45 45
46 46 # some special test logic to avoid annoying random output in the tests
47 47 stable_docket_file = encoding.environ.get(b'HGTEST_UUIDFILE')
48 48
49 49 if stable_docket_file:
50 50
51 51 def make_uid(id_size=8):
52 52 try:
53 53 with open(stable_docket_file, mode='rb') as f:
54 54 seed = f.read().strip()
55 55 except IOError as inst:
56 56 if inst.errno != errno.ENOENT:
57 57 raise
58 58 seed = b'04' # chosen by a fair dice roll. guaranteed to be random
59 if pycompat.ispy3:
60 iter_seed = iter(seed)
61 else:
62 # pytype: disable=wrong-arg-types
63 iter_seed = (ord(c) for c in seed)
64 # pytype: enable=wrong-arg-types
59 iter_seed = iter(seed)
65 60 # some basic circular sum hashing on 64 bits
66 61 int_seed = 0
67 62 low_mask = int('1' * 35, 2)
68 63 for i in iter_seed:
69 64 high_part = int_seed >> 35
70 65 low_part = (int_seed & low_mask) << 28
71 66 int_seed = high_part + low_part + i
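# Worked example of the folding above (derived from the code, using the
# fallback seed b'04'): the seed bytes are 0x30 and 0x34, so the first
# pass gives int_seed = 0x30, and the second gives
# (0x30 << 28) + 0x34 == 12884901940, which then seeds random.Random.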
72 67 r = random.Random()
73 68 if pycompat.ispy3:
74 69 r.seed(int_seed, version=1)
75 70 else:
76 71 r.seed(int_seed)
77 72 # once we drop python 3.8 support we can simply use r.randbytes
78 73 raw = r.getrandbits(id_size * 4)
79 74 assert id_size == 8
80 75 p = struct.pack('>L', raw)
81 76 new = node.hex(p)
82 77 with open(stable_docket_file, 'wb') as f:
83 78 f.write(new)
84 79 return new
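# With HGTEST_UUIDFILE set, each id is thus derived deterministically from
# the previous one persisted in that file, so repeated test runs see a
# stable sequence of "uids" instead of random output.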
85 80
86 81
87 82 # Docket format
88 83 #
89 84 # * 4 bytes: revlog version
90 85 # | This is mandatory as docket must be compatible with the previous
91 86 # | revlog index header.
92 87 # * 1 byte: size of index uuid
93 88 # * 1 byte: number of outdated index uuids
94 89 # * 1 byte: size of data uuid
95 90 # * 1 byte: number of outdated data uuids
96 91 # * 1 byte: size of sidedata uuid
97 92 # * 1 byte: number of outdated sidedata uuids
98 93 # * 4 bytes: size of index
99 94 # * 4 bytes: pending size of index
100 95 # * 4 bytes: size of data
101 96 # * 4 bytes: pending size of data
102 97 # * 4 bytes: size of sidedata
103 98 # * 4 bytes: pending size of sidedata
104 99 # * 1 byte: default compression header
105 100 S_HEADER = struct.Struct(constants.INDEX_HEADER_FMT + b'BBBBBBLLLLLLc')
106 101 # * 1 byte: size of the older uuid
107 102 # * 4 bytes: size of the corresponding file
108 103 S_OLD_UID = struct.Struct('>BL')
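# For orientation (computed from the format strings above, assuming the
# usual 4-byte '>I' INDEX_HEADER_FMT): S_HEADER.size is 4 + 6*1 + 6*4 + 1
# == 35 bytes, and each S_OLD_UID record is 1 + 4 == 5 bytes, since an
# 'L' field in a big-endian struct is 4 bytes.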
109 104
110 105
111 106 class RevlogDocket(object):
112 107 """metadata associated with revlog"""
113 108
114 109 def __init__(
115 110 self,
116 111 revlog,
117 112 use_pending=False,
118 113 version_header=None,
119 114 index_uuid=None,
120 115 older_index_uuids=(),
121 116 data_uuid=None,
122 117 older_data_uuids=(),
123 118 sidedata_uuid=None,
124 119 older_sidedata_uuids=(),
125 120 index_end=0,
126 121 pending_index_end=0,
127 122 data_end=0,
128 123 pending_data_end=0,
129 124 sidedata_end=0,
130 125 pending_sidedata_end=0,
131 126 default_compression_header=None,
132 127 ):
133 128 self._version_header = version_header
134 129 self._read_only = bool(use_pending)
135 130 self._dirty = False
136 131 self._radix = revlog.radix
137 132 self._path = revlog._docket_file
138 133 self._opener = revlog.opener
139 134 self._index_uuid = index_uuid
140 135 self._older_index_uuids = older_index_uuids
141 136 self._data_uuid = data_uuid
142 137 self._older_data_uuids = older_data_uuids
143 138 self._sidedata_uuid = sidedata_uuid
144 139 self._older_sidedata_uuids = older_sidedata_uuids
145 140 assert not set(older_index_uuids) & set(older_data_uuids)
146 141 assert not set(older_data_uuids) & set(older_sidedata_uuids)
147 142 assert not set(older_index_uuids) & set(older_sidedata_uuids)
148 143 # these asserts should be true as long as we have a single index filename
149 144 assert index_end <= pending_index_end
150 145 assert data_end <= pending_data_end
151 146 assert sidedata_end <= pending_sidedata_end
152 147 self._initial_index_end = index_end
153 148 self._pending_index_end = pending_index_end
154 149 self._initial_data_end = data_end
155 150 self._pending_data_end = pending_data_end
156 151 self._initial_sidedata_end = sidedata_end
157 152 self._pending_sidedata_end = pending_sidedata_end
158 153 if use_pending:
159 154 self._index_end = self._pending_index_end
160 155 self._data_end = self._pending_data_end
161 156 self._sidedata_end = self._pending_sidedata_end
162 157 else:
163 158 self._index_end = self._initial_index_end
164 159 self._data_end = self._initial_data_end
165 160 self._sidedata_end = self._initial_sidedata_end
166 161 self.default_compression_header = default_compression_header
167 162
168 163 def index_filepath(self):
169 164 """file path to the current index file associated to this docket"""
170 165 # very simplistic version at first
171 166 if self._index_uuid is None:
172 167 self._index_uuid = make_uid()
173 168 return b"%s-%s.idx" % (self._radix, self._index_uuid)
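# Hypothetical example (the radix below is made up): with a radix of
# b'data/foo' and a fresh uid of b'9f3adc01', the index would live in
# b'data/foo-9f3adc01.idx'; the .dat and .sda files follow the same pattern.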
174 169
175 170 def new_index_file(self):
176 171 """switch index file to a new UID
177 172
178 173 The previous index UID is moved to the "older" list."""
179 174 old = (self._index_uuid, self._index_end)
180 175 self._older_index_uuids.insert(0, old)
181 176 self._index_uuid = make_uid()
182 177 return self.index_filepath()
183 178
184 179 def old_index_filepaths(self, include_empty=True):
185 180 """yield file path to older index files associated to this docket"""
186 181 # very simplistic version at first
187 182 for uuid, size in self._older_index_uuids:
188 183 if include_empty or size > 0:
189 184 yield b"%s-%s.idx" % (self._radix, uuid)
190 185
191 186 def data_filepath(self):
192 187 """file path to the current data file associated to this docket"""
193 188 # very simplistic version at first
194 189 if self._data_uuid is None:
195 190 self._data_uuid = make_uid()
196 191 return b"%s-%s.dat" % (self._radix, self._data_uuid)
197 192
198 193 def new_data_file(self):
199 194 """switch data file to a new UID
200 195
201 196 The previous data UID is moved to the "older" list."""
202 197 old = (self._data_uuid, self._data_end)
203 198 self._older_data_uuids.insert(0, old)
204 199 self._data_uuid = make_uid()
205 200 return self.data_filepath()
206 201
207 202 def old_data_filepaths(self, include_empty=True):
208 203 """yield file path to older data files associated to this docket"""
209 204 # very simplistic version at first
210 205 for uuid, size in self._older_data_uuids:
211 206 if include_empty or size > 0:
212 207 yield b"%s-%s.dat" % (self._radix, uuid)
213 208
214 209 def sidedata_filepath(self):
215 210 """file path to the current sidedata file associated to this docket"""
216 211 # very simplistic version at first
217 212 if self._sidedata_uuid is None:
218 213 self._sidedata_uuid = make_uid()
219 214 return b"%s-%s.sda" % (self._radix, self._sidedata_uuid)
220 215
221 216 def new_sidedata_file(self):
222 217 """switch sidedata file to a new UID
223 218
224 219 The previous sidedata UID is moved to the "older" list."""
225 220 old = (self._sidedata_uuid, self._sidedata_end)
226 221 self._older_sidedata_uuids.insert(0, old)
227 222 self._sidedata_uuid = make_uid()
228 223 return self.sidedata_filepath()
229 224
230 225 def old_sidedata_filepaths(self, include_empty=True):
231 226 """yield file path to older sidedata files associated to this docket"""
232 227 # very simplistic version at first
233 228 for uuid, size in self._older_sidedata_uuids:
234 229 if include_empty or size > 0:
235 230 yield b"%s-%s.sda" % (self._radix, uuid)
236 231
237 232 @property
238 233 def index_end(self):
239 234 return self._index_end
240 235
241 236 @index_end.setter
242 237 def index_end(self, new_size):
243 238 if new_size != self._index_end:
244 239 self._index_end = new_size
245 240 self._dirty = True
246 241
247 242 @property
248 243 def data_end(self):
249 244 return self._data_end
250 245
251 246 @data_end.setter
252 247 def data_end(self, new_size):
253 248 if new_size != self._data_end:
254 249 self._data_end = new_size
255 250 self._dirty = True
256 251
257 252 @property
258 253 def sidedata_end(self):
259 254 return self._sidedata_end
260 255
261 256 @sidedata_end.setter
262 257 def sidedata_end(self, new_size):
263 258 if new_size != self._sidedata_end:
264 259 self._sidedata_end = new_size
265 260 self._dirty = True
266 261
267 262 def write(self, transaction, pending=False, stripping=False):
268 263 """write the modification of disk if any
269 264
270 265 This make the new content visible to all process"""
271 266 if not self._dirty:
272 267 return False
273 268 else:
274 269 if self._read_only:
275 270 msg = b'writing read-only docket: %s'
276 271 msg %= self._path
277 272 raise error.ProgrammingError(msg)
278 273 if not stripping:
279 274 # XXX we could leverage the docket while stripping. However it
280 275 # is not powerful enough at the time of this comment
281 276 transaction.addbackup(self._path, location=b'store')
282 277 with self._opener(self._path, mode=b'w', atomictemp=True) as f:
283 278 f.write(self._serialize(pending=pending))
284 279 # if pending, we still need to write the final data eventually
285 280 self._dirty = pending
286 281 return True
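# A hedged usage sketch (transaction handling simplified): the size
# setters above mark the docket dirty, a pending write only publishes the
# pending_* sizes, and a final write makes the new ends official:
#
#   docket.index_end = new_end        # marks the docket dirty
#   docket.write(tr, pending=True)    # serialized with old official ends
#   docket.write(tr)                  # publishes the new ends to everyone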
287 282
288 283 def _serialize(self, pending=False):
289 284 if pending:
290 285 official_index_end = self._initial_index_end
291 286 official_data_end = self._initial_data_end
292 287 official_sidedata_end = self._initial_sidedata_end
293 288 else:
294 289 official_index_end = self._index_end
295 290 official_data_end = self._data_end
296 291 official_sidedata_end = self._sidedata_end
297 292
298 293 # these asserts should be true as long as we have a single index filename
299 294 assert official_data_end <= self._data_end
300 295 assert official_sidedata_end <= self._sidedata_end
301 296 data = (
302 297 self._version_header,
303 298 len(self._index_uuid),
304 299 len(self._older_index_uuids),
305 300 len(self._data_uuid),
306 301 len(self._older_data_uuids),
307 302 len(self._sidedata_uuid),
308 303 len(self._older_sidedata_uuids),
309 304 official_index_end,
310 305 self._index_end,
311 306 official_data_end,
312 307 self._data_end,
313 308 official_sidedata_end,
314 309 self._sidedata_end,
315 310 self.default_compression_header,
316 311 )
317 312 s = []
318 313 s.append(S_HEADER.pack(*data))
319 314
320 315 s.append(self._index_uuid)
321 316 for u, size in self._older_index_uuids:
322 317 s.append(S_OLD_UID.pack(len(u), size))
323 318 for u, size in self._older_index_uuids:
324 319 s.append(u)
325 320
326 321 s.append(self._data_uuid)
327 322 for u, size in self._older_data_uuids:
328 323 s.append(S_OLD_UID.pack(len(u), size))
329 324 for u, size in self._older_data_uuids:
330 325 s.append(u)
331 326
332 327 s.append(self._sidedata_uuid)
333 328 for u, size in self._older_sidedata_uuids:
334 329 s.append(S_OLD_UID.pack(len(u), size))
335 330 for u, size in self._older_sidedata_uuids:
336 331 s.append(u)
337 332 return b''.join(s)
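# Resulting byte layout (a sketch read off the code above): the fixed
# S_HEADER block, then for each of index, data and sidedata in turn: the
# current uuid bytes, all S_OLD_UID (uuid-size, file-size) records, and
# finally the older uuid bytes themselves.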
338 333
339 334
340 335 def default_docket(revlog, version_header):
341 336 """given a revlog version a new docket object for the given revlog"""
342 337 rl_version = version_header & 0xFFFF
343 338 if rl_version not in (constants.REVLOGV2, constants.CHANGELOGV2):
344 339 return None
345 340 comp = util.compengines[revlog._compengine].revlogheader()
346 341 docket = RevlogDocket(
347 342 revlog,
348 343 version_header=version_header,
349 344 default_compression_header=comp,
350 345 )
351 346 docket._dirty = True
352 347 return docket
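# Note on the masking above: a revlog version header keeps the version in
# its low 16 bits and per-revlog feature flags in the high bits, hence
# `version_header & 0xFFFF` before comparing against the constants.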
353 348
354 349
355 350 def _parse_old_uids(get_data, count):
356 351 all_sizes = []
357 352 all_uids = []
358 353 for i in range(0, count):
359 354 raw = get_data(S_OLD_UID.size)
360 355 all_sizes.append(S_OLD_UID.unpack(raw))
361 356
362 357 for uid_size, file_size in all_sizes:
363 358 uid = get_data(uid_size)
364 359 all_uids.append((uid, file_size))
365 360 return all_uids
366 361
367 362
368 363 def parse_docket(revlog, data, use_pending=False):
369 364 """given some docket data return a docket object for the given revlog"""
370 365 header = S_HEADER.unpack(data[: S_HEADER.size])
371 366
372 367 # this is a mutable closure capture used in `get_data`
373 368 offset = [S_HEADER.size]
374 369
375 370 def get_data(size):
376 371 """utility closure to access the `size` next bytes"""
377 372 if offset[0] + size > len(data):
378 373 # XXX better class
379 374 msg = b"docket is too short, expected %d got %d"
380 375 msg %= (offset[0] + size, len(data))
381 376 raise error.Abort(msg)
382 377 raw = data[offset[0] : offset[0] + size]
383 378 offset[0] += size
384 379 return raw
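# Example of the cursor above: offset[0] starts at the end of the fixed
# header, and each get_data(n) call returns the next n bytes and advances
# it; a one-element list is used so the nested function can mutate the
# cursor without rebinding a name from the enclosing scope.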
385 380
386 381 iheader = iter(header)
387 382
388 383 version_header = next(iheader)
389 384
390 385 index_uuid_size = next(iheader)
391 386 index_uuid = get_data(index_uuid_size)
392 387
393 388 older_index_uuid_count = next(iheader)
394 389 older_index_uuids = _parse_old_uids(get_data, older_index_uuid_count)
395 390
396 391 data_uuid_size = next(iheader)
397 392 data_uuid = get_data(data_uuid_size)
398 393
399 394 older_data_uuid_count = next(iheader)
400 395 older_data_uuids = _parse_old_uids(get_data, older_data_uuid_count)
401 396
402 397 sidedata_uuid_size = next(iheader)
403 398 sidedata_uuid = get_data(sidedata_uuid_size)
404 399
405 400 older_sidedata_uuid_count = next(iheader)
406 401 older_sidedata_uuids = _parse_old_uids(get_data, older_sidedata_uuid_count)
407 402
408 403 index_size = next(iheader)
409 404
410 405 pending_index_size = next(iheader)
411 406
412 407 data_size = next(iheader)
413 408
414 409 pending_data_size = next(iheader)
415 410
416 411 sidedata_size = next(iheader)
417 412
418 413 pending_sidedata_size = next(iheader)
419 414
420 415 default_compression_header = next(iheader)
421 416
422 417 docket = RevlogDocket(
423 418 revlog,
424 419 use_pending=use_pending,
425 420 version_header=version_header,
426 421 index_uuid=index_uuid,
427 422 older_index_uuids=older_index_uuids,
428 423 data_uuid=data_uuid,
429 424 older_data_uuids=older_data_uuids,
430 425 sidedata_uuid=sidedata_uuid,
431 426 older_sidedata_uuids=older_sidedata_uuids,
432 427 index_end=index_size,
433 428 pending_index_end=pending_index_size,
434 429 data_end=data_size,
435 430 pending_data_end=pending_data_size,
436 431 sidedata_end=sidedata_size,
437 432 pending_sidedata_end=pending_sidedata_size,
438 433 default_compression_header=default_compression_header,
439 434 )
440 435 return docket