revlog: add docket method to request new content files...
marmoute
r48247:865c260d default
@@ -1,393 +1,420 @@
# docket - code related to revlog "docket"
#
# Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

### Revlog docket file
#
# The revlog is stored on disk using multiple files:
#
# * a small docket file, containing metadata and a pointer,
#
# * an index file, containing fixed width information about revisions,
#
# * a data file, containing variable width data for these revisions,

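# Illustration (hypothetical uids, not part of the code below): a revlog
# with radix b'00changelog' could therefore be spread over files such as
#
#   00changelog-8c794a21.idx   (index)
#   00changelog-f3091bd2.dat   (data)
#   00changelog-77e0ac5d.sda   (sidedata)
#
# plus the docket file itself (path provided by the revlog), which points
# at the currently active uids.
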
from __future__ import absolute_import

import errno
import os
import random
import struct

from .. import (
    encoding,
    error,
    node,
    pycompat,
    util,
)

from . import (
    constants,
)


def make_uid(id_size=8):
    """return a new unique identifier.

    The identifier is random and composed of ascii characters."""
    # since we "hex" the result, we only need half the number of random
    # bytes to end up with a final uuid of size ID_SIZE
    return node.hex(os.urandom(id_size // 2))

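# For example, with the default id_size=8, four random bytes are drawn and
# hex-encoded into eight ascii characters, e.g. b'9f3c2a71' (illustrative).
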
# some special test logic to avoid annoying random output in the tests
stable_docket_file = encoding.environ.get(b'HGTEST_UUIDFILE')

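# When HGTEST_UUIDFILE is set, make_uid is overridden below with a
# deterministic variant: the previous uid is read back from that file,
# folded into a seed, and used to generate (and persist) the next uid, so
# repeated test runs see stable identifiers.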
if stable_docket_file:

    def make_uid(id_size=8):
        try:
            with open(stable_docket_file, mode='rb') as f:
                seed = f.read().strip()
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            seed = b'04'  # chosen by a fair dice roll. guaranteed to be random
        if pycompat.ispy3:
            iter_seed = iter(seed)
        else:
            # pytype: disable=wrong-arg-types
            iter_seed = (ord(c) for c in seed)
            # pytype: enable=wrong-arg-types
        # some basic circular sum hashing on 64 bits
        int_seed = 0
        low_mask = int('1' * 35, 2)
        for i in iter_seed:
            high_part = int_seed >> 35
            low_part = (int_seed & low_mask) << 28
            int_seed = high_part + low_part + i
        r = random.Random()
        if pycompat.ispy3:
            r.seed(int_seed, version=1)
        else:
            r.seed(int_seed)
        # once we drop python 3.8 support we can simply use r.randbytes
        raw = r.getrandbits(id_size * 4)
        assert id_size == 8
        p = struct.pack('>L', raw)
        new = node.hex(p)
        with open(stable_docket_file, 'wb') as f:
            f.write(new)
        return new


# Docket format
#
# * 4 bytes: revlog version
# |   This is mandatory as docket must be compatible with the previous
# |   revlog index header.
# * 1 byte:  size of index uuid
# * 1 byte:  number of outdated index uuids
# * 1 byte:  size of data uuid
# * 1 byte:  number of outdated data uuids
# * 1 byte:  size of sidedata uuid
# * 1 byte:  number of outdated sidedata uuids
# * 4 bytes: size of index-data
# * 4 bytes: pending size of index-data
# * 4 bytes: size of data
# * 4 bytes: pending size of data
# * 4 bytes: size of sidedata
# * 4 bytes: pending size of sidedata
# * 1 byte:  default compression header
S_HEADER = struct.Struct(constants.INDEX_HEADER_FMT + b'BBBBBBLLLLLLc')
# * 1 byte:  size of the uuid
# * 4 bytes: size of the file
S_OLD_UID = struct.Struct('>BL')

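# With the formats above (assuming constants.INDEX_HEADER_FMT is the usual
# 4-byte b'>I' version field described above), the fixed part of the docket
# is S_HEADER.size == 4 + 6 * 1 + 6 * 4 + 1 == 35 bytes, and every outdated
# uuid adds one 5-byte S_OLD_UID record plus the uuid bytes themselves (see
# _serialize below).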

class RevlogDocket(object):
    """metadata associated with revlog"""

    def __init__(
        self,
        revlog,
        use_pending=False,
        version_header=None,
        index_uuid=None,
        older_index_uuids=(),
        data_uuid=None,
        older_data_uuids=(),
        sidedata_uuid=None,
        older_sidedata_uuids=(),
        index_end=0,
        pending_index_end=0,
        data_end=0,
        pending_data_end=0,
        sidedata_end=0,
        pending_sidedata_end=0,
        default_compression_header=None,
    ):
        self._version_header = version_header
        self._read_only = bool(use_pending)
        self._dirty = False
        self._radix = revlog.radix
        self._path = revlog._docket_file
        self._opener = revlog.opener
        self._index_uuid = index_uuid
        # store mutable copies: the new_*_file() methods below insert into
        # these lists, while the defaults above are immutable tuples
        self._older_index_uuids = list(older_index_uuids)
        self._data_uuid = data_uuid
        self._older_data_uuids = list(older_data_uuids)
        self._sidedata_uuid = sidedata_uuid
        self._older_sidedata_uuids = list(older_sidedata_uuids)
        assert not set(older_index_uuids) & set(older_data_uuids)
        assert not set(older_data_uuids) & set(older_sidedata_uuids)
        assert not set(older_index_uuids) & set(older_sidedata_uuids)
        # these asserts should hold as long as we have a single index filename
        assert index_end <= pending_index_end
        assert data_end <= pending_data_end
        assert sidedata_end <= pending_sidedata_end
        self._initial_index_end = index_end
        self._pending_index_end = pending_index_end
        self._initial_data_end = data_end
        self._pending_data_end = pending_data_end
        self._initial_sidedata_end = sidedata_end
        self._pending_sidedata_end = pending_sidedata_end
        if use_pending:
            self._index_end = self._pending_index_end
            self._data_end = self._pending_data_end
            self._sidedata_end = self._pending_sidedata_end
        else:
            self._index_end = self._initial_index_end
            self._data_end = self._initial_data_end
            self._sidedata_end = self._initial_sidedata_end
        self.default_compression_header = default_compression_header

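    # Note on the "pending" mechanism: during a transaction, data may be
    # written past the "official" ends. A docket opened with
    # use_pending=True (e.g. by a reader inspecting an uncommitted
    # transaction) exposes those larger pending offsets instead, and is
    # read-only (see write() below).
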
    def index_filepath(self):
        """file path to the current index file associated with this docket"""
        # very simplistic version at first
        if self._index_uuid is None:
            self._index_uuid = make_uid()
        return b"%s-%s.idx" % (self._radix, self._index_uuid)

+    def new_index_file(self):
+        """switch index file to a new UID
+
+        The previous index UID is moved to the "older" list."""
+        old = (self._index_uuid, self._index_end)
+        self._older_index_uuids.insert(0, old)
+        self._index_uuid = make_uid()
+        return self.index_filepath()
+
    def data_filepath(self):
        """file path to the current data file associated with this docket"""
        # very simplistic version at first
        if self._data_uuid is None:
            self._data_uuid = make_uid()
        return b"%s-%s.dat" % (self._radix, self._data_uuid)

+    def new_data_file(self):
+        """switch data file to a new UID
+
+        The previous data UID is moved to the "older" list."""
+        old = (self._data_uuid, self._data_end)
+        self._older_data_uuids.insert(0, old)
+        self._data_uuid = make_uid()
+        return self.data_filepath()
+
    def sidedata_filepath(self):
        """file path to the current sidedata file associated with this docket"""
        # very simplistic version at first
        if self._sidedata_uuid is None:
            self._sidedata_uuid = make_uid()
        return b"%s-%s.sda" % (self._radix, self._sidedata_uuid)

+    def new_sidedata_file(self):
+        """switch sidedata file to a new UID
+
+        The previous sidedata UID is moved to the "older" list."""
+        old = (self._sidedata_uuid, self._sidedata_end)
+        self._older_sidedata_uuids.insert(0, old)
+        self._sidedata_uuid = make_uid()
+        return self.sidedata_filepath()
+
    @property
    def index_end(self):
        return self._index_end

    @index_end.setter
    def index_end(self, new_size):
        if new_size != self._index_end:
            self._index_end = new_size
            self._dirty = True

    @property
    def data_end(self):
        return self._data_end

    @data_end.setter
    def data_end(self, new_size):
        if new_size != self._data_end:
            self._data_end = new_size
            self._dirty = True

    @property
    def sidedata_end(self):
        return self._sidedata_end

    @sidedata_end.setter
    def sidedata_end(self, new_size):
        if new_size != self._sidedata_end:
            self._sidedata_end = new_size
            self._dirty = True

    def write(self, transaction, pending=False, stripping=False):
        """write the modifications to disk, if any

        This makes the new content visible to all processes."""
        if not self._dirty:
            return False
        else:
            if self._read_only:
                msg = b'writing read-only docket: %s'
                msg %= self._path
                raise error.ProgrammingError(msg)
            if not stripping:
                # XXX we could leverage the docket while stripping. However
                # it is not powerful enough at the time of this comment.
                transaction.addbackup(self._path, location=b'store')
            with self._opener(self._path, mode=b'w', atomictemp=True) as f:
                f.write(self._serialize(pending=pending))
            # if pending, we still need to write the final data eventually
            self._dirty = pending
            return True

    def _serialize(self, pending=False):
        if pending:
            official_index_end = self._initial_index_end
            official_data_end = self._initial_data_end
            official_sidedata_end = self._initial_sidedata_end
        else:
            official_index_end = self._index_end
            official_data_end = self._data_end
            official_sidedata_end = self._sidedata_end

        # these asserts should hold as long as we have a single index filename
        assert official_data_end <= self._data_end
        assert official_sidedata_end <= self._sidedata_end
        data = (
            self._version_header,
            len(self._index_uuid),
            len(self._older_index_uuids),
            len(self._data_uuid),
            len(self._older_data_uuids),
            len(self._sidedata_uuid),
            len(self._older_sidedata_uuids),
            official_index_end,
            self._index_end,
            official_data_end,
            self._data_end,
            official_sidedata_end,
            self._sidedata_end,
            self.default_compression_header,
        )
        s = []
        s.append(S_HEADER.pack(*data))

        s.append(self._index_uuid)
        for u, size in self._older_index_uuids:
            s.append(S_OLD_UID.pack(len(u), size))
        for u, size in self._older_index_uuids:
            s.append(u)

        s.append(self._data_uuid)
        for u, size in self._older_data_uuids:
            s.append(S_OLD_UID.pack(len(u), size))
        for u, size in self._older_data_uuids:
            s.append(u)

        s.append(self._sidedata_uuid)
        for u, size in self._older_sidedata_uuids:
            s.append(S_OLD_UID.pack(len(u), size))
        for u, size in self._older_sidedata_uuids:
            s.append(u)
        return b''.join(s)



def default_docket(revlog, version_header):
    """given a revlog version, return a new docket object for the given revlog"""
    rl_version = version_header & 0xFFFF
    if rl_version not in (constants.REVLOGV2, constants.CHANGELOGV2):
        return None
    comp = util.compengines[revlog._compengine].revlogheader()
    docket = RevlogDocket(
        revlog,
        version_header=version_header,
        default_compression_header=comp,
    )
    docket._dirty = True
    return docket


def _parse_old_uids(get_data, count):
    all_sizes = []
    all_uids = []
    for i in range(0, count):
        raw = get_data(S_OLD_UID.size)
        all_sizes.append(S_OLD_UID.unpack(raw))

    for uid_size, file_size in all_sizes:
        uid = get_data(uid_size)
        all_uids.append((uid, file_size))
    return all_uids


def parse_docket(revlog, data, use_pending=False):
    """given some docket data, return a docket object for the given revlog"""
    header = S_HEADER.unpack(data[: S_HEADER.size])

    # this is a mutable closure capture used in `get_data`
    offset = [S_HEADER.size]
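    # (a one-element list rather than a plain int: the nested get_data()
    # below mutates offset[0], and `nonlocal` is not available on the
    # Python 2 versions this module still supports, see pycompat above)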

    def get_data(size):
        """utility closure to access the next `size` bytes"""
        if offset[0] + size > len(data):
            # XXX better class
            msg = b"docket is too short, expected %d, got %d"
            msg %= (offset[0] + size, len(data))
            raise error.Abort(msg)
        raw = data[offset[0] : offset[0] + size]
        offset[0] += size
        return raw

    iheader = iter(header)

    version_header = next(iheader)

    index_uuid_size = next(iheader)
    index_uuid = get_data(index_uuid_size)

    older_index_uuid_count = next(iheader)
    older_index_uuids = _parse_old_uids(get_data, older_index_uuid_count)

    data_uuid_size = next(iheader)
    data_uuid = get_data(data_uuid_size)

    older_data_uuid_count = next(iheader)
    older_data_uuids = _parse_old_uids(get_data, older_data_uuid_count)

    sidedata_uuid_size = next(iheader)
    sidedata_uuid = get_data(sidedata_uuid_size)

    older_sidedata_uuid_count = next(iheader)
    older_sidedata_uuids = _parse_old_uids(get_data, older_sidedata_uuid_count)

    index_size = next(iheader)

    pending_index_size = next(iheader)

    data_size = next(iheader)

    pending_data_size = next(iheader)

    sidedata_size = next(iheader)

    pending_sidedata_size = next(iheader)

    default_compression_header = next(iheader)

    docket = RevlogDocket(
        revlog,
        use_pending=use_pending,
        version_header=version_header,
        index_uuid=index_uuid,
        older_index_uuids=older_index_uuids,
        data_uuid=data_uuid,
        older_data_uuids=older_data_uuids,
        sidedata_uuid=sidedata_uuid,
        older_sidedata_uuids=older_sidedata_uuids,
        index_end=index_size,
        pending_index_end=pending_index_size,
        data_end=data_size,
        pending_data_end=pending_data_size,
        sidedata_end=sidedata_size,
        pending_sidedata_end=pending_sidedata_size,
        default_compression_header=default_compression_header,
    )
    return docket
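
A minimal usage sketch (hypothetical caller code and variable names, not part of this changeset) of what the new methods enable: a revlog can retire its current data file and start a fresh one, while the docket keeps the old uid and its final size in the "older" list so the retired file stays discoverable:

    # inside an open transaction, switch the revlog to a fresh data file
    new_path = docket.new_data_file()      # e.g. b'<radix>-<new uid>.dat'
    with revlog.opener(new_path, mode=b'w') as f:
        f.write(rewritten_data)
    docket.data_end = len(rewritten_data)  # the setter marks the docket dirty
    docket.write(transaction)              # persist the new pointers atomically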