##// END OF EJS Templates
revlog: add a way to keep track of older uids in the docket...
marmoute -
r48246:c2526315 default
parent child Browse files
Show More
@@ -1,335 +1,393
1 # docket - code related to revlog "docket"
1 # docket - code related to revlog "docket"
2 #
2 #
3 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
3 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 ### Revlog docket file
8 ### Revlog docket file
9 #
9 #
10 # The revlog is stored on disk using multiple files:
10 # The revlog is stored on disk using multiple files:
11 #
11 #
12 # * a small docket file, containing metadata and a pointer,
12 # * a small docket file, containing metadata and a pointer,
13 #
13 #
14 # * an index file, containing fixed width information about revisions,
14 # * an index file, containing fixed width information about revisions,
15 #
15 #
16 # * a data file, containing variable width data for these revisions,
16 # * a data file, containing variable width data for these revisions,
17
17
18 from __future__ import absolute_import
18 from __future__ import absolute_import
19
19
20 import errno
20 import errno
21 import os
21 import os
22 import random
22 import random
23 import struct
23 import struct
24
24
25 from .. import (
25 from .. import (
26 encoding,
26 encoding,
27 error,
27 error,
28 node,
28 node,
29 pycompat,
29 pycompat,
30 util,
30 util,
31 )
31 )
32
32
33 from . import (
33 from . import (
34 constants,
34 constants,
35 )
35 )
36
36
37
37
def make_uid(id_size=8):
    """return a new unique identifier.

    The identifier is random and composed of ascii characters.

    `id_size` is the requested length of the identifier. Only
    `id_size // 2` random bytes are drawn before hex-encoding them.
    """
    # since we "hex" the result, we only need half the number of bytes to
    # get a final uid of size `id_size`
    return node.hex(os.urandom(id_size // 2))
45
45
46
46
# some special test logic to avoid annoying random output in the tests
stable_docket_file = encoding.environ.get(b'HGTEST_UUIDFILE')

if stable_docket_file:

    def make_uid(id_size=8):
        """return a reproducible pseudo-random identifier (test only)

        The file named by HGTEST_UUIDFILE holds the previously generated
        identifier. Its content is hashed into a seed for a private
        `random.Random` instance, and the identifier derived from that seed
        is written back to the file, so successive calls walk a stable
        sequence.
        """
        # the packing below is hard-coded to a 32 bits value (8 hex digits),
        # so refuse any other size before touching the seed file
        assert id_size == 8
        try:
            with open(stable_docket_file, mode='rb') as f:
                seed = f.read().strip()
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            # no seed file yet, start from a fixed value
            seed = b'04'  # chosen by a fair dice roll. guaranteed to be random
        if pycompat.ispy3:
            iter_seed = iter(seed)
        else:
            # pytype: disable=wrong-arg-types
            iter_seed = (ord(c) for c in seed)
            # pytype: enable=wrong-arg-types
        # some basic circular sum hashing on 64 bits
        int_seed = 0
        low_mask = int('1' * 35, 2)
        for i in iter_seed:
            high_part = int_seed >> 35
            low_part = (int_seed & low_mask) << 28
            int_seed = high_part + low_part + i
        r = random.Random()
        if pycompat.ispy3:
            r.seed(int_seed, version=1)
        else:
            r.seed(int_seed)
        # once we drop python 3.8 support we can simply use r.randbytes
        raw = r.getrandbits(id_size * 4)
        p = struct.pack('>L', raw)
        new = node.hex(p)
        # remember the new identifier, it is the seed of the next call
        with open(stable_docket_file, 'wb') as f:
            f.write(new)
        return new
86
86
87
87
# Docket format
#
# * 4 bytes: revlog version
#          |   This is mandatory as docket must be compatible with the previous
#          |   revlog index header.
# * 1 bytes: size of index uuid
# * 1 bytes: number of outdated index uuid
# * 1 bytes: size of data uuid
# * 1 bytes: number of outdated data uuid
# * 1 bytes: size of sidedata uuid
# * 1 bytes: number of outdated sidedata uuid
# * 8 bytes: size of index-data
# * 8 bytes: pending size of index-data
# * 8 bytes: size of data
# * 8 bytes: pending size of data
# * 8 bytes: size of sidedata
# * 8 bytes: pending size of sidedata
# * 1 bytes: default compression header
#
# NOTE(review): the six size fields are packed with `L`, which is only 4
# bytes wide when the format uses a standard-size byte order prefix.
# Whether the "8 bytes" above is accurate depends on
# `constants.INDEX_HEADER_FMT`, which is not visible here -- confirm, and use
# `Q` if 8 bytes is really intended.
S_HEADER = struct.Struct(constants.INDEX_HEADER_FMT + b'BBBBBBLLLLLLc')
# Fixed-width record describing one outdated uuid:
#
# * 1 bytes: size of the uuid
# * 4 bytes: size of the file (`L` is 4 bytes wide with the `>` prefix)
S_OLD_UID = struct.Struct('>BL')
104
110
105
111
class RevlogDocket(object):
    """metadata associated with revlog"""

    def __init__(
        self,
        revlog,
        use_pending=False,
        version_header=None,
        index_uuid=None,
        older_index_uuids=(),
        data_uuid=None,
        older_data_uuids=(),
        sidedata_uuid=None,
        older_sidedata_uuids=(),
        index_end=0,
        pending_index_end=0,
        data_end=0,
        pending_data_end=0,
        sidedata_end=0,
        pending_sidedata_end=0,
        default_compression_header=None,
    ):
        """build a docket for `revlog`

        `use_pending` makes the docket read-only and exposes the pending
        sizes instead of the official ones.

        Each `*_uuid` identifies the current index, data or sidedata file.
        Each `older_*_uuids` is a sequence of `(uid, file_size)` pairs
        describing outdated files of that kind.

        Each `*_end` is the official size of a file and `pending_*_end` its
        pending size.
        """
        self._version_header = version_header
        self._read_only = bool(use_pending)
        self._dirty = False
        self._radix = revlog.radix
        self._path = revlog._docket_file
        self._opener = revlog.opener
        self._index_uuid = index_uuid
        self._older_index_uuids = older_index_uuids
        self._data_uuid = data_uuid
        self._older_data_uuids = older_data_uuids
        self._sidedata_uuid = sidedata_uuid
        self._older_sidedata_uuids = older_sidedata_uuids
        # A uid must never be shared between the different kinds of files.
        # Compare the uids only: the entries are (uid, size) pairs and two
        # files reusing one uid would usually record different sizes.
        old_index_uids = {u for u, size in older_index_uuids}
        old_data_uids = {u for u, size in older_data_uuids}
        old_sidedata_uids = {u for u, size in older_sidedata_uuids}
        assert not old_index_uids & old_data_uids
        assert not old_data_uids & old_sidedata_uids
        assert not old_index_uids & old_sidedata_uids
        # these asserts should be True as long as we have a single index
        # filename
        assert index_end <= pending_index_end
        assert data_end <= pending_data_end
        assert sidedata_end <= pending_sidedata_end
        self._initial_index_end = index_end
        self._pending_index_end = pending_index_end
        self._initial_data_end = data_end
        self._pending_data_end = pending_data_end
        self._initial_sidedata_end = sidedata_end
        self._pending_sidedata_end = pending_sidedata_end
        if use_pending:
            self._index_end = self._pending_index_end
            self._data_end = self._pending_data_end
            self._sidedata_end = self._pending_sidedata_end
        else:
            self._index_end = self._initial_index_end
            self._data_end = self._initial_data_end
            self._sidedata_end = self._initial_sidedata_end
        self.default_compression_header = default_compression_header

    def index_filepath(self):
        """file path to the current index file associated to this docket"""
        # very simplistic version at first
        if self._index_uuid is None:
            self._index_uuid = make_uid()
        return b"%s-%s.idx" % (self._radix, self._index_uuid)

    def data_filepath(self):
        """file path to the current data file associated to this docket"""
        # very simplistic version at first
        if self._data_uuid is None:
            self._data_uuid = make_uid()
        return b"%s-%s.dat" % (self._radix, self._data_uuid)

    def sidedata_filepath(self):
        """file path to the current sidedata file associated to this docket"""
        # very simplistic version at first
        if self._sidedata_uuid is None:
            self._sidedata_uuid = make_uid()
        return b"%s-%s.sda" % (self._radix, self._sidedata_uuid)

    @property
    def index_end(self):
        """size of the index file as seen through this docket"""
        return self._index_end

    @index_end.setter
    def index_end(self, new_size):
        # only flag the docket as dirty on an actual change
        if new_size != self._index_end:
            self._index_end = new_size
            self._dirty = True

    @property
    def data_end(self):
        """size of the data file as seen through this docket"""
        return self._data_end

    @data_end.setter
    def data_end(self, new_size):
        # only flag the docket as dirty on an actual change
        if new_size != self._data_end:
            self._data_end = new_size
            self._dirty = True

    @property
    def sidedata_end(self):
        """size of the sidedata file as seen through this docket"""
        return self._sidedata_end

    @sidedata_end.setter
    def sidedata_end(self, new_size):
        # only flag the docket as dirty on an actual change
        if new_size != self._sidedata_end:
            self._sidedata_end = new_size
            self._dirty = True

    def write(self, transaction, pending=False, stripping=False):
        """write the modification on disk if any

        This makes the new content visible to all processes.

        Return False when the docket is clean and nothing was written, True
        otherwise. Raise ProgrammingError when the docket is dirty but
        read-only."""
        if not self._dirty:
            return False
        else:
            if self._read_only:
                msg = b'writing read-only docket: %s'
                msg %= self._path
                raise error.ProgrammingError(msg)
            if not stripping:
                # XXX we could, leverage the docket while stripping. However it
                # is not powerful enough at the time of this comment
                transaction.addbackup(self._path, location=b'store')
            with self._opener(self._path, mode=b'w', atomictemp=True) as f:
                f.write(self._serialize(pending=pending))
            # if pending we still need to write the final data eventually
            self._dirty = pending
            return True

    def _serialize(self, pending=False):
        """return the binary content of the docket

        With `pending`, the official sizes are the ones the docket was
        created with and the current sizes are stored as pending ones.
        Otherwise the current sizes are used for both."""
        if pending:
            official_index_end = self._initial_index_end
            official_data_end = self._initial_data_end
            official_sidedata_end = self._initial_sidedata_end
        else:
            official_index_end = self._index_end
            official_data_end = self._data_end
            official_sidedata_end = self._sidedata_end

        # this assert should be True as long as we have a single index filename
        assert official_data_end <= self._data_end
        assert official_sidedata_end <= self._sidedata_end
        data = (
            self._version_header,
            len(self._index_uuid),
            len(self._older_index_uuids),
            len(self._data_uuid),
            len(self._older_data_uuids),
            len(self._sidedata_uuid),
            len(self._older_sidedata_uuids),
            official_index_end,
            self._index_end,
            official_data_end,
            self._data_end,
            official_sidedata_end,
            self._sidedata_end,
            self.default_compression_header,
        )
        s = [S_HEADER.pack(*data)]
        # For each kind of file: the current uuid, then the fixed-width
        # (uid size, file size) records of all the older uuids, then the
        # older uuids themselves.
        for current, older in (
            (self._index_uuid, self._older_index_uuids),
            (self._data_uuid, self._older_data_uuids),
            (self._sidedata_uuid, self._older_sidedata_uuids),
        ):
            s.append(current)
            s.extend(S_OLD_UID.pack(len(u), size) for u, size in older)
            s.extend(u for u, size in older)
        return b''.join(s)
258
291
259
292
def default_docket(revlog, version_header):
    """given a revlog version, return a new docket object for the given revlog

    Return None when the revlog version stored in the low 16 bits of
    `version_header` is neither REVLOGV2 nor CHANGELOGV2.

    The new docket is flagged as dirty."""
    rl_version = version_header & 0xFFFF
    if rl_version not in (constants.REVLOGV2, constants.CHANGELOGV2):
        return None
    comp = util.compengines[revlog._compengine].revlogheader()
    docket = RevlogDocket(
        revlog,
        version_header=version_header,
        default_compression_header=comp,
    )
    docket._dirty = True
    return docket
273
306
274
307
def _parse_old_uids(get_data, count):
    """read `count` outdated uid entries, return a list of (uid, file_size)

    `get_data(size)` must return the next `size` bytes of the docket.

    All the fixed-width `S_OLD_UID` records (uid size, file size) are read
    first, then the uids themselves, in the same order."""
    all_sizes = [
        S_OLD_UID.unpack(get_data(S_OLD_UID.size)) for _idx in range(count)
    ]
    return [
        (get_data(uid_size), file_size) for uid_size, file_size in all_sizes
    ]
319
320
def parse_docket(revlog, data, use_pending=False):
    """given some docket data return a docket object for the given revlog

    `data` is the raw content of the docket: a fixed-size `S_HEADER` followed
    by the variable-size uuids announced in that header.

    `use_pending` is passed through to the `RevlogDocket` constructor.

    Raise Abort when `data` is shorter than what the header requires."""
    if len(data) < S_HEADER.size:
        # report a truncated header the same way as truncated uuids
        # XXX better class
        msg = b"docket is too short, expected %d got %d"
        msg %= (S_HEADER.size, len(data))
        raise error.Abort(msg)
    header = S_HEADER.unpack(data[: S_HEADER.size])

    # this is a mutable closure capture used in `get_data`
    offset = [S_HEADER.size]

    def get_data(size):
        """utility closure to access the `size` next bytes"""
        if offset[0] + size > len(data):
            # XXX better class
            msg = b"docket is too short, expected %d got %d"
            msg %= (offset[0] + size, len(data))
            raise error.Abort(msg)
        raw = data[offset[0] : offset[0] + size]
        offset[0] += size
        return raw

    # The header fields are consumed in order. The variable-size payload of
    # each kind of file is read from `data` as soon as its size and count
    # are known, since it is laid out in that same order.
    iheader = iter(header)

    version_header = next(iheader)

    index_uuid_size = next(iheader)
    index_uuid = get_data(index_uuid_size)

    older_index_uuid_count = next(iheader)
    older_index_uuids = _parse_old_uids(get_data, older_index_uuid_count)

    data_uuid_size = next(iheader)
    data_uuid = get_data(data_uuid_size)

    older_data_uuid_count = next(iheader)
    older_data_uuids = _parse_old_uids(get_data, older_data_uuid_count)

    sidedata_uuid_size = next(iheader)
    sidedata_uuid = get_data(sidedata_uuid_size)

    older_sidedata_uuid_count = next(iheader)
    older_sidedata_uuids = _parse_old_uids(get_data, older_sidedata_uuid_count)

    index_size = next(iheader)

    pending_index_size = next(iheader)

    data_size = next(iheader)

    pending_data_size = next(iheader)

    sidedata_size = next(iheader)

    pending_sidedata_size = next(iheader)

    default_compression_header = next(iheader)

    docket = RevlogDocket(
        revlog,
        use_pending=use_pending,
        version_header=version_header,
        index_uuid=index_uuid,
        older_index_uuids=older_index_uuids,
        data_uuid=data_uuid,
        older_data_uuids=older_data_uuids,
        sidedata_uuid=sidedata_uuid,
        older_sidedata_uuids=older_sidedata_uuids,
        index_end=index_size,
        pending_index_end=pending_index_size,
        data_end=data_size,
        pending_data_end=pending_data_size,
        sidedata_end=sidedata_size,
        pending_sidedata_end=pending_sidedata_size,
        default_compression_header=default_compression_header,
    )
    return docket
General Comments 0
You need to be logged in to leave comments. Login now