revlogv2: add a `get_data` helper to grab the next piece of docket...
marmoute
r48117:53ab13d6 default
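In summary: `parse_docket` used to advance a bare `offset` integer by hand after every variable-width read; with this change, every such read goes through a small `get_data` closure that bounds-checks the slice before advancing the cursor. A minimal standalone sketch of the same pattern (the `parse` function and its record layout are illustrative, not Mercurial's API):

import struct

def parse(data):
    # a one-element list so the nested closure can rebind the cursor
    # (the patched file still supports Python 2, so `nonlocal` is out)
    offset = [0]

    def get_data(size):
        """return the next `size` bytes, failing loudly on truncated input"""
        if offset[0] + size > len(data):
            raise ValueError(
                "record too short: expected %d bytes, got %d"
                % (offset[0] + size, len(data))
            )
        raw = data[offset[0] : offset[0] + size]
        offset[0] += size
        return raw

    # illustrative record: a 1-byte length followed by that many bytes
    (name_size,) = struct.unpack(">B", get_data(1))
    return get_data(name_size)

print(parse(b"\x05hello"))  # -> b'hello'

Feeding this a truncated record (e.g. b"\x05hel") now fails with an explicit error instead of silently returning a short slice, which is the point of the helper.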
@@ -1,276 +1,287 @@
 # docket - code related to revlog "docket"
 #
 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
 ### Revlog docket file
 #
 # The revlog is stored on disk using multiple files:
 #
 # * a small docket file, containing metadata and a pointer,
 #
 # * an index file, containing fixed width information about revisions,
 #
 # * a data file, containing variable width data for these revisions,
 
 from __future__ import absolute_import
 
 import errno
 import os
 import random
 import struct
 
 from .. import (
     encoding,
     error,
     node,
     pycompat,
     util,
 )
 
 from . import (
     constants,
 )
 
 
 def make_uid(id_size=8):
     """return a new unique identifier.
 
     The identifier is random and composed of ascii characters."""
     # since we "hex" the result we need half the number of bits to have a
     # final uuid of size ID_SIZE
     return node.hex(os.urandom(id_size // 2))
 
 
 # some special test logic to avoid annoying random output in the tests
 stable_docket_file = encoding.environ.get(b'HGTEST_UUIDFILE')
 
 if stable_docket_file:
 
     def make_uid(id_size=8):
         try:
             with open(stable_docket_file, mode='rb') as f:
                 seed = f.read().strip()
         except IOError as inst:
             if inst.errno != errno.ENOENT:
                 raise
             seed = b'04'  # chosen by a fair dice roll. guaranteed to be random
         if pycompat.ispy3:
             iter_seed = iter(seed)
         else:
             iter_seed = (ord(c) for c in seed)
         # some basic circular sum hashing on 64 bits
         int_seed = 0
         low_mask = int('1' * 35, 2)
         for i in iter_seed:
             high_part = int_seed >> 35
             low_part = (int_seed & low_mask) << 28
             int_seed = high_part + low_part + i
         r = random.Random()
         if pycompat.ispy3:
             r.seed(int_seed, version=1)
         else:
             r.seed(int_seed)
         # once we drop python 3.8 support we can simply use r.randbytes
         raw = r.getrandbits(id_size * 4)
         assert id_size == 8
         p = struct.pack('>L', raw)
         new = node.hex(p)
         with open(stable_docket_file, 'wb') as f:
             f.write(new)
         return new
 
 
 # Docket format
 #
 # * 4 bytes: revlog version
 # |   This is mandatory as docket must be compatible with the previous
 # |   revlog index header.
 # * 1 byte:  size of index uuid
 # * 1 byte:  size of data uuid
 # * 4 bytes: size of index-data
 # * 4 bytes: pending size of index-data
 # * 4 bytes: size of data
 # * 4 bytes: pending size of data
 # * 1 byte:  default compression header
 S_HEADER = struct.Struct(constants.INDEX_HEADER.format + 'BBLLLLc')
 
 
 class RevlogDocket(object):
     """metadata associated with revlog"""
 
     def __init__(
         self,
         revlog,
         use_pending=False,
         version_header=None,
         index_uuid=None,
         data_uuid=None,
         index_end=0,
         pending_index_end=0,
         data_end=0,
         pending_data_end=0,
         default_compression_header=None,
     ):
         self._version_header = version_header
         self._read_only = bool(use_pending)
         self._dirty = False
         self._radix = revlog.radix
         self._path = revlog._docket_file
         self._opener = revlog.opener
         self._index_uuid = index_uuid
         self._data_uuid = data_uuid
         # these asserts should be True as long as we have a single index filename
         assert index_end <= pending_index_end
         assert data_end <= pending_data_end
         self._initial_index_end = index_end
         self._pending_index_end = pending_index_end
         self._initial_data_end = data_end
         self._pending_data_end = pending_data_end
         if use_pending:
             self._index_end = self._pending_index_end
             self._data_end = self._pending_data_end
         else:
             self._index_end = self._initial_index_end
             self._data_end = self._initial_data_end
         self.default_compression_header = default_compression_header
 
     def index_filepath(self):
         """file path to the current index file associated to this docket"""
         # very simplistic version at first
         if self._index_uuid is None:
             self._index_uuid = make_uid()
         return b"%s-%s.idx" % (self._radix, self._index_uuid)
 
     def data_filepath(self):
         """file path to the current data file associated to this docket"""
         # very simplistic version at first
         if self._data_uuid is None:
             self._data_uuid = make_uid()
         return b"%s-%s.dat" % (self._radix, self._data_uuid)
 
     @property
     def index_end(self):
         return self._index_end
 
     @index_end.setter
     def index_end(self, new_size):
         if new_size != self._index_end:
             self._index_end = new_size
             self._dirty = True
 
     @property
     def data_end(self):
         return self._data_end
 
     @data_end.setter
     def data_end(self, new_size):
         if new_size != self._data_end:
             self._data_end = new_size
             self._dirty = True
 
     def write(self, transaction, pending=False, stripping=False):
         """write the modifications to disk if any
 
         This makes the new content visible to all processes"""
         if not self._dirty:
             return False
         else:
             if self._read_only:
                 msg = b'writing read-only docket: %s'
                 msg %= self._path
                 raise error.ProgrammingError(msg)
             if not stripping:
                 # XXX we could leverage the docket while stripping. However
                 # it is not powerful enough at the time of this comment
                 transaction.addbackup(self._path, location=b'store')
             with self._opener(self._path, mode=b'w', atomictemp=True) as f:
                 f.write(self._serialize(pending=pending))
             # if pending, we still need to write the final data eventually
             self._dirty = pending
             return True
 
     def _serialize(self, pending=False):
         if pending:
             official_index_end = self._initial_index_end
             official_data_end = self._initial_data_end
         else:
             official_index_end = self._index_end
             official_data_end = self._data_end
 
         # this assert should be True as long as we have a single index filename
         assert official_data_end <= self._data_end
         data = (
             self._version_header,
             len(self._index_uuid),
             len(self._data_uuid),
             official_index_end,
             self._index_end,
             official_data_end,
             self._data_end,
             self.default_compression_header,
         )
         s = []
         s.append(S_HEADER.pack(*data))
         s.append(self._index_uuid)
         s.append(self._data_uuid)
         return b''.join(s)
 
 
 def default_docket(revlog, version_header):
     """given a revlog version header, return a new docket object for the given revlog"""
     rl_version = version_header & 0xFFFF
     if rl_version not in (constants.REVLOGV2, constants.CHANGELOGV2):
         return None
     comp = util.compengines[revlog._compengine].revlogheader()
     docket = RevlogDocket(
         revlog,
         version_header=version_header,
         default_compression_header=comp,
     )
     docket._dirty = True
     return docket
 
 
 def parse_docket(revlog, data, use_pending=False):
     """given some docket data, return a docket object for the given revlog"""
     header = S_HEADER.unpack(data[: S_HEADER.size])
-    offset = S_HEADER.size
+
+    # this is a mutable closure capture used in `get_data`
+    offset = [S_HEADER.size]
+
+    def get_data(size):
+        """utility closure to access the `size` next bytes"""
+        if offset[0] + size > len(data):
+            # XXX better class
+            msg = b"docket is too short, expected %d got %d"
+            msg %= (offset[0] + size, len(data))
+            raise error.Abort(msg)
+        raw = data[offset[0] : offset[0] + size]
+        offset[0] += size
+        return raw
 
     iheader = iter(header)
 
     version_header = next(iheader)
 
     index_uuid_size = next(iheader)
-    index_uuid = data[offset : offset + index_uuid_size]
-    offset += index_uuid_size
+    index_uuid = get_data(index_uuid_size)
 
     data_uuid_size = next(iheader)
-    data_uuid = data[offset : offset + data_uuid_size]
-    offset += data_uuid_size
+    data_uuid = get_data(data_uuid_size)
 
     index_size = next(iheader)
 
     pending_index_size = next(iheader)
 
     data_size = next(iheader)
 
     pending_data_size = next(iheader)
 
     default_compression_header = next(iheader)
 
     docket = RevlogDocket(
         revlog,
         use_pending=use_pending,
         version_header=version_header,
         index_uuid=index_uuid,
         data_uuid=data_uuid,
         index_end=index_size,
         pending_index_end=pending_index_size,
         data_end=data_size,
         pending_data_end=pending_data_size,
         default_compression_header=default_compression_header,
     )
     return docket
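For reference, a standalone sketch of how the fixed-width header and the trailing uuids described in the docket-format comment fit together. It assumes `constants.INDEX_HEADER` packs a single 4-byte big-endian integer (`>I`); all concrete values are made up for illustration:

import struct

# assumed: constants.INDEX_HEADER is struct.Struct(">I"), so the full
# header is 4 + 1 + 1 + 4 * 4 + 1 = 23 bytes, followed by the two uuids
S_HEADER = struct.Struct(">I" + "BBLLLLc")

index_uuid, data_uuid = b"89abcdef", b"01234567"  # 8-char hex uids
header = S_HEADER.pack(
    0xDEAD,           # version header (made-up value)
    len(index_uuid),  # size of index uuid
    len(data_uuid),   # size of data uuid
    128,              # size of index-data
    128,              # pending size of index-data
    4096,             # size of data
    4096,             # pending size of data
    b'u',             # default compression header
)
docket_bytes = header + index_uuid + data_uuid

# parsing mirrors parse_docket: fixed header first, then the uuids,
# each read bounded by the sizes recorded in the header
fields = S_HEADER.unpack(docket_bytes[: S_HEADER.size])
offset = S_HEADER.size
assert docket_bytes[offset : offset + fields[1]] == index_uuid
offset += fields[1]
assert docket_bytes[offset : offset + fields[2]] == data_uuid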