revlogv2: add a `get_data` helper to grab the next piece of docket...
marmoute
r48117:53ab13d6 default
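The core of this change is a small `get_data` closure that replaces manual offset bookkeeping in `parse_docket`. The sketch below is a minimal, standalone illustration of the pattern, not Mercurial code: the `parse` function and its one-byte length prefix are invented for the example. The offset lives in a one-element list, likely because the codebase still supports Python 2, which lacks `nonlocal`; mutating `offset[0]` lets the nested function advance a cursor owned by the enclosing scope.

import struct

def parse(data):
    offset = [0]  # mutable cell standing in for `nonlocal offset`

    def get_data(size):
        """return the next `size` bytes, raising if the buffer is too short"""
        if offset[0] + size > len(data):
            raise ValueError(
                "buffer too short, expected %d got %d"
                % (offset[0] + size, len(data))
            )
        raw = data[offset[0] : offset[0] + size]
        offset[0] += size
        return raw

    (length,) = struct.unpack(">B", get_data(1))
    return get_data(length)

print(parse(b"\x05hello"))  # -> b'hello'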
@@ -1,276 +1,287 @@
 # docket - code related to revlog "docket"
 #
 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.
 
 ### Revlog docket file
 #
 # The revlog is stored on disk using multiple files:
 #
 # * a small docket file, containing metadata and a pointer,
 #
 # * an index file, containing fixed width information about revisions,
 #
 # * a data file, containing variable width data for these revisions,
 
 from __future__ import absolute_import
 
 import errno
 import os
 import random
 import struct
 
 from .. import (
     encoding,
     error,
     node,
     pycompat,
     util,
 )
 
 from . import (
     constants,
 )
 
 
 def make_uid(id_size=8):
     """return a new unique identifier.
 
     The identifier is random and composed of ascii characters."""
     # since we "hex" the result we need half the number of bits to have a final
     # uuid of size ID_SIZE
     return node.hex(os.urandom(id_size // 2))
 
 
 # some special test logic to avoid annoying random output in the tests
 stable_docket_file = encoding.environ.get(b'HGTEST_UUIDFILE')
 
 if stable_docket_file:
 
     def make_uid(id_size=8):
         try:
             with open(stable_docket_file, mode='rb') as f:
                 seed = f.read().strip()
         except IOError as inst:
             if inst.errno != errno.ENOENT:
                 raise
             seed = b'04'  # chosen by a fair dice roll. guaranteed to be random.
         if pycompat.ispy3:
             iter_seed = iter(seed)
         else:
             iter_seed = (ord(c) for c in seed)
         # some basic circular sum hashing on 64 bits
         int_seed = 0
         low_mask = int('1' * 35, 2)
         for i in iter_seed:
             high_part = int_seed >> 35
             low_part = (int_seed & low_mask) << 28
             int_seed = high_part + low_part + i
         r = random.Random()
         if pycompat.ispy3:
             r.seed(int_seed, version=1)
         else:
             r.seed(int_seed)
         # once we drop python 3.8 support we can simply use r.randbytes
         raw = r.getrandbits(id_size * 4)
         assert id_size == 8
         p = struct.pack('>L', raw)
         new = node.hex(p)
         with open(stable_docket_file, 'wb') as f:
             f.write(new)
         return new
 
 
 # Docket format
 #
 # * 4 bytes: revlog version
 # |   This is mandatory as docket must be compatible with the previous
 # |   revlog index header.
 # * 1 byte:  size of index uuid
 # * 1 byte:  size of data uuid
 # * 4 bytes: size of index-data
 # * 4 bytes: pending size of index-data
 # * 4 bytes: size of data
 # * 4 bytes: pending size of data
 # * 1 byte:  default compression header
 S_HEADER = struct.Struct(constants.INDEX_HEADER.format + 'BBLLLLc')
 
 
 class RevlogDocket(object):
     """metadata associated with revlog"""
 
     def __init__(
         self,
         revlog,
         use_pending=False,
         version_header=None,
         index_uuid=None,
         data_uuid=None,
         index_end=0,
         pending_index_end=0,
         data_end=0,
         pending_data_end=0,
         default_compression_header=None,
     ):
         self._version_header = version_header
         self._read_only = bool(use_pending)
         self._dirty = False
         self._radix = revlog.radix
         self._path = revlog._docket_file
         self._opener = revlog.opener
         self._index_uuid = index_uuid
         self._data_uuid = data_uuid
         # these asserts should be True as long as we have a single index filename
         assert index_end <= pending_index_end
         assert data_end <= pending_data_end
         self._initial_index_end = index_end
         self._pending_index_end = pending_index_end
         self._initial_data_end = data_end
         self._pending_data_end = pending_data_end
         if use_pending:
             self._index_end = self._pending_index_end
             self._data_end = self._pending_data_end
         else:
             self._index_end = self._initial_index_end
             self._data_end = self._initial_data_end
         self.default_compression_header = default_compression_header
 
     def index_filepath(self):
         """file path to the current index file associated to this docket"""
         # very simplistic version at first
         if self._index_uuid is None:
             self._index_uuid = make_uid()
         return b"%s-%s.idx" % (self._radix, self._index_uuid)
 
     def data_filepath(self):
         """file path to the current data file associated to this docket"""
         # very simplistic version at first
         if self._data_uuid is None:
             self._data_uuid = make_uid()
         return b"%s-%s.dat" % (self._radix, self._data_uuid)
 
     @property
     def index_end(self):
         return self._index_end
 
     @index_end.setter
     def index_end(self, new_size):
         if new_size != self._index_end:
             self._index_end = new_size
             self._dirty = True
 
     @property
     def data_end(self):
         return self._data_end
 
     @data_end.setter
     def data_end(self, new_size):
         if new_size != self._data_end:
             self._data_end = new_size
             self._dirty = True
 
     def write(self, transaction, pending=False, stripping=False):
         """write the modifications to disk if any
 
         This makes the new content visible to all processes"""
         if not self._dirty:
             return False
         else:
             if self._read_only:
                 msg = b'writing read-only docket: %s'
                 msg %= self._path
                 raise error.ProgrammingError(msg)
             if not stripping:
                 # XXX we could leverage the docket while stripping. However it
                 # is not powerful enough at the time of this comment
                 transaction.addbackup(self._path, location=b'store')
             with self._opener(self._path, mode=b'w', atomictemp=True) as f:
                 f.write(self._serialize(pending=pending))
             # if pending, we still need to write the final data eventually
             self._dirty = pending
         return True
 
     def _serialize(self, pending=False):
         if pending:
             official_index_end = self._initial_index_end
             official_data_end = self._initial_data_end
         else:
             official_index_end = self._index_end
             official_data_end = self._data_end
 
         # this assert should be True as long as we have a single index filename
         assert official_data_end <= self._data_end
         data = (
             self._version_header,
             len(self._index_uuid),
             len(self._data_uuid),
             official_index_end,
             self._index_end,
             official_data_end,
             self._data_end,
             self.default_compression_header,
         )
         s = []
         s.append(S_HEADER.pack(*data))
         s.append(self._index_uuid)
         s.append(self._data_uuid)
         return b''.join(s)
 
 
 def default_docket(revlog, version_header):
     """given a revlog version, return a new docket object for the given revlog"""
     rl_version = version_header & 0xFFFF
     if rl_version not in (constants.REVLOGV2, constants.CHANGELOGV2):
         return None
     comp = util.compengines[revlog._compengine].revlogheader()
     docket = RevlogDocket(
         revlog,
         version_header=version_header,
         default_compression_header=comp,
     )
     docket._dirty = True
     return docket
 
 
 def parse_docket(revlog, data, use_pending=False):
     """given some docket data return a docket object for the given revlog"""
     header = S_HEADER.unpack(data[: S_HEADER.size])
-    offset = S_HEADER.size
+
+    # this is a mutable closure capture used in `get_data`
+    offset = [S_HEADER.size]
+
+    def get_data(size):
+        """utility closure to access the next `size` bytes"""
+        if offset[0] + size > len(data):
+            # XXX better class
+            msg = b"docket is too short, expected %d got %d"
+            msg %= (offset[0] + size, len(data))
+            raise error.Abort(msg)
+        raw = data[offset[0] : offset[0] + size]
+        offset[0] += size
+        return raw
 
     iheader = iter(header)
 
     version_header = next(iheader)
 
     index_uuid_size = next(iheader)
-    index_uuid = data[offset : offset + index_uuid_size]
-    offset += index_uuid_size
+    index_uuid = get_data(index_uuid_size)
 
     data_uuid_size = next(iheader)
-    data_uuid = data[offset : offset + data_uuid_size]
-    offset += data_uuid_size
+    data_uuid = get_data(data_uuid_size)
 
     index_size = next(iheader)
 
     pending_index_size = next(iheader)
 
     data_size = next(iheader)
 
     pending_data_size = next(iheader)
 
     default_compression_header = next(iheader)
 
     docket = RevlogDocket(
         revlog,
         use_pending=use_pending,
         version_header=version_header,
         index_uuid=index_uuid,
         data_uuid=data_uuid,
         index_end=index_size,
         pending_index_end=pending_index_size,
         data_end=data_size,
         pending_data_end=pending_data_size,
         default_compression_header=default_compression_header,
     )
     return docket
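For reference, here is a hedged round-trip sketch of the docket header layout documented above. It assumes `constants.INDEX_HEADER.format` is the big-endian `">I"` that the 4-byte version field implies; with a standard-size format, `B` and `c` pack 1 byte each and `L` packs 4, which is why the size fields in the layout comment are 4 bytes wide. All values are illustrative.

import struct

S_HEADER = struct.Struct(">I" + "BBLLLLc")  # assumed INDEX_HEADER format + docket fields

packed = S_HEADER.pack(
    0x0000DEAD,  # version header (illustrative value)
    8,  # size of the index uuid
    8,  # size of the data uuid
    1024,  # committed size of index data
    2048,  # pending size of index data
    4096,  # committed size of data
    8192,  # pending size of data
    b'u',  # one-byte compression header (illustrative value)
)
assert S_HEADER.size == 4 + 1 + 1 + 4 * 4 + 1  # 23 bytes in total
print(S_HEADER.unpack(packed))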
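The pending/official split in `_serialize` is also worth a note: a pending write publishes the previously committed sizes as the official ones while recording the in-progress sizes alongside them, so readers that ignore pending data keep a consistent view. A tiny sketch of that rule, with invented names:

def choose_ends(initial_end, current_end, pending):
    # a pending docket keeps the official end at the last committed
    # position while still recording how far the new data reaches
    official_end = initial_end if pending else current_end
    return official_end, current_end

assert choose_ends(100, 150, pending=True) == (100, 150)
assert choose_ends(100, 150, pending=False) == (150, 150)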