revlogv2: simplify and clarify the processing of each entry...
marmoute
r48116:f286d715 default
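The patch replaces positional subscripting of the tuple returned by S_HEADER.unpack() with an iterator that is consumed one field at a time, so each field is read in declaration order and inserting or reordering fields no longer requires renumbering every subscript. A minimal standalone sketch of the two styles (the three-field format below is hypothetical, chosen only to illustrate the pattern):

import struct

# hypothetical header: 1-byte version, 1-byte uuid size, 4-byte index size
S = struct.Struct('>BBI')
header = S.unpack(b'\x02\x08\x00\x00\x00\x2a')

# before: positional subscripts, fragile when fields are inserted or reordered
version_header = header[0]
index_uuid_size = header[1]
index_size = header[2]

# after: consume the fields sequentially, in the order they are declared
iheader = iter(header)
version_header = next(iheader)
index_uuid_size = next(iheader)
index_size = next(iheader)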
@@ -1,265 +1,276 @@
# docket - code related to revlog "docket"
#
# Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

### Revlog docket file
#
# The revlog is stored on disk using multiple files:
#
# * a small docket file, containing metadata and a pointer,
#
# * an index file, containing fixed width information about revisions,
#
# * a data file, containing variable width data for these revisions,

from __future__ import absolute_import

import errno
import os
import random
import struct

from .. import (
    encoding,
    error,
    node,
    pycompat,
    util,
)

from . import (
    constants,
)


def make_uid(id_size=8):
    """return a new unique identifier.

    The identifier is random and composed of ascii characters."""
    # since we "hex" the result, we need half the number of bytes to have a
    # final uuid of size ID_SIZE
    return node.hex(os.urandom(id_size // 2))


# some special test logic to avoid annoying random output in the tests
stable_docket_file = encoding.environ.get(b'HGTEST_UUIDFILE')

if stable_docket_file:

    def make_uid(id_size=8):
        try:
            with open(stable_docket_file, mode='rb') as f:
                seed = f.read().strip()
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            seed = b'04'  # chosen by a fair dice roll. guaranteed to be random
        if pycompat.ispy3:
            iter_seed = iter(seed)
        else:
            iter_seed = (ord(c) for c in seed)
        # some basic circular sum hashing on 64 bits
        int_seed = 0
        low_mask = int('1' * 35, 2)
        for i in iter_seed:
            high_part = int_seed >> 35
            low_part = (int_seed & low_mask) << 28
            int_seed = high_part + low_part + i
        r = random.Random()
        if pycompat.ispy3:
            r.seed(int_seed, version=1)
        else:
            r.seed(int_seed)
        # once we drop python 3.8 support we can simply use r.randbytes
        raw = r.getrandbits(id_size * 4)
        assert id_size == 8
        p = struct.pack('>L', raw)
        new = node.hex(p)
        with open(stable_docket_file, 'wb') as f:
            f.write(new)
        return new


# Docket format
#
# * 4 bytes: revlog version
# |   This is mandatory as the docket must be compatible with the previous
# |   revlog index header.
# * 1 byte:  size of index uuid
# * 1 byte:  size of data uuid
# * 8 bytes: size of index-data
# * 8 bytes: pending size of index-data
# * 8 bytes: size of data
# * 8 bytes: pending size of data
# * 1 byte:  default compression header
S_HEADER = struct.Struct(constants.INDEX_HEADER.format + 'BBLLLLc')


class RevlogDocket(object):
    """metadata associated with revlog"""

    def __init__(
        self,
        revlog,
        use_pending=False,
        version_header=None,
        index_uuid=None,
        data_uuid=None,
        index_end=0,
        pending_index_end=0,
        data_end=0,
        pending_data_end=0,
        default_compression_header=None,
    ):
        self._version_header = version_header
        self._read_only = bool(use_pending)
        self._dirty = False
        self._radix = revlog.radix
        self._path = revlog._docket_file
        self._opener = revlog.opener
        self._index_uuid = index_uuid
        self._data_uuid = data_uuid
        # these asserts should be True as long as we have a single index filename
        assert index_end <= pending_index_end
        assert data_end <= pending_data_end
        self._initial_index_end = index_end
        self._pending_index_end = pending_index_end
        self._initial_data_end = data_end
        self._pending_data_end = pending_data_end
        if use_pending:
            self._index_end = self._pending_index_end
            self._data_end = self._pending_data_end
        else:
            self._index_end = self._initial_index_end
            self._data_end = self._initial_data_end
        self.default_compression_header = default_compression_header

    def index_filepath(self):
        """file path to the current index file associated to this docket"""
        # very simplistic version at first
        if self._index_uuid is None:
            self._index_uuid = make_uid()
        return b"%s-%s.idx" % (self._radix, self._index_uuid)

    def data_filepath(self):
        """file path to the current data file associated to this docket"""
        # very simplistic version at first
        if self._data_uuid is None:
            self._data_uuid = make_uid()
        return b"%s-%s.dat" % (self._radix, self._data_uuid)

    @property
    def index_end(self):
        return self._index_end

    @index_end.setter
    def index_end(self, new_size):
        if new_size != self._index_end:
            self._index_end = new_size
            self._dirty = True

    @property
    def data_end(self):
        return self._data_end

    @data_end.setter
    def data_end(self, new_size):
        if new_size != self._data_end:
            self._data_end = new_size
            self._dirty = True

    def write(self, transaction, pending=False, stripping=False):
        """write the modifications on disk if any

        This makes the new content visible to all processes"""
        if not self._dirty:
            return False
        else:
            if self._read_only:
                msg = b'writing read-only docket: %s'
                msg %= self._path
                raise error.ProgrammingError(msg)
            if not stripping:
                # XXX we could leverage the docket while stripping. However
                # it is not powerful enough at the time of this comment
                transaction.addbackup(self._path, location=b'store')
            with self._opener(self._path, mode=b'w', atomictemp=True) as f:
                f.write(self._serialize(pending=pending))
            # if pending, we still need to write the final data eventually
            self._dirty = pending
            return True

    def _serialize(self, pending=False):
        if pending:
            official_index_end = self._initial_index_end
            official_data_end = self._initial_data_end
        else:
            official_index_end = self._index_end
            official_data_end = self._data_end

        # this assert should be True as long as we have a single index filename
        assert official_data_end <= self._data_end
        data = (
            self._version_header,
            len(self._index_uuid),
            len(self._data_uuid),
            official_index_end,
            self._index_end,
            official_data_end,
            self._data_end,
            self.default_compression_header,
        )
        s = []
        s.append(S_HEADER.pack(*data))
        s.append(self._index_uuid)
        s.append(self._data_uuid)
        return b''.join(s)


def default_docket(revlog, version_header):
    """given a revlog version header, return a new docket object for the given revlog"""
    rl_version = version_header & 0xFFFF
    if rl_version not in (constants.REVLOGV2, constants.CHANGELOGV2):
        return None
    comp = util.compengines[revlog._compengine].revlogheader()
    docket = RevlogDocket(
        revlog,
        version_header=version_header,
        default_compression_header=comp,
    )
    docket._dirty = True
    return docket


def parse_docket(revlog, data, use_pending=False):
    """given some docket data return a docket object for the given revlog"""
    header = S_HEADER.unpack(data[: S_HEADER.size])
    offset = S_HEADER.size
-    version_header = header[0]
-    index_uuid_size = header[1]
+
+    iheader = iter(header)
+
+    version_header = next(iheader)
+
+    index_uuid_size = next(iheader)
    index_uuid = data[offset : offset + index_uuid_size]
    offset += index_uuid_size
-    data_uuid_size = header[2]
+
+    data_uuid_size = next(iheader)
    data_uuid = data[offset : offset + data_uuid_size]
    offset += data_uuid_size
-    index_size = header[3]
-    pending_index_size = header[4]
-    data_size = header[5]
-    pending_data_size = header[6]
-    default_compression_header = header[7]
+
+    index_size = next(iheader)
+
+    pending_index_size = next(iheader)
+
+    data_size = next(iheader)
+
+    pending_data_size = next(iheader)
+
+    default_compression_header = next(iheader)
+
    docket = RevlogDocket(
        revlog,
        use_pending=use_pending,
        version_header=version_header,
        index_uuid=index_uuid,
        data_uuid=data_uuid,
        index_end=index_size,
        pending_index_end=pending_index_size,
        data_end=data_size,
        pending_data_end=pending_data_size,
        default_compression_header=default_compression_header,
    )
    return docket
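For reference, the docket layout documented in the file is a fixed-width header followed immediately by the two variable-length uuids. Below is a minimal, self-contained sketch of packing and parsing that layout; it assumes a 4-byte big-endian version field as a stand-in for constants.INDEX_HEADER, which is defined elsewhere, so the format string is illustrative rather than the exact on-disk one:

import struct

# assumed stand-in: '>I' for the version header, then the documented
# 'BBLLLLc' tail (two uuid sizes, four offsets, compression header)
S_HEADER = struct.Struct('>IBBLLLLc')

def parse(data):
    # fixed-width part first
    (version_header, index_uuid_size, data_uuid_size,
     index_size, pending_index_size,
     data_size, pending_data_size,
     default_compression_header) = S_HEADER.unpack(data[:S_HEADER.size])
    # the two uuids trail the header, back to back
    offset = S_HEADER.size
    index_uuid = data[offset:offset + index_uuid_size]
    offset += index_uuid_size
    data_uuid = data[offset:offset + data_uuid_size]
    return version_header, index_uuid, data_uuid

# round trip with toy values
blob = S_HEADER.pack(0xDEAD, 8, 8, 0, 0, 0, 0, b'u') + b'aabbccdd' + b'11223344'
assert parse(blob) == (0xDEAD, b'aabbccdd', b'11223344')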
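The pending machinery in write()/_serialize() lets one docket advertise two views at once: ordinary readers trust only the committed ("official") sizes, while a reader opened with use_pending=True inside the transaction may also see the pending tail. A condensed, hypothetical sketch of that selection rule:

def visible_index_end(official_end, pending_end, use_pending=False):
    # ordinary readers must stop at the last committed offset; a hook
    # running inside the open transaction may read the pending data too
    assert official_end <= pending_end
    return pending_end if use_pending else official_end

assert visible_index_end(100, 160) == 100  # normal reader
assert visible_index_end(100, 160, use_pending=True) == 160  # pending reader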