revlogv2: use a unique filename for index...
marmoute
r48114:f612db76 default
@@ -1,235 +1,249 @@
 # docket - code related to revlog "docket"
 #
 # Copyright 2021 Pierre-Yves David <pierre-yves.david@octobus.net>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.

 ### Revlog docket file
 #
 # The revlog is stored on disk using multiple files:
 #
 # * a small docket file, containing metadata and a pointer,
 #
 # * an index file, containing fixed width information about revisions,
 #
 # * a data file, containing variable width data for these revisions,

 from __future__ import absolute_import

 import errno
 import os
 import random
 import struct

 from .. import (
     encoding,
     error,
     node,
     pycompat,
     util,
 )

 from . import (
     constants,
 )


 def make_uid(id_size=8):
     """return a new unique identifier.

     The identifier is random and composed of ascii characters."""
     # size we "hex" the result we need half the number of bits to have a final
     # uuid of size ID_SIZE
     return node.hex(os.urandom(id_size // 2))


 # some special test logic to avoid anoying random output in the test
 stable_docket_file = encoding.environ.get(b'HGTEST_UUIDFILE')

 if stable_docket_file:

     def make_uid(id_size=8):
         try:
             with open(stable_docket_file, mode='rb') as f:
                 seed = f.read().strip()
         except IOError as inst:
             if inst.errno != errno.ENOENT:
                 raise
             seed = b'04' # chosen by a fair dice roll. garanteed to be random
         if pycompat.ispy3:
             iter_seed = iter(seed)
         else:
             iter_seed = (ord(c) for c in seed)
         # some basic circular sum hashing on 64 bits
         int_seed = 0
         low_mask = int('1' * 35, 2)
         for i in iter_seed:
             high_part = int_seed >> 35
             low_part = (int_seed & low_mask) << 28
             int_seed = high_part + low_part + i
         r = random.Random()
         if pycompat.ispy3:
             r.seed(int_seed, version=1)
         else:
             r.seed(int_seed)
         # once we drop python 3.8 support we can simply use r.randbytes
         raw = r.getrandbits(id_size * 4)
         assert id_size == 8
         p = struct.pack('>L', raw)
         new = node.hex(p)
         with open(stable_docket_file, 'wb') as f:
             f.write(new)
         return new


 # Docket format
 #
 # * 4 bytes: revlog version
 # | This is mandatory as docket must be compatible with the previous
 # | revlog index header.
+# * 1 bytes: size of index uuid
 # * 8 bytes: size of index-data
 # * 8 bytes: pending size of index-data
 # * 8 bytes: size of data
 # * 8 bytes: pending size of data
 # * 1 bytes: default compression header
-S_HEADER = struct.Struct(constants.INDEX_HEADER.format + 'LLLLc')
+S_HEADER = struct.Struct(constants.INDEX_HEADER.format + 'BLLLLc')


 class RevlogDocket(object):
     """metadata associated with revlog"""

     def __init__(
         self,
         revlog,
         use_pending=False,
         version_header=None,
+        index_uuid=None,
         index_end=0,
         pending_index_end=0,
         data_end=0,
         pending_data_end=0,
         default_compression_header=None,
     ):
         self._version_header = version_header
         self._read_only = bool(use_pending)
         self._dirty = False
         self._radix = revlog.radix
         self._path = revlog._docket_file
         self._opener = revlog.opener
+        self._index_uuid = index_uuid
         # thes asserts should be True as long as we have a single index filename
         assert index_end <= pending_index_end
         assert data_end <= pending_data_end
         self._initial_index_end = index_end
         self._pending_index_end = pending_index_end
         self._initial_data_end = data_end
         self._pending_data_end = pending_data_end
         if use_pending:
             self._index_end = self._pending_index_end
             self._data_end = self._pending_data_end
         else:
             self._index_end = self._initial_index_end
             self._data_end = self._initial_data_end
         self.default_compression_header = default_compression_header

     def index_filepath(self):
         """file path to the current index file associated to this docket"""
         # very simplistic version at first
-        return b"%s.idx" % self._radix
+        if self._index_uuid is None:
+            self._index_uuid = make_uid()
+        return b"%s-%s.idx" % (self._radix, self._index_uuid)

     @property
     def index_end(self):
         return self._index_end

     @index_end.setter
     def index_end(self, new_size):
         if new_size != self._index_end:
             self._index_end = new_size
             self._dirty = True

     @property
     def data_end(self):
         return self._data_end

     @data_end.setter
     def data_end(self, new_size):
         if new_size != self._data_end:
             self._data_end = new_size
             self._dirty = True

     def write(self, transaction, pending=False, stripping=False):
         """write the modification of disk if any

         This make the new content visible to all process"""
         if not self._dirty:
             return False
         else:
             if self._read_only:
                 msg = b'writing read-only docket: %s'
                 msg %= self._path
                 raise error.ProgrammingError(msg)
             if not stripping:
                 # XXX we could, leverage the docket while stripping. However it
                 # is not powerfull enough at the time of this comment
                 transaction.addbackup(self._path, location=b'store')
             with self._opener(self._path, mode=b'w', atomictemp=True) as f:
                 f.write(self._serialize(pending=pending))
             # if pending we still need to the write final data eventually
             self._dirty = pending
             return True

     def _serialize(self, pending=False):
         if pending:
             official_index_end = self._initial_index_end
             official_data_end = self._initial_data_end
         else:
             official_index_end = self._index_end
             official_data_end = self._data_end

         # this assert should be True as long as we have a single index filename
         assert official_data_end <= self._data_end
         data = (
             self._version_header,
+            len(self._index_uuid),
             official_index_end,
             self._index_end,
             official_data_end,
             self._data_end,
             self.default_compression_header,
         )
-        return S_HEADER.pack(*data)
+        s = []
+        s.append(S_HEADER.pack(*data))
+        s.append(self._index_uuid)
+        return b''.join(s)


 def default_docket(revlog, version_header):
     """given a revlog version a new docket object for the given revlog"""
     rl_version = version_header & 0xFFFF
     if rl_version not in (constants.REVLOGV2, constants.CHANGELOGV2):
         return None
     comp = util.compengines[revlog._compengine].revlogheader()
     docket = RevlogDocket(
         revlog,
         version_header=version_header,
         default_compression_header=comp,
     )
     docket._dirty = True
     return docket


 def parse_docket(revlog, data, use_pending=False):
     """given some docket data return a docket object for the given revlog"""
     header = S_HEADER.unpack(data[: S_HEADER.size])
+    offset = S_HEADER.size
     version_header = header[0]
-    index_size = header[1]
-    pending_index_size = header[2]
-    data_size = header[3]
-    pending_data_size = header[4]
-    default_compression_header = header[5]
+    index_uuid_size = header[1]
+    index_uuid = data[offset : offset + index_uuid_size]
+    offset += index_uuid_size
+    index_size = header[2]
+    pending_index_size = header[3]
+    data_size = header[4]
+    pending_data_size = header[5]
+    default_compression_header = header[6]
     docket = RevlogDocket(
         revlog,
         use_pending=use_pending,
         version_header=version_header,
+        index_uuid=index_uuid,
         index_end=index_size,
         pending_index_end=pending_index_size,
         data_end=data_size,
         pending_data_end=pending_data_size,
         default_compression_header=default_compression_header,
     )
     return docket
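
To make the new on-disk layout easier to follow, here is a minimal, self-contained sketch of the docket format after this change. It is not Mercurial's actual module: the '>I' version field (standing in for constants.INDEX_HEADER) and the helper names are assumptions for illustration; only the 'BLLLLc' layout, the uuid appended after the fixed header, and the "<radix>-<uuid>.idx" naming come from the diff above.

import os
import struct

# fixed-width part of the docket: version header, size of the index uuid
# ('B', the field added by this change), four size fields, and the default
# compression header ('c'); this mirrors the 'BLLLLc' suffix of S_HEADER
S_HEADER = struct.Struct('>IBLLLLc')


def serialize_docket(version, uuid, index_end, pending_index_end,
                     data_end, pending_data_end, comp=b'u'):
    # pack the fixed header, then append the variable-width uuid after it
    header = S_HEADER.pack(version, len(uuid), index_end, pending_index_end,
                           data_end, pending_data_end, comp)
    return header + uuid


def parse_docket(data):
    # unpack the fixed header, then use the stored size to slice out the uuid
    fields = S_HEADER.unpack(data[:S_HEADER.size])
    offset = S_HEADER.size
    uuid = data[offset:offset + fields[1]]
    return fields[0], uuid, fields[2:6], fields[6]


uid = os.urandom(4).hex().encode('ascii')  # 8 ascii chars, like make_uid()
blob = serialize_docket(0xDEAD, uid, 64, 64, 128, 128)
version, uuid, sizes, comp = parse_docket(blob)
# the index file is then named from the revlog radix and the stored uuid
index_name = b"%s-%s.idx" % (b"00changelog", uuid)

Because the uuid length travels in the fixed header, readers can always recover the current index filename from the docket alone, which is what lets the index move to a unique, per-generation file.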
@@ -1,69 +1,85 @@
 #require reporevlogstore

 A repo with unknown revlogv2 requirement string cannot be opened

   $ hg init invalidreq
   $ cd invalidreq
   $ echo exp-revlogv2.unknown >> .hg/requires
   $ hg log
   abort: repository requires features unknown to this Mercurial: exp-revlogv2.unknown
   (see https://mercurial-scm.org/wiki/MissingRequirement for more information)
   [255]
   $ cd ..

 Can create and open repo with revlog v2 requirement

   $ cat >> $HGRCPATH << EOF
   > [experimental]
   > revlogv2 = enable-unstable-format-and-corrupt-my-data
   > EOF

   $ hg init empty-repo
   $ cd empty-repo
   $ cat .hg/requires
   dotencode
   exp-dirstate-v2 (dirstate-v2 !)
   exp-revlogv2.2
   fncache
   generaldelta
   persistent-nodemap (rust !)
   revlog-compression-zstd (zstd !)
   sparserevlog
   store

   $ hg log

 Unknown flags to revlog are rejected

   >>> with open('.hg/store/00changelog.i', 'wb') as fh:
   ... fh.write(b'\xff\x00\xde\xad') and None

   $ hg log
   abort: unknown flags (0xff00) in version 57005 revlog 00changelog
   [50]

   $ cd ..

 Writing a simple revlog v2 works

   $ hg init simple
   $ cd simple
   $ touch foo
   $ hg -q commit -A -m initial

   $ hg log
   changeset: 0:96ee1d7354c4
   tag: tip
   user: test
   date: Thu Jan 01 00:00:00 1970 +0000
   summary: initial

 Header written as expected

   $ f --hexdump --bytes 4 .hg/store/00changelog.i
   .hg/store/00changelog.i:
   0000: 00 00 de ad |....|

   $ f --hexdump --bytes 4 .hg/store/data/foo.i
   .hg/store/data/foo.i:
   0000: 00 00 de ad |....|
+
+The expected files are generated
+--------------------------------
+
+We should have have:
+- a docket
+- a index file with a unique name
+- a data file
+
+  $ ls .hg/store/00changelog* .hg/store/00manifest*
+  .hg/store/00changelog-b870a51b.idx
+  .hg/store/00changelog.d
+  .hg/store/00changelog.i
+  .hg/store/00manifest-88698448.idx
+  .hg/store/00manifest.d
+  .hg/store/00manifest.i
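
The "-b870a51b" and "-88698448" suffixes in the listing above are the index uuids recorded in each docket (kept deterministic in the test suite via HGTEST_UUIDFILE, per the seeding logic in the diff). A hypothetical stdlib-only stand-in for make_uid(), using bytes.hex() instead of node.hex(), shows where such a filename comes from:

import os

def make_uid(id_size=8):
    # hex-encoding doubles the length, so only id_size // 2 random bytes are drawn
    return os.urandom(id_size // 2).hex().encode('ascii')

print(b"%s-%s.idx" % (b"00changelog", make_uid()))  # e.g. b'00changelog-b870a51b.idx'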