nodemap: make sure the nodemap docket is updated after the changelog...
marmoute
r45004:448d700e default
@@ -1,598 +1,600 @@
# nodemap.py - nodemap related code and utilities
#
# Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
# Copyright 2019 George Racinet <georges.racinet@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import errno
import os
import re
import struct

from .. import (
    error,
    node as nodemod,
    util,
)


class NodeMap(dict):
    def __missing__(self, x):
        raise error.RevlogError(b'unknown node: %s' % x)


def persisted_data(revlog):
    """read the nodemap for a revlog from disk"""
    if revlog.nodemap_file is None:
        return None
    pdata = revlog.opener.tryread(revlog.nodemap_file)
    if not pdata:
        return None
    offset = 0
    (version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size])
    if version != ONDISK_VERSION:
        return None
    offset += S_VERSION.size
    headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
    uid_size, tip_rev, data_length, data_unused, tip_node_size = headers
    offset += S_HEADER.size
    docket = NodeMapDocket(pdata[offset : offset + uid_size])
    offset += uid_size
    docket.tip_rev = tip_rev
    docket.tip_node = pdata[offset : offset + tip_node_size]
    docket.data_length = data_length
    docket.data_unused = data_unused

    filename = _rawdata_filepath(revlog, docket)
    use_mmap = revlog.opener.options.get("exp-persistent-nodemap.mmap")
    try:
        with revlog.opener(filename) as fd:
            if use_mmap:
                data = util.buffer(util.mmapread(fd, data_length))
            else:
                data = fd.read(data_length)
    except OSError as e:
        # a missing data file means there is no usable nodemap on disk
        if e.errno == errno.ENOENT:
            return None
        raise
    if len(data) < data_length:
        return None
    return docket, data


def setup_persistent_nodemap(tr, revlog):
    """Install whatever is needed transaction side to persist a nodemap on disk

    (only actually persist the nodemap if this is relevant for this revlog)
    """
    if revlog._inline:
        return  # inline revlogs are too small for this to be relevant
    if revlog.nodemap_file is None:
        return  # we do not use persistent_nodemap on this revlog
-    callback_id = b"revlog-persistent-nodemap-%s" % revlog.nodemap_file
+
+    # this needs to run after the changelog finalization, whose callback id
+    # starts with "cl-"; hence the "nm-" prefix, which sorts after it
+    callback_id = b"nm-revlog-persistent-nodemap-%s" % revlog.nodemap_file
    if tr.hasfinalize(callback_id):
        return  # no need to register again
    tr.addpending(
        callback_id, lambda tr: _persist_nodemap(tr, revlog, pending=True)
    )
    tr.addfinalize(callback_id, lambda tr: _persist_nodemap(tr, revlog))
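
# Editor's sketch (hypothetical ids, not part of nodemap.py): the comment in
# the hunk above implies that transaction callbacks run in sorted category
# order; under that assumption, the b"nm-" prefix is what puts the nodemap
# callback after the changelog's b"cl-" prefixed one.
_sketch_categories = [
    b"nm-revlog-persistent-nodemap-00changelog.n",
    b"cl-140234567",  # made-up stand-in for the changelog callback id
]
assert sorted(_sketch_categories) == [
    b"cl-140234567",
    b"nm-revlog-persistent-nodemap-00changelog.n",
]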


class _NoTransaction(object):
    """transaction-like object to update the nodemap outside a transaction
    """

    def __init__(self):
        self._postclose = {}

    def addpostclose(self, callback_id, callback_func):
        self._postclose[callback_id] = callback_func


def update_persistent_nodemap(revlog):
    """update the persistent nodemap right now

    To be used for updating the nodemap on disk outside of a normal transaction
    setup (e.g., `debugupdatecache`).
    """
    notr = _NoTransaction()
    _persist_nodemap(notr, revlog)
    for k in sorted(notr._postclose):
        notr._postclose[k](None)


def _persist_nodemap(tr, revlog, pending=False):
    """Write nodemap data on disk for a given revlog
    """
    if getattr(revlog, 'filteredrevs', ()):
        raise error.ProgrammingError(
            "cannot persist nodemap of a filtered changelog"
        )
    if revlog.nodemap_file is None:
        msg = "calling persist nodemap on a revlog without the feature enabled"
        raise error.ProgrammingError(msg)

    can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental")
    ondisk_docket = revlog._nodemap_docket
    feed_data = util.safehasattr(revlog.index, "update_nodemap_data")
    use_mmap = revlog.opener.options.get("exp-persistent-nodemap.mmap")

    data = None
    # first attempt an incremental update of the data
    if can_incremental and ondisk_docket is not None:
        target_docket = revlog._nodemap_docket.copy()
        (
            src_docket,
            data_changed_count,
            data,
        ) = revlog.index.nodemap_data_incremental()
        if src_docket != target_docket:
            data = None
        else:
            datafile = _rawdata_filepath(revlog, target_docket)
            # EXP-TODO: if this is a cache, this should use a cache vfs, not a
            # store vfs
            new_length = target_docket.data_length + len(data)
            with revlog.opener(datafile, b'r+') as fd:
                fd.seek(target_docket.data_length)
                fd.write(data)
                if feed_data:
                    if use_mmap:
                        fd.flush()
                        new_data = util.buffer(util.mmapread(fd, new_length))
                    else:
                        fd.seek(0)
                        new_data = fd.read(new_length)
            target_docket.data_length = new_length
            target_docket.data_unused += data_changed_count

    if data is None:
        # otherwise fallback to a full new export
        target_docket = NodeMapDocket()
        datafile = _rawdata_filepath(revlog, target_docket)
        if util.safehasattr(revlog.index, "nodemap_data_all"):
            data = revlog.index.nodemap_data_all()
        else:
            data = persistent_data(revlog.index)
        # EXP-TODO: if this is a cache, this should use a cache vfs, not a
        # store vfs
        with revlog.opener(datafile, b'w+') as fd:
            fd.write(data)
            if feed_data:
                if use_mmap:
                    fd.flush()
                    new_data = util.buffer(util.mmapread(fd, len(data)))
                else:
                    new_data = data
        target_docket.data_length = len(data)
    target_docket.tip_rev = revlog.tiprev()
    target_docket.tip_node = revlog.node(target_docket.tip_rev)
    # EXP-TODO: if this is a cache, this should use a cache vfs, not a
    # store vfs
    file_path = revlog.nodemap_file
    if pending:
        file_path += b'.a'
    with revlog.opener(file_path, b'w', atomictemp=True) as fp:
        fp.write(target_docket.serialize())
    revlog._nodemap_docket = target_docket
    if feed_data:
        revlog.index.update_nodemap_data(target_docket, new_data)

    # EXP-TODO: if the transaction aborts, we should remove the new data and
    # reinstall the old one.

    # search for old data files in all cases, some older process might have
    # left one behind.
    olds = _other_rawdata_filepath(revlog, target_docket)
    if olds:
        realvfs = getattr(revlog, '_realopener', revlog.opener)

        def cleanup(tr):
            for oldfile in olds:
                realvfs.tryunlink(oldfile)

        callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file
        tr.addpostclose(callback_id, cleanup)


### Nodemap docket file
#
# The nodemap data are stored on disk using 2 files:
#
# * a raw data file containing a persistent nodemap
#   (see `Nodemap Trie` section)
#
# * a small "docket" file containing metadata
#
# While the nodemap data can be multiple tens of megabytes, the "docket" is
# small, so it is easy to update it automatically or to duplicate its content
# during a transaction.
#
# Multiple raw data files can exist at the same time (the currently valid one
# and a new one being used by an in-progress transaction). To accommodate
# this, the filename hosting the raw data has a variable part. The exact
# filename is specified inside the "docket" file.
#
# The docket file contains information to find, qualify and validate the raw
# data. Its content is currently very light, but it will expand as the on-disk
# nodemap gains the necessary features to be used in production.

# version 0 is experimental, with no backward-compatibility guarantee; do not
# use it outside of tests.
ONDISK_VERSION = 0
S_VERSION = struct.Struct(">B")
S_HEADER = struct.Struct(">BQQQQ")

ID_SIZE = 8


def _make_uid():
    """return a new unique identifier.

    The identifier is random and composed of ascii characters."""
    return nodemod.hex(os.urandom(ID_SIZE))


class NodeMapDocket(object):
    """metadata associated with persistent nodemap data

    The persistent data may come from disk or be on their way to disk.
    """

    def __init__(self, uid=None):
        if uid is None:
            uid = _make_uid()
        # a unique identifier for the data file:
        # - When new data are appended, it is preserved.
        # - When a new data file is created, a new identifier is generated.
        self.uid = uid
        # the tipmost revision stored in the data file. This revision and all
        # revisions before it are expected to be encoded in the data file.
        self.tip_rev = None
        # the node of that tipmost revision; if it mismatches the current
        # index data, the docket is not valid for the current index and
        # should be discarded.
        #
        # note: this check is not perfect, as some destructive operations
        # could preserve the same tip_rev + tip_node while altering lower
        # revisions. However, multiple other caches have the same
        # vulnerability (eg: branchmap cache).
        self.tip_node = None
        # the size (in bytes) of the persisted data to encode the nodemap
        # valid for `tip_rev`.
        # - data files shorter than this are corrupted,
        # - any extra data should be ignored.
        self.data_length = None
        # the amount (in bytes) of "dead" data, still in the data file but no
        # longer used for the nodemap.
        self.data_unused = 0

    def copy(self):
        new = NodeMapDocket(uid=self.uid)
        new.tip_rev = self.tip_rev
        new.tip_node = self.tip_node
        new.data_length = self.data_length
        new.data_unused = self.data_unused
        return new

    def __cmp__(self, other):
        if self.uid < other.uid:
            return -1
        if self.uid > other.uid:
            return 1
        elif self.data_length < other.data_length:
            return -1
        elif self.data_length > other.data_length:
            return 1
        return 0

    def __eq__(self, other):
        return self.uid == other.uid and self.data_length == other.data_length

    def serialize(self):
        """return serialized bytes for this docket"""
        data = []
        data.append(S_VERSION.pack(ONDISK_VERSION))
        headers = (
            len(self.uid),
            self.tip_rev,
            self.data_length,
            self.data_unused,
            len(self.tip_node),
        )
        data.append(S_HEADER.pack(*headers))
        data.append(self.uid)
        data.append(self.tip_node)
        return b''.join(data)
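
# Editor's sketch (hypothetical values, not part of nodemap.py): how a
# serialized docket decomposes, following S_VERSION and S_HEADER above.
_sketch_docket = NodeMapDocket(uid=b'0011223344556677')  # 16 made-up chars
_sketch_docket.tip_rev = 42
_sketch_docket.tip_node = b'\xde\xad\xbe\xef' * 5  # made-up 20-byte node
_sketch_docket.data_length = 4096
_sketch_raw = _sketch_docket.serialize()
# layout: 1 version byte, the ">BQQQQ" header, then the uid and tip node
assert _sketch_raw[0:1] == S_VERSION.pack(ONDISK_VERSION)
assert S_HEADER.unpack(_sketch_raw[1 : 1 + S_HEADER.size]) == (
    16,  # uid length
    42,  # tip_rev
    4096,  # data_length
    0,  # data_unused
    20,  # tip_node length
)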


def _rawdata_filepath(revlog, docket):
    """The (vfs relative) nodemap's rawdata file for a given uid"""
    if revlog.nodemap_file.endswith(b'.n.a'):
        prefix = revlog.nodemap_file[:-4]
    else:
        prefix = revlog.nodemap_file[:-2]
    return b"%s-%s.nd" % (prefix, docket.uid)


def _other_rawdata_filepath(revlog, docket):
    prefix = revlog.nodemap_file[:-2]
    pattern = re.compile(br"(^|/)%s-[0-9a-f]+\.nd$" % prefix)
    new_file_path = _rawdata_filepath(revlog, docket)
    new_file_name = revlog.opener.basename(new_file_path)
    dirpath = revlog.opener.dirname(new_file_path)
    others = []
    for f in revlog.opener.listdir(dirpath):
        if pattern.match(f) and f != new_file_name:
            others.append(f)
    return others


### Nodemap Trie
#
# This is a simple reference implementation to compute and persist a nodemap
# trie. This reference implementation is write only. The python version of
# this is not expected to be actually used, since it won't provide any
# performance improvement over the existing non-persistent C implementation.
#
# The nodemap is persisted as a trie using 4-bit addresses and 16-entry
# blocks. Each revision can be addressed using the shortest prefix of its
# node.
#
# The trie is stored as a sequence of blocks. Each block contains 16 entries
# (signed 32bit integer, big endian). Each entry can be one of the following:
#
# * value >= 0 -> index of sub-block
# * value == -1 -> no value
# * value <  -1 -> a revision value: rev = -(value+2)
#
# The implementation focuses on simplicity, not on performance. A Rust
# implementation should provide an efficient version of the same binary
# persistence. This reference python implementation is never meant to be
# used extensively in production.
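
# Editor's worked example of the entry encoding above (hypothetical helper,
# not part of nodemap.py):
def _sketch_decode_entry(value):
    """decode one block entry according to the three rules above"""
    if value >= 0:
        return ('sub-block', value)
    if value == -1:
        return ('no entry', None)
    return ('revision', -(value + 2))


assert _sketch_decode_entry(5) == ('sub-block', 5)
assert _sketch_decode_entry(-1) == ('no entry', None)
assert _sketch_decode_entry(-7) == ('revision', 5)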


def persistent_data(index):
    """return the persistent binary form for a nodemap for a given index
    """
    trie = _build_trie(index)
    return _persist_trie(trie)


def update_persistent_data(index, root, max_idx, last_rev):
    """return the incremental update for persistent nodemap from a given index
    """
    changed_block, trie = _update_trie(index, root, last_rev)
    return (
        changed_block * S_BLOCK.size,
        _persist_trie(trie, existing_idx=max_idx),
    )


S_BLOCK = struct.Struct(">" + ("l" * 16))

NO_ENTRY = -1
# rev 0 needs to be -2 because 0 is used by blocks and -1 is a special value.
REV_OFFSET = 2


def _transform_rev(rev):
    """Return the number used to represent the rev in the tree.

    (or retrieve a rev number from such representation)

    Note that this is an involution, a function equal to its inverse (i.e.
    applying it twice gives back the original value).
    """
    return -(rev + REV_OFFSET)


def _to_int(hex_digit):
    """turn a hexadecimal digit into a proper integer"""
    return int(hex_digit, 16)


class Block(dict):
    """represent a block of the Trie

    contains up to 16 entries indexed from 0 to 15"""

    def __init__(self):
        super(Block, self).__init__()
        # If this block exists on disk, here is its ID
        self.ondisk_id = None

    def __iter__(self):
        return iter(self.get(i) for i in range(16))


def _build_trie(index):
    """build a nodemap trie

    The nodemap stores revision numbers for each unique prefix.

    Each block is a dictionary with keys in `[0, 15]`. Values are either
    another block or a revision number.
    """
    root = Block()
    for rev in range(len(index)):
        hex = nodemod.hex(index[rev][7])
        _insert_into_block(index, 0, root, rev, hex)
    return root


def _update_trie(index, root, last_rev):
    """update a trie with the revisions added since `last_rev`"""
    changed = 0
    for rev in range(last_rev + 1, len(index)):
        hex = nodemod.hex(index[rev][7])
        changed += _insert_into_block(index, 0, root, rev, hex)
    return changed, root


def _insert_into_block(index, level, block, current_rev, current_hex):
    """insert a new revision in a block

    index: the index we are adding revision for
    level: the depth of the current block in the trie
    block: the block currently being considered
    current_rev: the revision number we are adding
    current_hex: the hexadecimal representation of the node of that revision
    """
    changed = 1
    if block.ondisk_id is not None:
        block.ondisk_id = None
    hex_digit = _to_int(current_hex[level : level + 1])
    entry = block.get(hex_digit)
    if entry is None:
        # no entry, simply store the revision number
        block[hex_digit] = current_rev
    elif isinstance(entry, dict):
        # need to recurse to an underlying block
        changed += _insert_into_block(
            index, level + 1, entry, current_rev, current_hex
        )
    else:
        # collision with a previously unique prefix, inserting new
        # vertices to fit both entries.
        other_hex = nodemod.hex(index[entry][7])
        other_rev = entry
        new = Block()
        block[hex_digit] = new
        _insert_into_block(index, level + 1, new, other_rev, other_hex)
        _insert_into_block(index, level + 1, new, current_rev, current_hex)
    return changed
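
# Editor's sketch (hypothetical data, not part of nodemap.py): a minimal
# stand-in for a revlog index; _build_trie only reads entry[7], the binary
# node of each revision.
_sketch_nodes = [
    b'\x12\x34' + b'\x00' * 18,  # rev 0, hex digits start with "1234"
    b'\x12\xf0' + b'\x00' * 18,  # rev 1, hex digits start with "12f0"
]
_sketch_index = [(0, 0, 0, 0, 0, 0, 0, n) for n in _sketch_nodes]
_sketch_root = _build_trie(_sketch_index)
# the shared prefix "12" forces two levels of sub-blocks before the third
# hex digit ("3" vs "f") tells the two revisions apart:
assert _sketch_root[1][2][3] == 0
assert _sketch_root[1][2][0xF] == 1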


def _persist_trie(root, existing_idx=None):
    """turn a nodemap trie into persistent binary data

    See `_build_trie` for nodemap trie structure"""
    block_map = {}
    if existing_idx is not None:
        base_idx = existing_idx + 1
    else:
        base_idx = 0
    chunks = []
    for tn in _walk_trie(root):
        if tn.ondisk_id is not None:
            block_map[id(tn)] = tn.ondisk_id
        else:
            block_map[id(tn)] = len(chunks) + base_idx
            chunks.append(_persist_block(tn, block_map))
    return b''.join(chunks)


def _walk_trie(block):
    """yield all the blocks in a trie

    Children blocks are always yielded before their parent block.
    """
    for (_, item) in sorted(block.items()):
        if isinstance(item, dict):
            for sub_block in _walk_trie(item):
                yield sub_block
    yield block


def _persist_block(block_node, block_map):
    """produce persistent binary data for a single block

    Children blocks are assumed to be already persisted and present in
    block_map.
    """
    data = tuple(_to_value(v, block_map) for v in block_node)
    return S_BLOCK.pack(*data)


def _to_value(item, block_map):
    """persist any value as an integer"""
    if item is None:
        return NO_ENTRY
    elif isinstance(item, dict):
        return block_map[id(item)]
    else:
        return _transform_rev(item)


def parse_data(data):
    """parse nodemap data into a nodemap trie"""
    if (len(data) % S_BLOCK.size) != 0:
        msg = "nodemap data size is not a multiple of block size (%d): %d"
        raise error.Abort(msg % (S_BLOCK.size, len(data)))
    if not data:
        return Block(), None
    block_map = {}
    new_blocks = []
    for i in range(0, len(data), S_BLOCK.size):
        block = Block()
        block.ondisk_id = len(block_map)
        block_map[block.ondisk_id] = block
        block_data = data[i : i + S_BLOCK.size]
        values = S_BLOCK.unpack(block_data)
        new_blocks.append((block, values))
    for b, values in new_blocks:
        for idx, v in enumerate(values):
            if v == NO_ENTRY:
                continue
            elif v >= 0:
                b[idx] = block_map[v]
            else:
                b[idx] = _transform_rev(v)
    # children are written before parents, so the last block is the root
    return block, i // S_BLOCK.size
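
# Editor's sketch continued (hypothetical, not part of nodemap.py): the
# binary form of the stand-in trie from the _build_trie example round-trips.
_sketch_data = persistent_data(_sketch_index)
assert len(_sketch_data) == 3 * S_BLOCK.size  # two sub-blocks plus the root
_sketch_root2, _sketch_last_idx = parse_data(_sketch_data)
assert _sketch_last_idx == 2  # the root is the last of the three blocks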


# debug utility


def check_data(ui, index, data):
    """verify that the provided nodemap data are valid for the given index"""
    ret = 0
    ui.status((b"revision in index: %d\n") % len(index))
    root, __ = parse_data(data)
    all_revs = set(_all_revisions(root))
    ui.status((b"revision in nodemap: %d\n") % len(all_revs))
    for r in range(len(index)):
        if r not in all_revs:
            msg = b" revision missing from nodemap: %d\n" % r
            ui.write_err(msg)
            ret = 1
        else:
            all_revs.remove(r)
            nm_rev = _find_node(root, nodemod.hex(index[r][7]))
            if nm_rev is None:
                msg = b" revision node does not match any entries: %d\n" % r
                ui.write_err(msg)
                ret = 1
            elif nm_rev != r:
                msg = (
                    b" revision node does not match the expected revision: "
                    b"%d != %d\n" % (r, nm_rev)
                )
                ui.write_err(msg)
                ret = 1

    if all_revs:
        for r in sorted(all_revs):
            msg = b" extra revision in nodemap: %d\n" % r
            ui.write_err(msg)
            ret = 1
    return ret


def _all_revisions(root):
    """return all revisions stored in a Trie"""
    for block in _walk_trie(root):
        for v in block:
            if v is None or isinstance(v, Block):
                continue
            yield v


def _find_node(block, node):
    """find the revision associated with a given node"""
    entry = block.get(_to_int(node[0:1]))
    if isinstance(entry, dict):
        return _find_node(entry, node[1:])
    return entry
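
# Editor's sketch continued (hypothetical, not part of nodemap.py): any
# unique hex prefix resolves, since lookup stops at the first non-block
# entry; the full 40-digit node works just as well.
assert _find_node(_sketch_root2, b"12f") == 1
assert _find_node(_sketch_root2, nodemod.hex(_sketch_index[0][7])) == 0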