##// END OF EJS Templates
nodemap: document the docket attributes...
marmoute -
r44983:283fd803 default
parent child Browse files
Show More
@@ -1,554 +1,565 b''
1 # nodemap.py - nodemap related code and utilities
1 # nodemap.py - nodemap related code and utilities
2 #
2 #
3 # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
3 # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
4 # Copyright 2019 George Racinet <georges.racinet@octobus.net>
4 # Copyright 2019 George Racinet <georges.racinet@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 import errno
11 import errno
12 import os
12 import os
13 import re
13 import re
14 import struct
14 import struct
15
15
16 from .. import (
16 from .. import (
17 error,
17 error,
18 node as nodemod,
18 node as nodemod,
19 util,
19 util,
20 )
20 )
21
21
22
22
class NodeMap(dict):
    """A mapping from node id to revision number.

    Behaves like a plain dict except that looking up an unknown node
    raises ``error.RevlogError`` instead of ``KeyError``, so callers get a
    revlog-flavored failure.
    """

    def __missing__(self, missing_node):
        raise error.RevlogError(b'unknown node: %s' % missing_node)
26
26
27
27
def persisted_data(revlog):
    """read the nodemap for a revlog from disk

    Returns a ``(docket, data)`` pair, or ``None`` when no usable on-disk
    nodemap exists (feature disabled, missing docket, unknown version,
    missing data file, or truncated data).
    """
    if revlog.nodemap_file is None:
        return None
    pdata = revlog.opener.tryread(revlog.nodemap_file)
    if not pdata:
        return None
    offset = 0
    (version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size])
    if version != ONDISK_VERSION:
        # unknown on-disk format; ignore the persisted nodemap entirely
        return None
    offset += S_VERSION.size
    headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
    uid_size, tip_rev, data_length, data_unused = headers
    offset += S_HEADER.size
    docket = NodeMapDocket(pdata[offset : offset + uid_size])
    docket.tip_rev = tip_rev
    docket.data_length = data_length
    docket.data_unused = data_unused

    filename = _rawdata_filepath(revlog, docket)
    use_mmap = revlog.opener.options.get("exp-persistent-nodemap.mmap")
    try:
        with revlog.opener(filename) as fd:
            if use_mmap:
                data = util.buffer(util.mmapread(fd, data_length))
            else:
                data = fd.read(data_length)
    except OSError as e:
        if e.errno == errno.ENOENT:
            # BUGFIX: the docket points at a data file that no longer
            # exists (e.g. removed by a concurrent process).  The previous
            # code fell through with `data` unbound and crashed with a
            # NameError on the `len(data)` check below; treat this case as
            # "no persisted nodemap" instead.
            return None
        raise
    if len(data) < data_length:
        # truncated data file: consider the persisted nodemap corrupted
        return None
    return docket, data
62
62
63
63
def setup_persistent_nodemap(tr, revlog):
    """Install whatever is needed transaction side to persist a nodemap on disk

    (only actually persist the nodemap if this is relevant for this revlog)
    """
    if revlog._inline:
        # inlined revlogs are too small for a persistent nodemap to matter
        return
    if revlog.nodemap_file is None:
        # the persistent nodemap feature is not enabled for this revlog
        return
    callback_id = b"revlog-persistent-nodemap-%s" % revlog.nodemap_file
    if not tr.hasfinalize(callback_id):
        # register only once per transaction and revlog
        tr.addfinalize(
            callback_id, lambda tr: _persist_nodemap(tr.addpostclose, revlog)
        )
79
79
80
80
def update_persistent_nodemap(revlog):
    """update the persistent nodemap right now

    To be used for updating the nodemap on disk outside of a normal transaction
    setup (eg, `debugupdatecache`).
    """
    # collect the cleanup callbacks that _persist_nodemap would normally
    # hand to the transaction, then run them immediately
    pending = []

    def _record(_callback_id, callback):
        pending.append(callback)

    _persist_nodemap(_record, revlog)
    for callback in pending:
        callback(None)
91
91
92
92
def _persist_nodemap(cleaner, revlog):
    """Write nodemap data on disk for a given revlog

    ``cleaner`` is a callable with the signature of
    ``transaction.addpostclose``; it is used to schedule removal of
    superseded rawdata files once the new state is committed.
    """
    if getattr(revlog, 'filteredrevs', ()):
        raise error.ProgrammingError(
            "cannot persist nodemap of a filtered changelog"
        )
    if revlog.nodemap_file is None:
        # BUGFIX: typo "enableb" -> "enabled" in the error message
        msg = "calling persist nodemap on a revlog without the feature enabled"
        raise error.ProgrammingError(msg)

    can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental")
    ondisk_docket = revlog._nodemap_docket
    feed_data = util.safehasattr(revlog.index, "update_nodemap_data")
    use_mmap = revlog.opener.options.get("exp-persistent-nodemap.mmap")

    data = None
    # first attempt an incremental update of the data
    if can_incremental and ondisk_docket is not None:
        target_docket = revlog._nodemap_docket.copy()
        (
            src_docket,
            data_changed_count,
            data,
        ) = revlog.index.nodemap_data_incremental()
        if src_docket != target_docket:
            # the on-disk state moved under us: fall back to a full rewrite
            data = None
        else:
            datafile = _rawdata_filepath(revlog, target_docket)
            # EXP-TODO: if this is a cache, this should use a cache vfs, not a
            # store vfs
            new_length = target_docket.data_length + len(data)
            with revlog.opener(datafile, b'r+') as fd:
                fd.seek(target_docket.data_length)
                fd.write(data)
                if feed_data:
                    # BUGFIX: the two branches below were swapped — mmap was
                    # used when the mmap option was *off* (compare with the
                    # read path in `persisted_data`).  Use mmap only when
                    # requested, plain read() otherwise.
                    if use_mmap:
                        fd.flush()
                        new_data = util.buffer(util.mmapread(fd, new_length))
                    else:
                        fd.seek(0)
                        new_data = fd.read(new_length)
            target_docket.data_length = new_length
            target_docket.data_unused += data_changed_count

    if data is None:
        # otherwise fallback to a full new export
        target_docket = NodeMapDocket()
        datafile = _rawdata_filepath(revlog, target_docket)
        if util.safehasattr(revlog.index, "nodemap_data_all"):
            data = revlog.index.nodemap_data_all()
        else:
            data = persistent_data(revlog.index)
        # EXP-TODO: if this is a cache, this should use a cache vfs, not a
        # store vfs
        with revlog.opener(datafile, b'w+') as fd:
            fd.write(data)
            if feed_data:
                # BUGFIX: same branch swap as above — mmap the fresh file
                # only when the mmap option is enabled.
                if use_mmap:
                    fd.flush()
                    new_data = util.buffer(util.mmapread(fd, len(data)))
                else:
                    new_data = data
        target_docket.data_length = len(data)
    target_docket.tip_rev = revlog.tiprev()
    # EXP-TODO: if this is a cache, this should use a cache vfs, not a
    # store vfs
    with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp:
        fp.write(target_docket.serialize())
    revlog._nodemap_docket = target_docket
    if feed_data:
        revlog.index.update_nodemap_data(target_docket, new_data)

    # EXP-TODO: if the transaction abort, we should remove the new data and
    # reinstall the old one.

    # search for old index file in all cases, some older process might have
    # left one behind.
    olds = _other_rawdata_filepath(revlog, target_docket)
    if olds:
        realvfs = getattr(revlog, '_realopener', revlog.opener)

        def cleanup(tr):
            for oldfile in olds:
                realvfs.tryunlink(oldfile)

        callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file
        cleaner(callback_id, cleanup)
181
181
182
182
183 ### Nodemap docket file
183 ### Nodemap docket file
184 #
184 #
185 # The nodemap data are stored on disk using 2 files:
185 # The nodemap data are stored on disk using 2 files:
186 #
186 #
187 # * a raw data file containing a persistent nodemap
187 # * a raw data file containing a persistent nodemap
188 # (see `Nodemap Trie` section)
188 # (see `Nodemap Trie` section)
189 #
189 #
190 # * a small "docket" file containing metadata
190 # * a small "docket" file containing metadata
191 #
191 #
192 # While the nodemap data can be multiple tens of megabytes, the "docket" is
192 # While the nodemap data can be multiple tens of megabytes, the "docket" is
193 # small, it is easy to update it automatically or to duplicate its content
193 # small, it is easy to update it automatically or to duplicate its content
194 # during a transaction.
194 # during a transaction.
195 #
195 #
196 # Multiple raw data can exist at the same time (The currently valid one and a
196 # Multiple raw data can exist at the same time (The currently valid one and a
197 # new one being used by an in-progress transaction). To accommodate this, the
197 # new one being used by an in-progress transaction). To accommodate this, the
198 # filename hosting the raw data has a variable part. The exact filename is
198 # filename hosting the raw data has a variable part. The exact filename is
199 # specified inside the "docket" file.
199 # specified inside the "docket" file.
200 #
200 #
201 # The docket file contains information to find, qualify and validate the raw
201 # The docket file contains information to find, qualify and validate the raw
202 # data. Its content is currently very light, but it will expand as the on disk
202 # data. Its content is currently very light, but it will expand as the on disk
203 # nodemap gains the necessary features to be used in production.
203 # nodemap gains the necessary features to be used in production.
204
204
# version 0 is experimental, no BC guarantee, do not use outside of tests.
ONDISK_VERSION = 0
# docket prefix: a single unsigned byte holding the format version
S_VERSION = struct.Struct(">B")
# docket header: uid length, tip revision, data length, unused-data length
# (one unsigned byte followed by three unsigned 64bit big-endian integers)
S_HEADER = struct.Struct(">BQQQ")

# number of random bytes drawn to build a data-file unique identifier
ID_SIZE = 8
211
211
212
212
def _make_uid():
    """return a new unique identifier.

    The identifier is random and composed of ascii characters."""
    # hex-encode ID_SIZE random bytes, yielding 2 * ID_SIZE ascii characters
    random_bytes = os.urandom(ID_SIZE)
    return nodemod.hex(random_bytes)
218
218
219
219
class NodeMapDocket(object):
    """metadata associated with persistent nodemap data

    The persistent data may come from disk or be on their way to disk.
    """

    def __init__(self, uid=None):
        if uid is None:
            uid = _make_uid()
        # a unique identifier for the data file:
        # - When new data are appended, it is preserved.
        # - When a new data file is created, a new identifier is generated.
        self.uid = uid
        # the tipmost revision stored in the data file. This revision and all
        # revision before it are expected to be encoded in the data file.
        self.tip_rev = None
        # the size (in bytes) of the persisted data to encode the nodemap valid
        # for `tip_rev`.
        # - data file shorter than this are corrupted,
        # - any extra data should be ignored.
        self.data_length = None
        # the amount (in bytes) of "dead" data, still in the data file but no
        # longer used for the nodemap.
        self.data_unused = 0

    def copy(self):
        # the copy shares the uid: both dockets refer to the same data file
        new = NodeMapDocket(uid=self.uid)
        new.tip_rev = self.tip_rev
        new.data_length = self.data_length
        new.data_unused = self.data_unused
        return new

    def __cmp__(self, other):
        # Python 2 only; orders dockets by (uid, data_length)
        if self.uid < other.uid:
            return -1
        if self.uid > other.uid:
            return 1
        elif self.data_length < other.data_length:
            return -1
        elif self.data_length > other.data_length:
            return 1
        return 0

    def __eq__(self, other):
        # two dockets match when they describe the same amount of data in
        # the same data file
        return self.uid == other.uid and self.data_length == other.data_length

    def serialize(self):
        """return the serialized bytes for this docket

        layout: version byte, header (see S_HEADER), then the raw uid.
        """
        data = []
        data.append(S_VERSION.pack(ONDISK_VERSION))
        headers = (
            len(self.uid),
            self.tip_rev,
            self.data_length,
            self.data_unused,
        )
        data.append(S_HEADER.pack(*headers))
        data.append(self.uid)
        return b''.join(data)
268
279
269
280
270 def _rawdata_filepath(revlog, docket):
281 def _rawdata_filepath(revlog, docket):
271 """The (vfs relative) nodemap's rawdata file for a given uid"""
282 """The (vfs relative) nodemap's rawdata file for a given uid"""
272 prefix = revlog.nodemap_file[:-2]
283 prefix = revlog.nodemap_file[:-2]
273 return b"%s-%s.nd" % (prefix, docket.uid)
284 return b"%s-%s.nd" % (prefix, docket.uid)
274
285
275
286
def _other_rawdata_filepath(revlog, docket):
    """list rawdata files in the store that do not belong to ``docket``

    These are leftovers from older processes and are safe to delete once the
    new docket is committed.
    """
    prefix = revlog.nodemap_file[:-2]
    pattern = re.compile(br"(^|/)%s-[0-9a-f]+\.nd$" % prefix)
    new_file_path = _rawdata_filepath(revlog, docket)
    new_file_name = revlog.opener.basename(new_file_path)
    dirpath = revlog.opener.dirname(new_file_path)
    # keep every matching rawdata file except the one currently in use
    return [
        entry
        for entry in revlog.opener.listdir(dirpath)
        if pattern.match(entry) and entry != new_file_name
    ]
287
298
288
299
289 ### Nodemap Trie
300 ### Nodemap Trie
290 #
301 #
291 # This is a simple reference implementation to compute and persist a nodemap
302 # This is a simple reference implementation to compute and persist a nodemap
292 # trie. This reference implementation is write only. The python version of this
303 # trie. This reference implementation is write only. The python version of this
293 # is not expected to be actually used, since it won't provide performance
304 # is not expected to be actually used, since it won't provide performance
294 # improvement over existing non-persistent C implementation.
305 # improvement over existing non-persistent C implementation.
295 #
306 #
296 # The nodemap is persisted as Trie using 4bits-address/16-entries block. each
307 # The nodemap is persisted as Trie using 4bits-address/16-entries block. each
297 # revision can be addressed using its node shortest prefix.
308 # revision can be addressed using its node shortest prefix.
298 #
309 #
299 # The trie is stored as a sequence of block. Each block contains 16 entries
310 # The trie is stored as a sequence of block. Each block contains 16 entries
300 # (signed 32bit integer, big endian; see S_BLOCK). Each entry can be one of the following:
311 # (signed 32bit integer, big endian; see S_BLOCK). Each entry can be one of the following:
301 #
312 #
302 # * value >= 0 -> index of sub-block
313 # * value >= 0 -> index of sub-block
303 # * value == -1 -> no value
314 # * value == -1 -> no value
304 # * value < -1 -> a revision value: rev = -(value+2)
315 # * value < -1 -> a revision value: rev = -(value+2)
305 #
316 #
306 # The implementation focuses on simplicity, not on performance. A Rust
317 # The implementation focuses on simplicity, not on performance. A Rust
307 # implementation should provide an efficient version of the same binary
318 # implementation should provide an efficient version of the same binary
308 # persistence. This reference python implementation is never meant to be
319 # persistence. This reference python implementation is never meant to be
309 # extensively used in production.
320 # extensively used in production.
310
321
311
322
def persistent_data(index):
    """return the persistent binary form for a nodemap for a given index
    """
    # build the in-memory trie, then flatten it to its on-disk byte form
    return _persist_trie(_build_trie(index))
317
328
318
329
def update_persistent_data(index, root, max_idx, last_rev):
    """return the incremental update for persistent nodemap from a given index

    Returns ``(changed_bytes, data)``: the number of bytes now obsolete in
    the existing file and the new bytes to append after it.
    """
    changed_block, trie = _update_trie(index, root, last_rev)
    appended = _persist_trie(trie, existing_idx=max_idx)
    return changed_block * S_BLOCK.size, appended
327
338
328
339
# on-disk layout of one trie block: 16 signed 32bit big-endian integers
S_BLOCK = struct.Struct(">" + ("l" * 16))

# sentinel stored in a block slot that holds no entry
NO_ENTRY = -1
# rev 0 need to be -2 because 0 is used by block, -1 is a special value.
REV_OFFSET = 2
334
345
335
346
def _transform_rev(rev):
    """Return the number used to represent the rev in the tree.

    (or retrieve a rev number from such representation)

    Note that this is an involution, a function equal to its inverse (i.e.
    which gives the identity when applied to itself).
    """
    # shift revisions into the negative range below NO_ENTRY (-1)
    return -rev - REV_OFFSET
345
356
346
357
347 def _to_int(hex_digit):
358 def _to_int(hex_digit):
348 """turn an hexadecimal digit into a proper integer"""
359 """turn an hexadecimal digit into a proper integer"""
349 return int(hex_digit, 16)
360 return int(hex_digit, 16)
350
361
351
362
class Block(dict):
    """represent a block of the Trie

    contains up to 16 entry indexed from 0 to 15; iterating yields all 16
    slots in order, with ``None`` for empty ones."""

    def __init__(self):
        super(Block, self).__init__()
        # identifier of this block on disk, or None if never persisted
        self.ondisk_id = None

    def __iter__(self):
        for slot in range(16):
            yield self.get(slot)
364
375
365
376
def _build_trie(index):
    """build a nodemap trie

    The nodemap stores revision number for each unique prefix.

    Each block is a dictionary with keys in `[0, 15]`. Values are either
    another block or a revision number.
    """
    root = Block()
    for rev in range(len(index)):
        # index[rev][7] is the binary node id of that revision
        hex_node = nodemod.hex(index[rev][7])
        _insert_into_block(index, 0, root, rev, hex_node)
    return root
379
390
380
391
def _update_trie(index, root, last_rev):
    """insert revisions newer than ``last_rev`` into an existing trie

    Returns ``(changed, root)`` where ``changed`` is the number of block
    mutations performed.
    """
    changed = 0
    for rev in range(last_rev + 1, len(index)):
        hex_node = nodemod.hex(index[rev][7])
        changed += _insert_into_block(index, 0, root, rev, hex_node)
    return changed, root
388
399
389
400
def _insert_into_block(index, level, block, current_rev, current_hex):
    """insert a new revision in a block

    index: the index we are adding revision for
    level: the depth of the current block in the trie
    block: the block currently being considered
    current_rev: the revision number we are adding
    current_hex: the hexadecimal representation of that revision's node

    Returns the number of blocks dirtied by this insertion.
    """
    # this block is touched in every case
    changed = 1
    if block.ondisk_id is not None:
        # the block no longer matches its on-disk copy
        block.ondisk_id = None
    hex_digit = _to_int(current_hex[level : level + 1])
    entry = block.get(hex_digit)
    if entry is None:
        # no entry, simply store the revision number
        block[hex_digit] = current_rev
    elif isinstance(entry, dict):
        # need to recurse to an underlying block
        changed += _insert_into_block(
            index, level + 1, entry, current_rev, current_hex
        )
    else:
        # collision with a previously unique prefix, inserting new
        # vertices to fit both entry.
        other_hex = nodemod.hex(index[entry][7])
        other_rev = entry
        new = Block()
        block[hex_digit] = new
        # re-insert the displaced revision, then the new one, one level down
        _insert_into_block(index, level + 1, new, other_rev, other_hex)
        _insert_into_block(index, level + 1, new, current_rev, current_hex)
    return changed
422
433
423
434
def _persist_trie(root, existing_idx=None):
    """turn a nodemap trie into persistent binary data

    See `_build_trie` for nodemap trie structure"""
    block_map = {}
    # new blocks are numbered after any already-persisted ones
    base_idx = 0 if existing_idx is None else existing_idx + 1
    chunks = []
    for node in _walk_trie(root):
        if node.ondisk_id is None:
            # dirty block: assign the next free index and serialize it
            block_map[id(node)] = len(chunks) + base_idx
            chunks.append(_persist_block(node, block_map))
        else:
            # block unchanged since the last write: reuse its on-disk index
            block_map[id(node)] = node.ondisk_id
    return b''.join(chunks)
441
452
442
453
443 def _walk_trie(block):
454 def _walk_trie(block):
444 """yield all the block in a trie
455 """yield all the block in a trie
445
456
446 Children blocks are always yield before their parent block.
457 Children blocks are always yield before their parent block.
447 """
458 """
448 for (_, item) in sorted(block.items()):
459 for (_, item) in sorted(block.items()):
449 if isinstance(item, dict):
460 if isinstance(item, dict):
450 for sub_block in _walk_trie(item):
461 for sub_block in _walk_trie(item):
451 yield sub_block
462 yield sub_block
452 yield block
463 yield block
453
464
454
465
def _persist_block(block_node, block_map):
    """produce persistent binary data for a single block

    Children block are assumed to be already persisted and present in
    block_map.
    """
    # iterating a Block yields its 16 slots in order (None for empty ones)
    slots = [_to_value(entry, block_map) for entry in block_node]
    return S_BLOCK.pack(*slots)
463
474
464
475
def _to_value(item, block_map):
    """persist any value as an integer"""
    if isinstance(item, dict):
        # a child block: store its assigned on-disk index
        return block_map[id(item)]
    if item is None:
        return NO_ENTRY
    # a revision number, shifted into the negative range below NO_ENTRY
    return _transform_rev(item)
473
484
474
485
def parse_data(data):
    """parse nodemap data into a nodemap Trie

    Returns a ``(root, max_idx)`` pair where ``max_idx`` is the index of the
    last block read (``None`` for empty data).
    """
    if (len(data) % S_BLOCK.size) != 0:
        msg = "nodemap data size is not a multiple of block size (%d): %d"
        raise error.Abort(msg % (S_BLOCK.size, len(data)))
    if not data:
        return Block(), None
    block_map = {}
    new_blocks = []
    # first pass: create one Block per on-disk chunk, keyed by its index
    for i in range(0, len(data), S_BLOCK.size):
        block = Block()
        block.ondisk_id = len(block_map)
        block_map[block.ondisk_id] = block
        block_data = data[i : i + S_BLOCK.size]
        values = S_BLOCK.unpack(block_data)
        new_blocks.append((block, values))
    # second pass: resolve each raw value into a child block reference or a
    # revision number (see the entry encoding described above)
    for b, values in new_blocks:
        for idx, v in enumerate(values):
            if v == NO_ENTRY:
                continue
            elif v >= 0:
                b[idx] = block_map[v]
            else:
                b[idx] = _transform_rev(v)
    # the root is the last block written; `i` still holds its byte offset
    return block, i // S_BLOCK.size
500
511
501
512
502 # debug utility
513 # debug utility
503
514
504
515
def check_data(ui, index, data):
    """verify that the provided nodemap data are valid for the given index

    Returns 0 when the nodemap matches the index, 1 otherwise; details of
    every mismatch are written to the ui error channel.
    """
    ret = 0
    ui.status((b"revision in index:   %d\n") % len(index))
    root, __ = parse_data(data)
    all_revs = set(_all_revisions(root))
    ui.status((b"revision in nodemap: %d\n") % len(all_revs))
    for r in range(len(index)):
        if r not in all_revs:
            msg = b"  revision missing from nodemap: %d\n" % r
            ui.write_err(msg)
            ret = 1
        else:
            all_revs.remove(r)
            # check that looking the node up in the trie yields this revision
            nm_rev = _find_node(root, nodemod.hex(index[r][7]))
            if nm_rev is None:
                msg = b"  revision node does not match any entries: %d\n" % r
                ui.write_err(msg)
                ret = 1
            elif nm_rev != r:
                msg = (
                    b"  revision node does not match the expected revision: "
                    b"%d != %d\n" % (r, nm_rev)
                )
                ui.write_err(msg)
                ret = 1

    # anything left in all_revs exists in the nodemap but not in the index
    if all_revs:
        for r in sorted(all_revs):
            msg = b"  extra revision in  nodemap: %d\n" % r
            ui.write_err(msg)
            ret = 1
    return ret
538
549
539
550
def _all_revisions(root):
    """return all revisions stored in a Trie"""
    for block in _walk_trie(root):
        for entry in block:
            # skip empty slots and child-block references
            if entry is not None and not isinstance(entry, Block):
                yield entry
547
558
548
559
def _find_node(block, node):
    """find the revision associated with a given node

    Walks the trie one hex digit at a time; returns the revision number,
    or None when the node prefix matches no entry.
    """
    entry = block.get(_to_int(node[0:1]))
    while isinstance(entry, dict):
        node = node[1:]
        entry = entry.get(_to_int(node[0:1]))
    return entry
General Comments 0
You need to be logged in to leave comments. Login now