nodemap: never read more than the expected data amount...
marmoute
r44811:c7eebdb1 default
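The change below makes `persisted_data` validate the raw nodemap file against the length recorded in its docket: a file shorter than `data_length` means the cache is unusable, and any bytes past `data_length` (for example, appended by a transaction still in progress) must be ignored. A minimal standalone sketch of that guard, with hypothetical `read_bounded`/`vfs` names:

    def read_bounded(vfs, filename, data_length):
        # Hypothetical helper mirroring the guard added to persisted_data():
        # never use more (or less) than the amount of data the docket announces.
        data = vfs.tryread(filename)
        if len(data) < data_length:
            return None  # file is truncated: the persisted nodemap is unusable
        elif len(data) > data_length:
            data = data[:data_length]  # drop trailing bytes from in-flight writes
        return data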
@@ -1,508 +1,513 @@
 # nodemap.py - nodemap related code and utilities
 #
 # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
 # Copyright 2019 George Racinet <georges.racinet@octobus.net>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.

 from __future__ import absolute_import

 import os
 import re
 import struct

 from .. import (
     error,
     node as nodemod,
     util,
 )


 class NodeMap(dict):
     def __missing__(self, x):
         raise error.RevlogError(b'unknown node: %s' % x)


 def persisted_data(revlog):
     """read the nodemap for a revlog from disk"""
     if revlog.nodemap_file is None:
         return None
     pdata = revlog.opener.tryread(revlog.nodemap_file)
     if not pdata:
         return None
     offset = 0
     (version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size])
     if version != ONDISK_VERSION:
         return None
     offset += S_VERSION.size
     headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
     uid_size, tip_rev, data_length, data_unused = headers
     offset += S_HEADER.size
     docket = NodeMapDocket(pdata[offset : offset + uid_size])
     docket.tip_rev = tip_rev
     docket.data_length = data_length
     docket.data_unused = data_unused

     filename = _rawdata_filepath(revlog, docket)
-    return docket, revlog.opener.tryread(filename)
+    data = revlog.opener.tryread(filename)
+    if len(data) < data_length:
+        return None
+    elif len(data) > data_length:
+        data = data[:data_length]
+    return docket, data


 def setup_persistent_nodemap(tr, revlog):
     """Install whatever is needed transaction side to persist a nodemap on disk

     (only actually persist the nodemap if this is relevant for this revlog)
     """
     if revlog._inline:
         return  # inlined revlogs are too small for this to be relevant
     if revlog.nodemap_file is None:
         return  # we do not use persistent_nodemap on this revlog
     callback_id = b"revlog-persistent-nodemap-%s" % revlog.nodemap_file
     if tr.hasfinalize(callback_id):
         return  # no need to register again
     tr.addfinalize(callback_id, lambda tr: _persist_nodemap(tr, revlog))


 def _persist_nodemap(tr, revlog):
     """Write nodemap data on disk for a given revlog
     """
     if getattr(revlog, 'filteredrevs', ()):
         raise error.ProgrammingError(
             "cannot persist nodemap of a filtered changelog"
         )
     if revlog.nodemap_file is None:
         msg = "calling persist nodemap on a revlog without the feature enabled"
         raise error.ProgrammingError(msg)

     can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental")
     ondisk_docket = revlog._nodemap_docket

     data = None
     # first attempt an incremental update of the data
     if can_incremental and ondisk_docket is not None:
         target_docket = revlog._nodemap_docket.copy()
         (
             src_docket,
             data_changed_count,
             data,
         ) = revlog.index.nodemap_data_incremental()
         if src_docket != target_docket:
             data = None
         else:
             datafile = _rawdata_filepath(revlog, target_docket)
             # EXP-TODO: if this is a cache, this should use a cache vfs, not a
             # store vfs
             with revlog.opener(datafile, b'r+') as fd:
                 fd.seek(target_docket.data_length)
                 fd.write(data)
             target_docket.data_length += len(data)
             target_docket.data_unused += data_changed_count

     if data is None:
         # otherwise fallback to a full new export
         target_docket = NodeMapDocket()
         datafile = _rawdata_filepath(revlog, target_docket)
         if util.safehasattr(revlog.index, "nodemap_data_all"):
             data = revlog.index.nodemap_data_all()
         else:
             data = persistent_data(revlog.index)
         # EXP-TODO: if this is a cache, this should use a cache vfs, not a
         # store vfs
         with revlog.opener(datafile, b'w') as fd:
             fd.write(data)
         target_docket.data_length = len(data)
     target_docket.tip_rev = revlog.tiprev()
     # EXP-TODO: if this is a cache, this should use a cache vfs, not a
     # store vfs
     with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp:
         fp.write(target_docket.serialize())
     revlog._nodemap_docket = target_docket
     # EXP-TODO: if the transaction aborts, we should remove the new data and
     # reinstall the old one.

     # search for old index files in all cases, some older process might have
     # left one behind.
     olds = _other_rawdata_filepath(revlog, target_docket)
     if olds:
         realvfs = getattr(revlog, '_realopener', revlog.opener)

         def cleanup(tr):
             for oldfile in olds:
                 realvfs.tryunlink(oldfile)

         callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file
         tr.addpostclose(callback_id, cleanup)


 ### Nodemap docket file
 #
 # The nodemap data are stored on disk using 2 files:
 #
 # * a raw data file containing a persistent nodemap
 # (see `Nodemap Trie` section)
 #
 # * a small "docket" file containing metadata
 #
 # While the nodemap data can be multiple tens of megabytes, the "docket" is
 # small, so it is easy to update it automatically or to duplicate its content
 # during a transaction.
 #
 # Multiple raw data files can exist at the same time (the currently valid one
 # and a new one being used by an in-progress transaction). To accommodate this,
 # the filename hosting the raw data has a variable part. The exact filename is
 # specified inside the "docket" file.
 #
 # The docket file contains information to find, qualify and validate the raw
 # data. Its content is currently very light, but it will expand as the on-disk
 # nodemap gains the necessary features to be used in production.

 # version 0 is experimental, no BC guarantee, do not use outside of tests.
 ONDISK_VERSION = 0
 S_VERSION = struct.Struct(">B")
 S_HEADER = struct.Struct(">BQQQ")

 ID_SIZE = 8


 def _make_uid():
     """return a new unique identifier.

     The identifier is random and composed of ascii characters."""
     return nodemod.hex(os.urandom(ID_SIZE))


 class NodeMapDocket(object):
     """metadata associated with persistent nodemap data

     The persistent data may come from disk or be on their way to disk.
     """

     def __init__(self, uid=None):
         if uid is None:
             uid = _make_uid()
         self.uid = uid
         self.tip_rev = None
         self.data_length = None
         self.data_unused = 0

     def copy(self):
         new = NodeMapDocket(uid=self.uid)
         new.tip_rev = self.tip_rev
         new.data_length = self.data_length
         new.data_unused = self.data_unused
         return new

     def __cmp__(self, other):
         if self.uid < other.uid:
             return -1
         if self.uid > other.uid:
             return 1
         elif self.data_length < other.data_length:
             return -1
         elif self.data_length > other.data_length:
             return 1
         return 0

     def __eq__(self, other):
         return self.uid == other.uid and self.data_length == other.data_length

     def serialize(self):
         """return serialized bytes for a docket using the passed uid"""
         data = []
         data.append(S_VERSION.pack(ONDISK_VERSION))
         headers = (
             len(self.uid),
             self.tip_rev,
             self.data_length,
             self.data_unused,
         )
         data.append(S_HEADER.pack(*headers))
         data.append(self.uid)
         return b''.join(data)


 def _rawdata_filepath(revlog, docket):
     """The (vfs relative) nodemap's rawdata file for a given uid"""
     prefix = revlog.nodemap_file[:-2]
     return b"%s-%s.nd" % (prefix, docket.uid)


 def _other_rawdata_filepath(revlog, docket):
     prefix = revlog.nodemap_file[:-2]
     pattern = re.compile(br"(^|/)%s-[0-9a-f]+\.nd$" % prefix)
     new_file_path = _rawdata_filepath(revlog, docket)
     new_file_name = revlog.opener.basename(new_file_path)
     dirpath = revlog.opener.dirname(new_file_path)
     others = []
     for f in revlog.opener.listdir(dirpath):
         if pattern.match(f) and f != new_file_name:
             others.append(f)
     return others


 ### Nodemap Trie
 #
 # This is a simple reference implementation to compute and persist a nodemap
 # trie. This reference implementation is write only. The python version of this
 # is not expected to be actually used, since it won't provide a performance
 # improvement over the existing non-persistent C implementation.
 #
 # The nodemap is persisted as a Trie using 4bits-address/16-entries blocks. Each
 # revision can be addressed using the shortest prefix of its node.
 #
 # The trie is stored as a sequence of blocks. Each block contains 16 entries
 # (signed 64bit integer, big endian). Each entry can be one of the following:
 #
 # * value >= 0 -> index of sub-block
 # * value == -1 -> no value
 # * value < -1 -> a revision value: rev = -(value+2)
 #
 # The implementation focuses on simplicity, not on performance. A Rust
 # implementation should provide an efficient version of the same binary
 # persistence. This reference python implementation is never meant to be
 # used extensively in production.


 def persistent_data(index):
     """return the persistent binary form for a nodemap for a given index
     """
     trie = _build_trie(index)
     return _persist_trie(trie)


 def update_persistent_data(index, root, max_idx, last_rev):
     """return the incremental update for persistent nodemap from a given index
     """
     changed_block, trie = _update_trie(index, root, last_rev)
     return (
         changed_block * S_BLOCK.size,
         _persist_trie(trie, existing_idx=max_idx),
     )


 S_BLOCK = struct.Struct(">" + ("l" * 16))

 NO_ENTRY = -1
 # rev 0 needs to be -2 because 0 is used by blocks, and -1 is a special value.
 REV_OFFSET = 2


 def _transform_rev(rev):
     """Return the number used to represent the rev in the tree.

     (or retrieve a rev number from such a representation)

     Note that this is an involution, a function equal to its inverse (i.e.
     which gives the identity when composed with itself).
     """
     return -(rev + REV_OFFSET)


 def _to_int(hex_digit):
     """turn a hexadecimal digit into a proper integer"""
     return int(hex_digit, 16)


 class Block(dict):
     """represent a block of the Trie

     contains up to 16 entries indexed from 0 to 15"""

     def __init__(self):
         super(Block, self).__init__()
         # If this block exists on disk, here is its ID
         self.ondisk_id = None

     def __iter__(self):
         return iter(self.get(i) for i in range(16))


 def _build_trie(index):
     """build a nodemap trie

     The nodemap stores revision numbers for each unique prefix.

     Each block is a dictionary with keys in `[0, 15]`. Values are either
     another block or a revision number.
     """
     root = Block()
     for rev in range(len(index)):
         hex = nodemod.hex(index[rev][7])
         _insert_into_block(index, 0, root, rev, hex)
     return root


 def _update_trie(index, root, last_rev):
     """update a trie with the revisions added to the index since last_rev"""
     changed = 0
     for rev in range(last_rev + 1, len(index)):
         hex = nodemod.hex(index[rev][7])
         changed += _insert_into_block(index, 0, root, rev, hex)
     return changed, root


 def _insert_into_block(index, level, block, current_rev, current_hex):
     """insert a new revision in a block

     index: the index we are adding revisions for
     level: the depth of the current block in the trie
     block: the block currently being considered
     current_rev: the revision number we are adding
     current_hex: the hexadecimal representation of the node of that revision
     """
     changed = 1
     if block.ondisk_id is not None:
         block.ondisk_id = None
     hex_digit = _to_int(current_hex[level : level + 1])
     entry = block.get(hex_digit)
     if entry is None:
         # no entry, simply store the revision number
         block[hex_digit] = current_rev
     elif isinstance(entry, dict):
         # need to recurse to an underlying block
         changed += _insert_into_block(
             index, level + 1, entry, current_rev, current_hex
         )
     else:
         # collision with a previously unique prefix, inserting new
         # vertices to fit both entries.
         other_hex = nodemod.hex(index[entry][7])
         other_rev = entry
         new = Block()
         block[hex_digit] = new
         _insert_into_block(index, level + 1, new, other_rev, other_hex)
         _insert_into_block(index, level + 1, new, current_rev, current_hex)
     return changed


 def _persist_trie(root, existing_idx=None):
     """turn a nodemap trie into persistent binary data

     See `_build_trie` for nodemap trie structure"""
     block_map = {}
     if existing_idx is not None:
         base_idx = existing_idx + 1
     else:
         base_idx = 0
     chunks = []
     for tn in _walk_trie(root):
         if tn.ondisk_id is not None:
             block_map[id(tn)] = tn.ondisk_id
         else:
             block_map[id(tn)] = len(chunks) + base_idx
             chunks.append(_persist_block(tn, block_map))
     return b''.join(chunks)


 def _walk_trie(block):
     """yield all the blocks in a trie

     Children blocks are always yielded before their parent block.
     """
     for (_, item) in sorted(block.items()):
         if isinstance(item, dict):
             for sub_block in _walk_trie(item):
                 yield sub_block
     yield block
408
413
409 def _persist_block(block_node, block_map):
414 def _persist_block(block_node, block_map):
410 """produce persistent binary data for a single block
415 """produce persistent binary data for a single block
411
416
412 Children block are assumed to be already persisted and present in
417 Children block are assumed to be already persisted and present in
413 block_map.
418 block_map.
414 """
419 """
415 data = tuple(_to_value(v, block_map) for v in block_node)
420 data = tuple(_to_value(v, block_map) for v in block_node)
416 return S_BLOCK.pack(*data)
421 return S_BLOCK.pack(*data)
417
422
418
423
419 def _to_value(item, block_map):
424 def _to_value(item, block_map):
420 """persist any value as an integer"""
425 """persist any value as an integer"""
421 if item is None:
426 if item is None:
422 return NO_ENTRY
427 return NO_ENTRY
423 elif isinstance(item, dict):
428 elif isinstance(item, dict):
424 return block_map[id(item)]
429 return block_map[id(item)]
425 else:
430 else:
426 return _transform_rev(item)
431 return _transform_rev(item)
427
432
428
433
429 def parse_data(data):
434 def parse_data(data):
430 """parse parse nodemap data into a nodemap Trie"""
435 """parse parse nodemap data into a nodemap Trie"""
431 if (len(data) % S_BLOCK.size) != 0:
436 if (len(data) % S_BLOCK.size) != 0:
432 msg = "nodemap data size is not a multiple of block size (%d): %d"
437 msg = "nodemap data size is not a multiple of block size (%d): %d"
433 raise error.Abort(msg % (S_BLOCK.size, len(data)))
438 raise error.Abort(msg % (S_BLOCK.size, len(data)))
434 if not data:
439 if not data:
435 return Block(), None
440 return Block(), None
436 block_map = {}
441 block_map = {}
437 new_blocks = []
442 new_blocks = []
438 for i in range(0, len(data), S_BLOCK.size):
443 for i in range(0, len(data), S_BLOCK.size):
439 block = Block()
444 block = Block()
440 block.ondisk_id = len(block_map)
445 block.ondisk_id = len(block_map)
441 block_map[block.ondisk_id] = block
446 block_map[block.ondisk_id] = block
442 block_data = data[i : i + S_BLOCK.size]
447 block_data = data[i : i + S_BLOCK.size]
443 values = S_BLOCK.unpack(block_data)
448 values = S_BLOCK.unpack(block_data)
444 new_blocks.append((block, values))
449 new_blocks.append((block, values))
445 for b, values in new_blocks:
450 for b, values in new_blocks:
446 for idx, v in enumerate(values):
451 for idx, v in enumerate(values):
447 if v == NO_ENTRY:
452 if v == NO_ENTRY:
448 continue
453 continue
449 elif v >= 0:
454 elif v >= 0:
450 b[idx] = block_map[v]
455 b[idx] = block_map[v]
451 else:
456 else:
452 b[idx] = _transform_rev(v)
457 b[idx] = _transform_rev(v)
453 return block, i // S_BLOCK.size
458 return block, i // S_BLOCK.size


 # debug utility


 def check_data(ui, index, data):
     """verify that the provided nodemap data are valid for the given index"""
     ret = 0
     ui.status((b"revision in index: %d\n") % len(index))
     root, __ = parse_data(data)
     all_revs = set(_all_revisions(root))
     ui.status((b"revision in nodemap: %d\n") % len(all_revs))
     for r in range(len(index)):
         if r not in all_revs:
             msg = b" revision missing from nodemap: %d\n" % r
             ui.write_err(msg)
             ret = 1
         else:
             all_revs.remove(r)
             nm_rev = _find_node(root, nodemod.hex(index[r][7]))
             if nm_rev is None:
                 msg = b" revision node does not match any entries: %d\n" % r
                 ui.write_err(msg)
                 ret = 1
             elif nm_rev != r:
                 msg = (
                     b" revision node does not match the expected revision: "
                     b"%d != %d\n" % (r, nm_rev)
                 )
                 ui.write_err(msg)
                 ret = 1

     if all_revs:
         for r in sorted(all_revs):
             msg = b" extra revision in nodemap: %d\n" % r
             ui.write_err(msg)
             ret = 1
     return ret


 def _all_revisions(root):
     """return all revisions stored in a Trie"""
     for block in _walk_trie(root):
         for v in block:
             if v is None or isinstance(v, Block):
                 continue
             yield v


 def _find_node(block, node):
     """find the revision associated with a given node"""
     entry = block.get(_to_int(node[0:1]))
     if isinstance(entry, dict):
         return _find_node(entry, node[1:])
     return entry
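
As a reading aid, here is a self-contained sketch (assumed names, not Mercurial's API) of how a docket round-trips through the `S_VERSION` and `S_HEADER` structs defined in the file:

    import os
    import struct

    S_VERSION = struct.Struct(">B")    # 1 byte: on-disk format version
    S_HEADER = struct.Struct(">BQQQ")  # uid_size, tip_rev, data_length, data_unused

    def serialize_docket(uid, tip_rev, data_length, data_unused):
        # mirrors NodeMapDocket.serialize(): version byte, header, then the uid
        parts = [S_VERSION.pack(0)]
        parts.append(S_HEADER.pack(len(uid), tip_rev, data_length, data_unused))
        parts.append(uid)
        return b''.join(parts)

    def parse_docket(pdata):
        # mirrors persisted_data(): walk the same fields back out of the bytes
        offset = S_VERSION.size
        (version,) = S_VERSION.unpack(pdata[:offset])
        uid_size, tip_rev, data_length, data_unused = S_HEADER.unpack(
            pdata[offset : offset + S_HEADER.size]
        )
        offset += S_HEADER.size
        uid = pdata[offset : offset + uid_size]
        return version, uid, tip_rev, data_length, data_unused

    uid = os.urandom(8).hex().encode('ascii')  # 16 ascii chars, like _make_uid()
    blob = serialize_docket(uid, tip_rev=41, data_length=4096, data_unused=128)
    assert parse_docket(blob) == (0, uid, 41, 4096, 128)

The `data_length` recovered here is exactly the bound that the patched `persisted_data` now enforces against the raw data file.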
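Similarly, a standalone sketch of the trie's block encoding (`S_BLOCK`, `NO_ENTRY` and `REV_OFFSET` mirror the file's definitions; `lookup` is a hypothetical stand-in for `_find_node` that walks packed blocks instead of `Block` objects):

    import struct

    S_BLOCK = struct.Struct(">" + "l" * 16)  # one block: 16 big-endian signed int32
    NO_ENTRY = -1
    REV_OFFSET = 2

    def transform_rev(rev):
        # the involution from the file: encodes a rev as a value < -1 and back
        return -(rev + REV_OFFSET)

    def lookup(blocks, node_hex, block_idx):
        # resolve one hex digit per level, as _find_node does on Block objects
        value = blocks[block_idx][int(node_hex[0], 16)]
        if value == NO_ENTRY:
            return None
        if value >= 0:  # index of a sub-block
            return lookup(blocks, node_hex[1:], value)
        return transform_rev(value)  # value < -1: a revision number

    # two nodes sharing the prefix 'a' force one sub-block under the root
    sub = [NO_ENTRY] * 16
    sub[0x1] = transform_rev(0)  # node 'a1...' -> rev 0
    sub[0x9] = transform_rev(1)  # node 'a9...' -> rev 1
    root = [NO_ENTRY] * 16
    root[0xA] = 0                # entry points at sub-block number 0
    data = S_BLOCK.pack(*sub) + S_BLOCK.pack(*root)  # children before parent
    blocks = [
        S_BLOCK.unpack(data[i : i + S_BLOCK.size])
        for i in range(0, len(data), S_BLOCK.size)
    ]
    assert lookup(blocks, "a9", block_idx=len(blocks) - 1) == 1
    assert lookup(blocks, "b0", block_idx=len(blocks) - 1) is None

Packing children before their parent is the same ordering `_walk_trie` guarantees, which is why `parse_data` can treat the last block in the file as the root.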