nodemap: fix missing r-prefix on regular expression...
Augie Fackler
r44952:6aee0647 default
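The fix is a single character. In a non-raw bytes literal, `\.` is an invalid escape sequence that Python happens to pass through unchanged, emitting a warning on Python 3.6 and later (and likely tripping Mercurial's strict test runs); the `br` prefix makes the backslash literal. A small sketch of the difference, assuming Python 3.6+:

import warnings

# the two spellings yield identical bytes at runtime...
assert br"[0-9a-f]+\.nd$" == b"[0-9a-f]+" b"\\.nd$"

# ...but compiling the non-raw spelling emits an "invalid escape sequence"
# warning (DeprecationWarning, SyntaxWarning on newer Pythons)
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    compile(r'b"[0-9a-f]+\.nd$"', "<example>", "eval")
assert any("invalid escape" in str(w.message) for w in caught)

With the `br` prefix the compiled pattern is byte-for-byte identical; only the parser-level warning goes away.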
@@ -1,554 +1,554 @@
# nodemap.py - nodemap related code and utilities
#
# Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
# Copyright 2019 George Racinet <georges.racinet@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import errno
import os
import re
import struct

from .. import (
    error,
    node as nodemod,
    util,
)


class NodeMap(dict):
    def __missing__(self, x):
        raise error.RevlogError(b'unknown node: %s' % x)


def persisted_data(revlog):
    """read the nodemap for a revlog from disk"""
    if revlog.nodemap_file is None:
        return None
    pdata = revlog.opener.tryread(revlog.nodemap_file)
    if not pdata:
        return None
    offset = 0
    (version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size])
    if version != ONDISK_VERSION:
        return None
    offset += S_VERSION.size
    headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
    uid_size, tip_rev, data_length, data_unused = headers
    offset += S_HEADER.size
    docket = NodeMapDocket(pdata[offset : offset + uid_size])
    docket.tip_rev = tip_rev
    docket.data_length = data_length
    docket.data_unused = data_unused

    filename = _rawdata_filepath(revlog, docket)
    use_mmap = revlog.opener.options.get(b"exp-persistent-nodemap.mmap")
    try:
        with revlog.opener(filename) as fd:
            if use_mmap:
                data = util.buffer(util.mmapread(fd, data_length))
            else:
                data = fd.read(data_length)
    except OSError as e:
        # a missing rawdata file simply means there is no usable nodemap
        if e.errno == errno.ENOENT:
            return None
        raise
    if len(data) < data_length:
        return None
    return docket, data


def setup_persistent_nodemap(tr, revlog):
    """Install whatever is needed transaction side to persist a nodemap on disk

    (only actually persist the nodemap if this is relevant for this revlog)
    """
    if revlog._inline:
        return  # inlined revlogs are too small for this to be relevant
    if revlog.nodemap_file is None:
        return  # we do not use persistent_nodemap on this revlog
    callback_id = b"revlog-persistent-nodemap-%s" % revlog.nodemap_file
    if tr.hasfinalize(callback_id):
        return  # no need to register again
    tr.addfinalize(
        callback_id, lambda tr: _persist_nodemap(tr.addpostclose, revlog)
    )


def update_persistent_nodemap(revlog):
    """update the persistent nodemap right now

    To be used for updating the nodemap on disk outside of a normal transaction
    setup (e.g. `debugupdatecache`).
    """
    cleanups = []
    _persist_nodemap((lambda x, y: cleanups.append(y)), revlog)
    for c in cleanups:
        c(None)


def _persist_nodemap(cleaner, revlog):
    """Write nodemap data on disk for a given revlog"""
    if getattr(revlog, 'filteredrevs', ()):
        raise error.ProgrammingError(
            "cannot persist nodemap of a filtered changelog"
        )
    if revlog.nodemap_file is None:
        msg = "calling persist nodemap on a revlog without the feature enabled"
        raise error.ProgrammingError(msg)

    can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental")
    ondisk_docket = revlog._nodemap_docket
    feed_data = util.safehasattr(revlog.index, "update_nodemap_data")
    use_mmap = revlog.opener.options.get(b"exp-persistent-nodemap.mmap")

    data = None
    # first attempt an incremental update of the data
    if can_incremental and ondisk_docket is not None:
        target_docket = revlog._nodemap_docket.copy()
        (
            src_docket,
            data_changed_count,
            data,
        ) = revlog.index.nodemap_data_incremental()
        if src_docket != target_docket:
            data = None
        else:
            datafile = _rawdata_filepath(revlog, target_docket)
            # EXP-TODO: if this is a cache, this should use a cache vfs, not a
            # store vfs
            new_length = target_docket.data_length + len(data)
            with revlog.opener(datafile, b'r+') as fd:
                fd.seek(target_docket.data_length)
                fd.write(data)
                if feed_data:
                    if use_mmap:
                        fd.seek(0)
                        new_data = fd.read(new_length)
                    else:
                        fd.flush()
                        new_data = util.buffer(util.mmapread(fd, new_length))
            target_docket.data_length = new_length
            target_docket.data_unused += data_changed_count

    if data is None:
        # otherwise fallback to a full new export
        target_docket = NodeMapDocket()
        datafile = _rawdata_filepath(revlog, target_docket)
        if util.safehasattr(revlog.index, "nodemap_data_all"):
            data = revlog.index.nodemap_data_all()
        else:
            data = persistent_data(revlog.index)
        # EXP-TODO: if this is a cache, this should use a cache vfs, not a
        # store vfs
        with revlog.opener(datafile, b'w+') as fd:
            fd.write(data)
            if feed_data:
                if use_mmap:
                    new_data = data
                else:
                    fd.flush()
                    new_data = util.buffer(util.mmapread(fd, len(data)))
        target_docket.data_length = len(data)
    target_docket.tip_rev = revlog.tiprev()
    # EXP-TODO: if this is a cache, this should use a cache vfs, not a
    # store vfs
    with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp:
        fp.write(target_docket.serialize())
    revlog._nodemap_docket = target_docket
    if feed_data:
        revlog.index.update_nodemap_data(target_docket, new_data)

    # EXP-TODO: if the transaction aborts, we should remove the new data and
    # reinstall the old one.

    # search for old index files in all cases; some older process might have
    # left one behind.
    olds = _other_rawdata_filepath(revlog, target_docket)
    if olds:
        realvfs = getattr(revlog, '_realopener', revlog.opener)

        def cleanup(tr):
            for oldfile in olds:
                realvfs.tryunlink(oldfile)

        callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file
        cleaner(callback_id, cleanup)


### Nodemap docket file
#
# The nodemap data are stored on disk using 2 files:
#
# * a raw data file containing a persistent nodemap
#   (see `Nodemap Trie` section)
#
# * a small "docket" file containing metadata
#
# While the nodemap data can be multiple tens of megabytes, the "docket" is
# small, so it is easy to update it automatically or to duplicate its content
# during a transaction.
#
# Multiple raw data can exist at the same time (the currently valid one and a
# new one being used by an in-progress transaction). To accommodate this, the
# filename hosting the raw data has a variable part. The exact filename is
# specified inside the "docket" file.
#
# The docket file contains information to find, qualify and validate the raw
# data. Its content is currently very light, but it will expand as the on-disk
# nodemap gains the necessary features to be used in production.

# version 0 is experimental, no BC guarantee, do not use outside of tests.
ONDISK_VERSION = 0
S_VERSION = struct.Struct(">B")
S_HEADER = struct.Struct(">BQQQ")

ID_SIZE = 8
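As an aside, here is a hedged sketch of decoding the docket layout described above by hand: one version byte, then the fixed-size header (uid size, tip revision, data length, unused data), then the uid itself. The `b"cafe"` uid is invented for the example:

import struct

S_VERSION = struct.Struct(">B")    # 1-byte on-disk version
S_HEADER = struct.Struct(">BQQQ")  # uid size, tip rev, data length, data unused

raw = S_VERSION.pack(0) + S_HEADER.pack(4, 42, 16384, 0) + b"cafe"
(version,) = S_VERSION.unpack(raw[: S_VERSION.size])
uid_size, tip_rev, data_length, data_unused = S_HEADER.unpack(
    raw[S_VERSION.size : S_VERSION.size + S_HEADER.size]
)
uid = raw[S_VERSION.size + S_HEADER.size :][:uid_size]
assert (version, uid, tip_rev, data_length) == (0, b"cafe", 42, 16384)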


def _make_uid():
    """return a new unique identifier.

    The identifier is random and composed of ascii characters."""
    return nodemod.hex(os.urandom(ID_SIZE))


class NodeMapDocket(object):
    """metadata associated with persistent nodemap data

    The persistent data may come from disk or be on their way to disk.
    """

    def __init__(self, uid=None):
        if uid is None:
            uid = _make_uid()
        self.uid = uid
        self.tip_rev = None
        self.data_length = None
        self.data_unused = 0

    def copy(self):
        new = NodeMapDocket(uid=self.uid)
        new.tip_rev = self.tip_rev
        new.data_length = self.data_length
        new.data_unused = self.data_unused
        return new

    def __cmp__(self, other):
        if self.uid < other.uid:
            return -1
        if self.uid > other.uid:
            return 1
        elif self.data_length < other.data_length:
            return -1
        elif self.data_length > other.data_length:
            return 1
        return 0

    def __eq__(self, other):
        return self.uid == other.uid and self.data_length == other.data_length

    def serialize(self):
        """return serialized bytes for a docket using the passed uid"""
        data = []
        data.append(S_VERSION.pack(ONDISK_VERSION))
        headers = (
            len(self.uid),
            self.tip_rev,
            self.data_length,
            self.data_unused,
        )
        data.append(S_HEADER.pack(*headers))
        data.append(self.uid)
        return b''.join(data)
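For illustration, a hedged sketch of what `serialize()` yields for a hand-filled docket (a made-up 16-byte uid instead of the random one `_make_uid` would produce), evaluated in this module's namespace:

docket = NodeMapDocket(uid=b"abcd1234deadbeef")
docket.tip_rev = 7
docket.data_length = 128
raw = docket.serialize()
assert len(raw) == S_VERSION.size + S_HEADER.size + len(docket.uid)  # 1 + 25 + 16
assert raw[0:1] == b"\x00"  # ONDISK_VERSION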


def _rawdata_filepath(revlog, docket):
    """The (vfs relative) nodemap's rawdata file for a given uid"""
    prefix = revlog.nodemap_file[:-2]
    return b"%s-%s.nd" % (prefix, docket.uid)


def _other_rawdata_filepath(revlog, docket):
    prefix = revlog.nodemap_file[:-2]
-    pattern = re.compile(b"(^|/)%s-[0-9a-f]+\.nd$" % prefix)
+    pattern = re.compile(br"(^|/)%s-[0-9a-f]+\.nd$" % prefix)
    new_file_path = _rawdata_filepath(revlog, docket)
    new_file_name = revlog.opener.basename(new_file_path)
    dirpath = revlog.opener.dirname(new_file_path)
    others = []
    for f in revlog.opener.listdir(dirpath):
        if pattern.match(f) and f != new_file_name:
            others.append(f)
    return others


### Nodemap Trie
#
# This is a simple reference implementation to compute and persist a nodemap
# trie. This reference implementation is write only. The python version of this
# is not expected to be actually used, since it won't provide a performance
# improvement over the existing non-persistent C implementation.
#
# The nodemap is persisted as a Trie using 4-bit addressing and 16-entry
# blocks. Each revision can be addressed using its node's shortest prefix.
#
# The trie is stored as a sequence of blocks. Each block contains 16 entries
# (signed 32bit integer, big endian). Each entry can be one of the following:
#
#  * value >= 0 -> index of sub-block
#  * value == -1 -> no value
#  * value <  -1 -> a revision value: rev = -(value+2)
#
# The implementation focuses on simplicity, not on performance. A Rust
# implementation should provide an efficient version of the same binary
# persistence. This reference python implementation is never meant to be
# used extensively in production.
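A hedged sketch of that entry encoding, using the same struct layout as the `S_BLOCK` constant defined below: revision 0 is stored as -2, and a non-negative slot value is a sub-block index:

import struct

S_BLOCK = struct.Struct(">" + ("l" * 16))  # 16 signed 32-bit big-endian slots
NO_ENTRY = -1

entries = [NO_ENTRY] * 16
entries[0x0] = -(0 + 2)  # revision 0, encoded as -2
entries[0xF] = 3         # pointer to on-disk block number 3
packed = S_BLOCK.pack(*entries)
assert len(packed) == 64  # 16 entries x 4 bytes each
assert S_BLOCK.unpack(packed)[0x0] == -2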


def persistent_data(index):
    """return the persistent binary form for a nodemap for a given index"""
    trie = _build_trie(index)
    return _persist_trie(trie)


def update_persistent_data(index, root, max_idx, last_rev):
    """return the incremental update for persistent nodemap from a given index"""
    changed_block, trie = _update_trie(index, root, last_rev)
    return (
        changed_block * S_BLOCK.size,
        _persist_trie(trie, existing_idx=max_idx),
    )


S_BLOCK = struct.Struct(">" + ("l" * 16))

NO_ENTRY = -1
# rev 0 needs to be -2 because 0 is used by blocks and -1 is a special value.
REV_OFFSET = 2


def _transform_rev(rev):
    """Return the number used to represent the rev in the tree.

    (or retrieve a rev number from such representation)

    Note that this is an involution, a function equal to its inverse (i.e.
    which gives the identity when applied to itself).
    """
    return -(rev + REV_OFFSET)
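A quick sanity check of the involution (a sketch, using the function above):

assert _transform_rev(_transform_rev(5)) == 5
assert _transform_rev(0) == -2  # rev 0 maps to -2, keeping -1 free for NO_ENTRY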


def _to_int(hex_digit):
    """turn a hexadecimal digit into a proper integer"""
    return int(hex_digit, 16)


class Block(dict):
    """represent a block of the Trie

    contains up to 16 entries indexed from 0 to 15"""

    def __init__(self):
        super(Block, self).__init__()
        # If this block exists on disk, here is its ID
        self.ondisk_id = None

    def __iter__(self):
        return iter(self.get(i) for i in range(16))


def _build_trie(index):
    """build a nodemap trie

    The nodemap stores a revision number for each unique prefix.

    Each block is a dictionary with keys in `[0, 15]`. Values are either
    another block or a revision number.
    """
    root = Block()
    for rev in range(len(index)):
        hex = nodemod.hex(index[rev][7])
        _insert_into_block(index, 0, root, rev, hex)
    return root
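A hedged usage sketch, assuming a Mercurial source tree is importable and using this revision's module path; the fake index entries only fill tuple slot 7 (the binary node), the only slot `_build_trie` reads:

from mercurial import node as nodemod
from mercurial.revlogutils import nodemap

fake_index = [
    (None,) * 7 + (b'\x12' * 20,),  # rev 0, node 1212...
    (None,) * 7 + (b'\x34' * 20,),  # rev 1, node 3434...
]
root = nodemap._build_trie(fake_index)
# the nodes differ in their first nibble, so one hex digit resolves each
assert nodemap._find_node(root, nodemod.hex(b'\x12' * 20)) == 0
assert nodemap._find_node(root, nodemod.hex(b'\x34' * 20)) == 1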


def _update_trie(index, root, last_rev):
    """update a trie (in place) with revisions added after `last_rev`

    Return `(changed, root)` where `changed` is the number of block
    insertions performed."""
    changed = 0
    for rev in range(last_rev + 1, len(index)):
        hex = nodemod.hex(index[rev][7])
        changed += _insert_into_block(index, 0, root, rev, hex)
    return changed, root


def _insert_into_block(index, level, block, current_rev, current_hex):
    """insert a new revision in a block

    index: the index we are adding revision for
    level: the depth of the current block in the trie
    block: the block currently being considered
    current_rev: the revision number we are adding
    current_hex: the hexadecimal representation of the node of that revision
    """
    changed = 1
    if block.ondisk_id is not None:
        block.ondisk_id = None
    hex_digit = _to_int(current_hex[level : level + 1])
    entry = block.get(hex_digit)
    if entry is None:
        # no entry, simply store the revision number
        block[hex_digit] = current_rev
    elif isinstance(entry, dict):
        # need to recurse to an underlying block
        changed += _insert_into_block(
            index, level + 1, entry, current_rev, current_hex
        )
    else:
        # collision with a previously unique prefix, inserting new
        # vertices to fit both entries.
        other_hex = nodemod.hex(index[entry][7])
        other_rev = entry
        new = Block()
        block[hex_digit] = new
        _insert_into_block(index, level + 1, new, other_rev, other_hex)
        _insert_into_block(index, level + 1, new, current_rev, current_hex)
    return changed


def _persist_trie(root, existing_idx=None):
    """turn a nodemap trie into persistent binary data

    See `_build_trie` for nodemap trie structure"""
    block_map = {}
    if existing_idx is not None:
        base_idx = existing_idx + 1
    else:
        base_idx = 0
    chunks = []
    for tn in _walk_trie(root):
        if tn.ondisk_id is not None:
            block_map[id(tn)] = tn.ondisk_id
        else:
            block_map[id(tn)] = len(chunks) + base_idx
            chunks.append(_persist_block(tn, block_map))
    return b''.join(chunks)


def _walk_trie(block):
    """yield all the blocks in a trie

    Children blocks are always yielded before their parent block.
    """
    for (_, item) in sorted(block.items()):
        if isinstance(item, dict):
            for sub_block in _walk_trie(item):
                yield sub_block
    yield block


def _persist_block(block_node, block_map):
    """produce persistent binary data for a single block

    Children blocks are assumed to be already persisted and present in
    block_map.
    """
    data = tuple(_to_value(v, block_map) for v in block_node)
    return S_BLOCK.pack(*data)


def _to_value(item, block_map):
    """persist any value as an integer"""
    if item is None:
        return NO_ENTRY
    elif isinstance(item, dict):
        return block_map[id(item)]
    else:
        return _transform_rev(item)


def parse_data(data):
    """parse nodemap data into a nodemap Trie"""
    if (len(data) % S_BLOCK.size) != 0:
        msg = b"nodemap data size is not a multiple of block size (%d): %d"
        raise error.Abort(msg % (S_BLOCK.size, len(data)))
    if not data:
        return Block(), None
    block_map = {}
    new_blocks = []
    for i in range(0, len(data), S_BLOCK.size):
        block = Block()
        block.ondisk_id = len(block_map)
        block_map[block.ondisk_id] = block
        block_data = data[i : i + S_BLOCK.size]
        values = S_BLOCK.unpack(block_data)
        new_blocks.append((block, values))
    for b, values in new_blocks:
        for idx, v in enumerate(values):
            if v == NO_ENTRY:
                continue
            elif v >= 0:
                b[idx] = block_map[v]
            else:
                b[idx] = _transform_rev(v)
    # blocks are written children-first, so the last block parsed is the root
    return block, i // S_BLOCK.size
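A hedged round-trip through the reference implementation, with the same fake-index convention as the earlier sketch:

from mercurial.revlogutils import nodemap

fake_index = [
    (None,) * 7 + (nb * 20,) for nb in (b'\x12', b'\x34', b'\x35')
]
data = nodemap.persistent_data(fake_index)
assert len(data) % 64 == 0  # a whole number of 64-byte blocks
root, last_block_idx = nodemap.parse_data(data)
assert sorted(nodemap._all_revisions(root)) == [0, 1, 2]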


# debug utility


def check_data(ui, index, data):
    """verify that the provided nodemap data are valid for the given index"""
    ret = 0
    ui.status((b"revisions in index: %d\n") % len(index))
    root, __ = parse_data(data)
    all_revs = set(_all_revisions(root))
    ui.status((b"revisions in nodemap: %d\n") % len(all_revs))
    for r in range(len(index)):
        if r not in all_revs:
            msg = b" revision missing from nodemap: %d\n" % r
            ui.write_err(msg)
            ret = 1
        else:
            all_revs.remove(r)
            nm_rev = _find_node(root, nodemod.hex(index[r][7]))
            if nm_rev is None:
                msg = b" revision node does not match any entries: %d\n" % r
                ui.write_err(msg)
                ret = 1
            elif nm_rev != r:
                msg = (
                    b" revision node does not match the expected revision: "
                    b"%d != %d\n" % (r, nm_rev)
                )
                ui.write_err(msg)
                ret = 1

    if all_revs:
        for r in sorted(all_revs):
            msg = b" extra revision in nodemap: %d\n" % r
            ui.write_err(msg)
            ret = 1
    return ret
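For reference, this checker appears to be what the experimental `hg debugnodemap --check` command of this series exercises, though the exact command surface was still in flux at the time.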


def _all_revisions(root):
    """return all revisions stored in a Trie"""
    for block in _walk_trie(root):
        for v in block:
            if v is None or isinstance(v, Block):
                continue
            yield v


def _find_node(block, node):
    """find the revision associated with a given node"""
    entry = block.get(_to_int(node[0:1]))
    if isinstance(entry, dict):
        return _find_node(entry, node[1:])
    return entry