nodemap: skip persistent nodemap warming for revlog not using it...
marmoute
r45247:b4537125 stable
@@ -1,628 +1,633 @@
# nodemap.py - nodemap related code and utilities
#
# Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
# Copyright 2019 George Racinet <georges.racinet@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import errno
import os
import re
import struct

from .. import (
    error,
    node as nodemod,
    util,
)


class NodeMap(dict):
    def __missing__(self, x):
        raise error.RevlogError(b'unknown node: %s' % x)


def persisted_data(revlog):
    """read the nodemap for a revlog from disk"""
    if revlog.nodemap_file is None:
        return None
    pdata = revlog.opener.tryread(revlog.nodemap_file)
    if not pdata:
        return None
    offset = 0
    (version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size])
    if version != ONDISK_VERSION:
        return None
    offset += S_VERSION.size
    headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
    uid_size, tip_rev, data_length, data_unused, tip_node_size = headers
    offset += S_HEADER.size
    docket = NodeMapDocket(pdata[offset : offset + uid_size])
    offset += uid_size
    docket.tip_rev = tip_rev
    docket.tip_node = pdata[offset : offset + tip_node_size]
    docket.data_length = data_length
    docket.data_unused = data_unused

    filename = _rawdata_filepath(revlog, docket)
    use_mmap = revlog.opener.options.get(b"exp-persistent-nodemap.mmap")
    try:
        with revlog.opener(filename) as fd:
            if use_mmap:
                data = util.buffer(util.mmapread(fd, data_length))
            else:
                data = fd.read(data_length)
    except OSError as e:
        if e.errno != errno.ENOENT:
            raise
        # the raw data file is missing, so the docket alone is unusable
        return None
    if len(data) < data_length:
        return None
    return docket, data


def setup_persistent_nodemap(tr, revlog):
    """Install whatever is needed transaction-side to persist a nodemap on disk

    (only actually persist the nodemap if this is relevant for this revlog)
    """
    if revlog._inline:
        return  # inlined revlogs are too small for this to be relevant
    if revlog.nodemap_file is None:
        return  # we do not use persistent_nodemap on this revlog

    # this needs to run after the changelog finalization; finalize callbacks
    # run in sorted id order and the changelog uses "cl-" prefixed ids, which
    # sort before the "nm-" prefix used here
    callback_id = b"nm-revlog-persistent-nodemap-%s" % revlog.nodemap_file
    if tr.hasfinalize(callback_id):
        return  # no need to register again
    tr.addpending(
        callback_id, lambda tr: _persist_nodemap(tr, revlog, pending=True)
    )
    tr.addfinalize(callback_id, lambda tr: _persist_nodemap(tr, revlog))


class _NoTransaction(object):
    """transaction-like object to update the nodemap outside a transaction
    """

    def __init__(self):
        self._postclose = {}

    def addpostclose(self, callback_id, callback_func):
        self._postclose[callback_id] = callback_func

    def registertmp(self, *args, **kwargs):
        pass

    def addbackup(self, *args, **kwargs):
        pass

    def add(self, *args, **kwargs):
        pass

    def addabort(self, *args, **kwargs):
        pass


def update_persistent_nodemap(revlog):
    """update the persistent nodemap right now

    To be used for updating the nodemap on disk outside of a normal transaction
    setup (eg, `debugupdatecaches`).
    """
+    if revlog._inline:
+        return  # inlined revlogs are too small for this to be relevant
+    if revlog.nodemap_file is None:
+        return  # we do not use persistent_nodemap on this revlog
+
    notr = _NoTransaction()
    _persist_nodemap(notr, revlog)
    for k in sorted(notr._postclose):
        notr._postclose[k](None)
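
# For illustration (assuming the usual cache-warming path): commands such as
# `hg debugupdatecaches` reach update_persistent_nodemap() outside of any
# transaction. The two new checks above mirror setup_persistent_nodemap(),
# making the call a no-op for inline revlogs and for revlogs that do not use
# a persistent nodemap, which is what this changeset is about.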


def _persist_nodemap(tr, revlog, pending=False):
    """Write nodemap data on disk for a given revlog
    """
    if getattr(revlog, 'filteredrevs', ()):
        raise error.ProgrammingError(
            "cannot persist nodemap of a filtered changelog"
        )
    if revlog.nodemap_file is None:
        msg = "calling persist nodemap on a revlog without the feature enabled"
        raise error.ProgrammingError(msg)

    can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental")
    ondisk_docket = revlog._nodemap_docket
    feed_data = util.safehasattr(revlog.index, "update_nodemap_data")
    use_mmap = revlog.opener.options.get(b"exp-persistent-nodemap.mmap")

    data = None
    # first attempt an incremental update of the data
    if can_incremental and ondisk_docket is not None:
        target_docket = revlog._nodemap_docket.copy()
        (
            src_docket,
            data_changed_count,
            data,
        ) = revlog.index.nodemap_data_incremental()
        new_length = target_docket.data_length + len(data)
        new_unused = target_docket.data_unused + data_changed_count
        if src_docket != target_docket:
            data = None
        elif new_length <= (new_unused * 10):
            # more than 10% of the data would be unused: rewrite from scratch
            data = None
        else:
            datafile = _rawdata_filepath(revlog, target_docket)
            # EXP-TODO: if this is a cache, this should use a cache vfs, not a
            # store vfs
            tr.add(datafile, target_docket.data_length)
            with revlog.opener(datafile, b'r+') as fd:
                fd.seek(target_docket.data_length)
                fd.write(data)
                if feed_data:
                    if use_mmap:
                        # feed the index an mmap-backed view of the file,
                        # matching what persisted_data would return
                        fd.flush()
                        new_data = util.buffer(util.mmapread(fd, new_length))
                    else:
                        fd.seek(0)
                        new_data = fd.read(new_length)
            target_docket.data_length = new_length
            target_docket.data_unused = new_unused

    if data is None:
        # otherwise fall back to a full new export
        target_docket = NodeMapDocket()
        datafile = _rawdata_filepath(revlog, target_docket)
        if util.safehasattr(revlog.index, "nodemap_data_all"):
            data = revlog.index.nodemap_data_all()
        else:
            data = persistent_data(revlog.index)
        # EXP-TODO: if this is a cache, this should use a cache vfs, not a
        # store vfs

        tryunlink = revlog.opener.tryunlink

        def abortck(tr):
            tryunlink(datafile)

        callback_id = b"delete-%s" % datafile

        # some flavors of transaction abort do not clean up new files, they
        # simply empty them.
        tr.addabort(callback_id, abortck)
        with revlog.opener(datafile, b'w+') as fd:
            fd.write(data)
            if feed_data:
                if use_mmap:
                    # feed the index an mmap-backed view of the new file
                    fd.flush()
                    new_data = util.buffer(util.mmapread(fd, len(data)))
                else:
                    new_data = data
        target_docket.data_length = len(data)
    target_docket.tip_rev = revlog.tiprev()
    target_docket.tip_node = revlog.node(target_docket.tip_rev)
    # EXP-TODO: if this is a cache, this should use a cache vfs, not a
    # store vfs
    file_path = revlog.nodemap_file
    if pending:
        file_path += b'.a'
        tr.registertmp(file_path)
    else:
        tr.addbackup(file_path)

    with revlog.opener(file_path, b'w', atomictemp=True) as fp:
        fp.write(target_docket.serialize())
    revlog._nodemap_docket = target_docket
    if feed_data:
        revlog.index.update_nodemap_data(target_docket, new_data)

    # search for old data files in all cases; some older process might have
    # left one behind.
    olds = _other_rawdata_filepath(revlog, target_docket)
    if olds:
        realvfs = getattr(revlog, '_realopener', revlog.opener)

        def cleanup(tr):
            for oldfile in olds:
                realvfs.tryunlink(oldfile)

        callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file
        tr.addpostclose(callback_id, cleanup)


### Nodemap docket file
#
# The nodemap data are stored on disk using 2 files:
#
# * a raw data file containing a persistent nodemap
#   (see `Nodemap Trie` section)
#
# * a small "docket" file containing metadata
#
# While the nodemap data can be multiple tens of megabytes, the "docket" is
# small, so it is easy to update it automatically or to duplicate its content
# during a transaction.
#
# Multiple raw data files can exist at the same time (the currently valid one
# and a new one being used by an in-progress transaction). To accommodate
# this, the filename hosting the raw data has a variable part. The exact
# filename is specified inside the "docket" file.
#
# The docket file contains information to find, qualify and validate the raw
# data. Its content is currently very light, but it will expand as the on-disk
# nodemap gains the necessary features to be used in production.

# version 0 is experimental, no BC guarantee, do not use outside of tests.
ONDISK_VERSION = 0
S_VERSION = struct.Struct(">B")
S_HEADER = struct.Struct(">BQQQQ")

ID_SIZE = 8
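
# For illustration, the docket byte layout implied by the structs above
# (offsets assume a 20-byte sha1 tip node and the 16-character uid produced
# by `_make_uid` below):
#
#   [ 0: 1]  version   (">B", 1 byte)        must equal ONDISK_VERSION
#   [ 1:34]  header    (">BQQQQ", 33 bytes)  uid_size, tip_rev, data_length,
#                                            data_unused, tip_node_size
#   [34:50]  uid       (uid_size bytes)
#   [50:70]  tip_node  (tip_node_size bytes)
#
# `NodeMapDocket.serialize` below writes the fields in this order and
# `persisted_data` above reads them back field by field.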


def _make_uid():
    """return a new unique identifier.

    The identifier is random and composed of ascii characters."""
    return nodemod.hex(os.urandom(ID_SIZE))


class NodeMapDocket(object):
    """metadata associated with persistent nodemap data

    The persistent data may come from disk or be on their way to disk.
    """

    def __init__(self, uid=None):
        if uid is None:
            uid = _make_uid()
        # a unique identifier for the data file:
        # - When new data are appended, it is preserved.
        # - When a new data file is created, a new identifier is generated.
        self.uid = uid
        # the tipmost revision stored in the data file. This revision and all
        # revisions before it are expected to be encoded in the data file.
        self.tip_rev = None
        # the node of that tipmost revision: if it mismatches the current
        # index data, the docket is not valid for the current index and
        # should be discarded.
        #
        # note: this check is not perfect, as some destructive operations
        # could preserve the same tip_rev + tip_node while altering lower
        # revisions. However, multiple other caches have the same
        # vulnerability (eg: the branchmap cache).
        self.tip_node = None
        # the size (in bytes) of the persisted data to encode the nodemap
        # valid for `tip_rev`.
        # - a data file shorter than this is corrupted,
        # - any extra data should be ignored.
        self.data_length = None
        # the amount (in bytes) of "dead" data, still in the data file but no
        # longer used for the nodemap.
        self.data_unused = 0

    def copy(self):
        new = NodeMapDocket(uid=self.uid)
        new.tip_rev = self.tip_rev
        new.tip_node = self.tip_node
        new.data_length = self.data_length
        new.data_unused = self.data_unused
        return new

    def __cmp__(self, other):
        if self.uid < other.uid:
            return -1
        if self.uid > other.uid:
            return 1
        elif self.data_length < other.data_length:
            return -1
        elif self.data_length > other.data_length:
            return 1
        return 0

    def __eq__(self, other):
        return self.uid == other.uid and self.data_length == other.data_length

    def serialize(self):
        """return the serialized bytes for this docket"""
        data = []
        data.append(S_VERSION.pack(ONDISK_VERSION))
        headers = (
            len(self.uid),
            self.tip_rev,
            self.data_length,
            self.data_unused,
            len(self.tip_node),
        )
        data.append(S_HEADER.pack(*headers))
        data.append(self.uid)
        data.append(self.tip_node)
        return b''.join(data)


def _rawdata_filepath(revlog, docket):
    """The (vfs relative) nodemap's rawdata file for a given uid"""
    if revlog.nodemap_file.endswith(b'.n.a'):
        prefix = revlog.nodemap_file[:-4]
    else:
        prefix = revlog.nodemap_file[:-2]
    return b"%s-%s.nd" % (prefix, docket.uid)


def _other_rawdata_filepath(revlog, docket):
    prefix = revlog.nodemap_file[:-2]
    pattern = re.compile(br"(^|/)%s-[0-9a-f]+\.nd$" % prefix)
    new_file_path = _rawdata_filepath(revlog, docket)
    new_file_name = revlog.opener.basename(new_file_path)
    dirpath = revlog.opener.dirname(new_file_path)
    others = []
    for f in revlog.opener.listdir(dirpath):
        if pattern.match(f) and f != new_file_name:
            others.append(f)
    return others


### Nodemap Trie
#
# This is a simple reference implementation to compute and persist a nodemap
# trie. This reference implementation is write only. The python version of
# this is not expected to be actually used, since it won't provide any
# performance improvement over the existing non-persistent C implementation.
#
# The nodemap is persisted as a trie using 4-bit addresses and 16-entry
# blocks. Each revision can be addressed using the shortest unique prefix of
# its node.
#
# The trie is stored as a sequence of blocks. Each block contains 16 entries
# (signed 32-bit integers, big endian). Each entry can be one of the
# following:
#
# * value >= 0 -> index of sub-block
# * value == -1 -> no value
# * value < -1 -> a revision value: rev = -(value+2)
#
# The implementation focuses on simplicity, not on performance. A Rust
# implementation should provide an efficient version of the same binary
# persistence. This reference python implementation is never meant to be
# used extensively in production.
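
# Worked example (illustrative): an index with two revisions whose nodes
# start with "a1" and "a2" produces two blocks. Inserting rev 0 first puts
# it at slot 0xa of the root; inserting rev 1 collides there, so slot 0xa is
# replaced by a sub-block holding both:
#
#   sub-block: slot 0x1 -> -(0+2) == -2 (rev 0),
#              slot 0x2 -> -(1+2) == -3 (rev 1), all other slots -1
#   root:      slot 0xa -> 0 (index of the sub-block), all other slots -1
#
# Children are persisted before parents, so the sub-block is written first
# and receives on-disk index 0.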


def persistent_data(index):
    """return the persistent binary form for the nodemap of a given index
    """
    trie = _build_trie(index)
    return _persist_trie(trie)


def update_persistent_data(index, root, max_idx, last_rev):
    """return the incremental update for persistent nodemap from a given index
    """
    changed_block, trie = _update_trie(index, root, last_rev)
    return (
        changed_block * S_BLOCK.size,
        _persist_trie(trie, existing_idx=max_idx),
    )


S_BLOCK = struct.Struct(">" + ("l" * 16))

NO_ENTRY = -1
# rev 0 needs to be -2 because 0 is used for blocks and -1 is a special value.
REV_OFFSET = 2


def _transform_rev(rev):
    """Return the number used to represent the rev in the tree.

    (or retrieve a rev number from such representation)

    Note that this is an involution, a function equal to its inverse (i.e.
    which gives the identity when applied to itself).
    """
    return -(rev + REV_OFFSET)
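
# For example: _transform_rev(0) == -(0 + 2) == -2, and applying it again,
# _transform_rev(-2) == -(-2 + 2) == 0, recovers the original revision.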


def _to_int(hex_digit):
    """turn a hexadecimal digit into a proper integer"""
    return int(hex_digit, 16)


class Block(dict):
    """represent a block of the Trie

    contains up to 16 entries indexed from 0 to 15"""

    def __init__(self):
        super(Block, self).__init__()
        # If this block exists on disk, here is its ID
        self.ondisk_id = None

    def __iter__(self):
        return iter(self.get(i) for i in range(16))


def _build_trie(index):
    """build a nodemap trie

    The nodemap stores a revision number for each unique prefix.

    Each block is a dictionary with keys in `[0, 15]`. Values are either
    another block or a revision number.
    """
    root = Block()
    for rev in range(len(index)):
        hex = nodemod.hex(index[rev][7])
        _insert_into_block(index, 0, root, rev, hex)
    return root


def _update_trie(index, root, last_rev):
    """update a trie with the revisions added to the index since last_rev"""
    changed = 0
    for rev in range(last_rev + 1, len(index)):
        hex = nodemod.hex(index[rev][7])
        changed += _insert_into_block(index, 0, root, rev, hex)
    return changed, root


def _insert_into_block(index, level, block, current_rev, current_hex):
    """insert a new revision in a block

    index: the index we are adding revision for
    level: the depth of the current block in the trie
    block: the block currently being considered
    current_rev: the revision number we are adding
    current_hex: the hexadecimal representation of the node of that revision
    """
    changed = 1
    if block.ondisk_id is not None:
        block.ondisk_id = None
    hex_digit = _to_int(current_hex[level : level + 1])
    entry = block.get(hex_digit)
    if entry is None:
        # no entry, simply store the revision number
        block[hex_digit] = current_rev
    elif isinstance(entry, dict):
        # need to recurse to an underlying block
        changed += _insert_into_block(
            index, level + 1, entry, current_rev, current_hex
        )
    else:
        # collision with a previously unique prefix, inserting new
        # vertices to fit both entries.
        other_hex = nodemod.hex(index[entry][7])
        other_rev = entry
        new = Block()
        block[hex_digit] = new
        _insert_into_block(index, level + 1, new, other_rev, other_hex)
        _insert_into_block(index, level + 1, new, current_rev, current_hex)
    return changed
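
# For instance (illustrative): inserting rev 5 (node "3f...") into a block
# whose slot 0x3 already holds rev 2 (node "37...") replaces that slot with
# a fresh sub-block mapping 0x7 -> 2 and 0xf -> 5. The two recursive calls
# that fill the fresh sub-block are not added to `changed`: that block was
# never written to disk, so rewriting it leaves no dead data behind.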


def _persist_trie(root, existing_idx=None):
    """turn a nodemap trie into persistent binary data

    See `_build_trie` for nodemap trie structure"""
    block_map = {}
    if existing_idx is not None:
        base_idx = existing_idx + 1
    else:
        base_idx = 0
    chunks = []
    for tn in _walk_trie(root):
        if tn.ondisk_id is not None:
            block_map[id(tn)] = tn.ondisk_id
        else:
            block_map[id(tn)] = len(chunks) + base_idx
            chunks.append(_persist_block(tn, block_map))
    return b''.join(chunks)


def _walk_trie(block):
    """yield all the blocks in a trie

    Children blocks are always yielded before their parent block.
    """
    for (__, item) in sorted(block.items()):
        if isinstance(item, dict):
            for sub_block in _walk_trie(item):
                yield sub_block
    yield block


def _persist_block(block_node, block_map):
    """produce persistent binary data for a single block

    Children blocks are assumed to be already persisted and present in
    block_map.
    """
    data = tuple(_to_value(v, block_map) for v in block_node)
    return S_BLOCK.pack(*data)


def _to_value(item, block_map):
    """persist any value as an integer"""
    if item is None:
        return NO_ENTRY
    elif isinstance(item, dict):
        return block_map[id(item)]
    else:
        return _transform_rev(item)


def parse_data(data):
    """parse nodemap data into a nodemap Trie"""
    if (len(data) % S_BLOCK.size) != 0:
        msg = "nodemap data size is not a multiple of block size (%d): %d"
        raise error.Abort(msg % (S_BLOCK.size, len(data)))
    if not data:
        return Block(), None
    block_map = {}
    new_blocks = []
    for i in range(0, len(data), S_BLOCK.size):
        block = Block()
        block.ondisk_id = len(block_map)
        block_map[block.ondisk_id] = block
        block_data = data[i : i + S_BLOCK.size]
        values = S_BLOCK.unpack(block_data)
        new_blocks.append((block, values))
    for b, values in new_blocks:
        for idx, v in enumerate(values):
            if v == NO_ENTRY:
                continue
            elif v >= 0:
                b[idx] = block_map[v]
            else:
                b[idx] = _transform_rev(v)
    return block, i // S_BLOCK.size
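
# Round-trip sketch (illustrative): `parse_data` is the inverse of
# `_persist_trie`, so
#
#   root, max_idx = parse_data(persistent_data(index))
#
# rebuilds the trie with every block tagged with its on-disk id, and
# `update_persistent_data(index, root, max_idx, last_rev)` can later append
# only the blocks touched by revisions newer than `last_rev`.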


# debug utility


def check_data(ui, index, data):
    """verify that the provided nodemap data are valid for the given index"""
    ret = 0
    ui.status((b"revision in index: %d\n") % len(index))
    root, __ = parse_data(data)
    all_revs = set(_all_revisions(root))
    ui.status((b"revision in nodemap: %d\n") % len(all_revs))
    for r in range(len(index)):
        if r not in all_revs:
            msg = b"  revision missing from nodemap: %d\n" % r
            ui.write_err(msg)
            ret = 1
        else:
            all_revs.remove(r)
            nm_rev = _find_node(root, nodemod.hex(index[r][7]))
            if nm_rev is None:
                msg = b"  revision node does not match any entries: %d\n" % r
                ui.write_err(msg)
                ret = 1
            elif nm_rev != r:
                msg = (
                    b"  revision node does not match the expected revision: "
                    b"%d != %d\n" % (r, nm_rev)
                )
                ui.write_err(msg)
                ret = 1

    if all_revs:
        for r in sorted(all_revs):
            msg = b"  extra revision in nodemap: %d\n" % r
            ui.write_err(msg)
            ret = 1
    return ret


def _all_revisions(root):
    """return all revisions stored in a Trie"""
    for block in _walk_trie(root):
        for v in block:
            if v is None or isinstance(v, Block):
                continue
            yield v


def _find_node(block, node):
    """find the revision associated with a given node"""
    entry = block.get(_to_int(node[0:1]))
    if isinstance(entry, dict):
        return _find_node(entry, node[1:])
    return entry