nodemap: match comment to actual code...
Joerg Sonnenberger
r46815:8e7ce655 default
@@ -1,642 +1,644
1 # nodemap.py - nodemap related code and utilities
1 # nodemap.py - nodemap related code and utilities
2 #
2 #
3 # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
3 # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
4 # Copyright 2019 George Racinet <georges.racinet@octobus.net>
4 # Copyright 2019 George Racinet <georges.racinet@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 import errno
11 import errno
12 import os
12 import os
13 import re
13 import re
14 import struct
14 import struct
15
15
16 from ..i18n import _
16 from ..i18n import _
17 from ..node import hex
17 from ..node import hex
18
18
19 from .. import (
19 from .. import (
20 error,
20 error,
21 util,
21 util,
22 )
22 )
23
23
24
24
25 class NodeMap(dict):
25 class NodeMap(dict):
26 def __missing__(self, x):
26 def __missing__(self, x):
27 raise error.RevlogError(b'unknown node: %s' % x)
27 raise error.RevlogError(b'unknown node: %s' % x)
28
28
29
29
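A `NodeMap` behaves like a regular dict, except that a miss raises `error.RevlogError` instead of `KeyError`. A minimal sketch of that contract (the node bytes below are made up for illustration):

    nm = NodeMap()
    nm[b'\x12\x34'] = 42  # map a binary node to its revision number
    assert nm[b'\x12\x34'] == 42
    try:
        nm[b'\xff\xff']  # unknown node
    except error.RevlogError:
        pass  # raised instead of KeyError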
30 def persisted_data(revlog):
30 def persisted_data(revlog):
31 """read the nodemap for a revlog from disk"""
31 """read the nodemap for a revlog from disk"""
32 if revlog.nodemap_file is None:
32 if revlog.nodemap_file is None:
33 return None
33 return None
34 pdata = revlog.opener.tryread(revlog.nodemap_file)
34 pdata = revlog.opener.tryread(revlog.nodemap_file)
35 if not pdata:
35 if not pdata:
36 return None
36 return None
37 offset = 0
37 offset = 0
38 (version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size])
38 (version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size])
39 if version != ONDISK_VERSION:
39 if version != ONDISK_VERSION:
40 return None
40 return None
41 offset += S_VERSION.size
41 offset += S_VERSION.size
42 headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
42 headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
43 uid_size, tip_rev, data_length, data_unused, tip_node_size = headers
43 uid_size, tip_rev, data_length, data_unused, tip_node_size = headers
44 offset += S_HEADER.size
44 offset += S_HEADER.size
45 docket = NodeMapDocket(pdata[offset : offset + uid_size])
45 docket = NodeMapDocket(pdata[offset : offset + uid_size])
46 offset += uid_size
46 offset += uid_size
47 docket.tip_rev = tip_rev
47 docket.tip_rev = tip_rev
48 docket.tip_node = pdata[offset : offset + tip_node_size]
48 docket.tip_node = pdata[offset : offset + tip_node_size]
49 docket.data_length = data_length
49 docket.data_length = data_length
50 docket.data_unused = data_unused
50 docket.data_unused = data_unused
51
51
52 filename = _rawdata_filepath(revlog, docket)
52 filename = _rawdata_filepath(revlog, docket)
53 use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap")
53 use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap")
54 try:
54 try:
55 with revlog.opener(filename) as fd:
55 with revlog.opener(filename) as fd:
56 if use_mmap:
56 if use_mmap:
57 data = util.buffer(util.mmapread(fd, data_length))
57 data = util.buffer(util.mmapread(fd, data_length))
58 else:
58 else:
59 data = fd.read(data_length)
59 data = fd.read(data_length)
60 except OSError as e:
60 except OSError as e:
61 if e.errno == errno.ENOENT:
61 if e.errno == errno.ENOENT:
62 return None
62 return None
63 else:
63 else:
64 raise
64 raise
65 if len(data) < data_length:
65 if len(data) < data_length:
66 return None
66 return None
67 return docket, data
67 return docket, data
68
68
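For reference, the header decoded above is fixed-size: one version byte (`S_VERSION`) followed by five big-endian integers (`S_HEADER`), both defined further down in this file. A sketch of the resulting offsets, with illustrative field values:

    import struct

    s_version = struct.Struct(">B")  # 1-byte on-disk format version
    s_header = struct.Struct(">BQQQQ")  # uid_size, tip_rev, data_length,
                                        # data_unused, tip_node_size
    assert s_version.size == 1
    assert s_header.size == 1 + 4 * 8  # 33 bytes
    # a docket with a 16-byte uid, tip_rev 5, 64 data bytes, none unused,
    # and a 20-byte tip node starts with:
    raw = s_version.pack(1) + s_header.pack(16, 5, 64, 0, 20)
    assert len(raw) == 34  # the uid and tip node bytes follow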
69
69
70 def setup_persistent_nodemap(tr, revlog):
70 def setup_persistent_nodemap(tr, revlog):
71 """Install whatever is needed transaction side to persist a nodemap on disk
71 """Install whatever is needed transaction side to persist a nodemap on disk
72
72
73 (only actually persist the nodemap if this is relevant for this revlog)
73 (only actually persist the nodemap if this is relevant for this revlog)
74 """
74 """
75 if revlog._inline:
75 if revlog._inline:
76 return # inlined revlogs are too small for this to be relevant
76 return # inlined revlogs are too small for this to be relevant
77 if revlog.nodemap_file is None:
77 if revlog.nodemap_file is None:
78 return # we do not use persistent_nodemap on this revlog
78 return # we do not use persistent_nodemap on this revlog
79
79
80 # we need to run after the changelog finalization; callbacks run in sorted id order and the changelog uses the "cl-" prefix, hence the "nm-" prefix below
80 # we need to run after the changelog finalization; callbacks run in sorted id order and the changelog uses the "cl-" prefix, hence the "nm-" prefix below
81 callback_id = b"nm-revlog-persistent-nodemap-%s" % revlog.nodemap_file
81 callback_id = b"nm-revlog-persistent-nodemap-%s" % revlog.nodemap_file
82 if tr.hasfinalize(callback_id):
82 if tr.hasfinalize(callback_id):
83 return # no need to register again
83 return # no need to register again
84 tr.addpending(
84 tr.addpending(
85 callback_id, lambda tr: _persist_nodemap(tr, revlog, pending=True)
85 callback_id, lambda tr: _persist_nodemap(tr, revlog, pending=True)
86 )
86 )
87 tr.addfinalize(callback_id, lambda tr: _persist_nodemap(tr, revlog))
87 tr.addfinalize(callback_id, lambda tr: _persist_nodemap(tr, revlog))
88
88
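The callback id matters here: a transaction is assumed to run its finalize callbacks in sorted id order, so the "nm-" prefix used above sorts after the changelog's "cl-" prefixed callback, which is exactly what the comment relies on:

    ids = [b"nm-revlog-persistent-nodemap-00changelog.n", b"cl-changelog"]
    # sorted order puts the changelog finalization first
    assert sorted(ids)[0].startswith(b"cl-")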
89
89
90 class _NoTransaction(object):
90 class _NoTransaction(object):
91 """transaction like object to update the nodemap outside a transaction"""
91 """transaction like object to update the nodemap outside a transaction"""
92
92
93 def __init__(self):
93 def __init__(self):
94 self._postclose = {}
94 self._postclose = {}
95
95
96 def addpostclose(self, callback_id, callback_func):
96 def addpostclose(self, callback_id, callback_func):
97 self._postclose[callback_id] = callback_func
97 self._postclose[callback_id] = callback_func
98
98
99 def registertmp(self, *args, **kwargs):
99 def registertmp(self, *args, **kwargs):
100 pass
100 pass
101
101
102 def addbackup(self, *args, **kwargs):
102 def addbackup(self, *args, **kwargs):
103 pass
103 pass
104
104
105 def add(self, *args, **kwargs):
105 def add(self, *args, **kwargs):
106 pass
106 pass
107
107
108 def addabort(self, *args, **kwargs):
108 def addabort(self, *args, **kwargs):
109 pass
109 pass
110
110
111 def _report(self, *args):
111 def _report(self, *args):
112 pass
112 pass
113
113
114
114
115 def update_persistent_nodemap(revlog):
115 def update_persistent_nodemap(revlog):
116 """update the persistent nodemap right now
116 """update the persistent nodemap right now
117
117
118 To be used for updating the nodemap on disk outside of a normal transaction
118 To be used for updating the nodemap on disk outside of a normal transaction
119 setup (e.g. `debugupdatecache`).
119 setup (e.g. `debugupdatecache`).
120 """
120 """
121 if revlog._inline:
121 if revlog._inline:
122 return # inlined revlogs are too small for this to be relevant
122 return # inlined revlogs are too small for this to be relevant
123 if revlog.nodemap_file is None:
123 if revlog.nodemap_file is None:
124 return # we do not use persistent_nodemap on this revlog
124 return # we do not use persistent_nodemap on this revlog
125
125
126 notr = _NoTransaction()
126 notr = _NoTransaction()
127 _persist_nodemap(notr, revlog)
127 _persist_nodemap(notr, revlog)
128 for k in sorted(notr._postclose):
128 for k in sorted(notr._postclose):
129 notr._postclose[k](None)
129 notr._postclose[k](None)
130
130
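A hedged usage sketch, e.g. from a debug command: it assumes a local repository at the current directory with the persistent nodemap feature enabled, and takes the lock that writing the nodemap requires:

    from mercurial import hg, ui as uimod

    repo = hg.repository(uimod.ui.load(), b'.')
    with repo.lock():
        # rewrites the changelog docket and its raw data file if needed
        update_persistent_nodemap(repo.changelog)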
131
131
132 def _persist_nodemap(tr, revlog, pending=False):
132 def _persist_nodemap(tr, revlog, pending=False):
133 """Write nodemap data on disk for a given revlog"""
133 """Write nodemap data on disk for a given revlog"""
134 if getattr(revlog, 'filteredrevs', ()):
134 if getattr(revlog, 'filteredrevs', ()):
135 raise error.ProgrammingError(
135 raise error.ProgrammingError(
136 "cannot persist nodemap of a filtered changelog"
136 "cannot persist nodemap of a filtered changelog"
137 )
137 )
138 if revlog.nodemap_file is None:
138 if revlog.nodemap_file is None:
139 msg = "calling persist nodemap on a revlog without the feature enableb"
139 msg = "calling persist nodemap on a revlog without the feature enableb"
140 raise error.ProgrammingError(msg)
140 raise error.ProgrammingError(msg)
141
141
142 can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental")
142 can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental")
143 ondisk_docket = revlog._nodemap_docket
143 ondisk_docket = revlog._nodemap_docket
144 feed_data = util.safehasattr(revlog.index, "update_nodemap_data")
144 feed_data = util.safehasattr(revlog.index, "update_nodemap_data")
145 use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap")
145 use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap")
146 mode = revlog.opener.options.get(b"persistent-nodemap.mode")
146 mode = revlog.opener.options.get(b"persistent-nodemap.mode")
147 if not can_incremental:
147 if not can_incremental:
148 msg = _(b"persistent nodemap in strict mode without efficient method")
148 msg = _(b"persistent nodemap in strict mode without efficient method")
149 if mode == b'warn':
149 if mode == b'warn':
150 tr._report(b"%s\n" % msg)
150 tr._report(b"%s\n" % msg)
151 elif mode == b'strict':
151 elif mode == b'strict':
152 raise error.Abort(msg)
152 raise error.Abort(msg)
153
153
154 data = None
154 data = None
155 # first attempt an incremental update of the data
155 # first attempt an incremental update of the data
156 if can_incremental and ondisk_docket is not None:
156 if can_incremental and ondisk_docket is not None:
157 target_docket = revlog._nodemap_docket.copy()
157 target_docket = revlog._nodemap_docket.copy()
158 (
158 (
159 src_docket,
159 src_docket,
160 data_changed_count,
160 data_changed_count,
161 data,
161 data,
162 ) = revlog.index.nodemap_data_incremental()
162 ) = revlog.index.nodemap_data_incremental()
163 new_length = target_docket.data_length + len(data)
163 new_length = target_docket.data_length + len(data)
164 new_unused = target_docket.data_unused + data_changed_count
164 new_unused = target_docket.data_unused + data_changed_count
165 if src_docket != target_docket:
165 if src_docket != target_docket:
166 data = None
166 data = None
167 elif new_length <= (new_unused * 10): # at least 10% of the data is unused
167 elif new_length <= (new_unused * 10): # at least 10% of the data is unused
168 data = None
168 data = None
169 else:
169 else:
170 datafile = _rawdata_filepath(revlog, target_docket)
170 datafile = _rawdata_filepath(revlog, target_docket)
171 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
171 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
172 # store vfs
172 # store vfs
173 tr.add(datafile, target_docket.data_length)
173 tr.add(datafile, target_docket.data_length)
174 with revlog.opener(datafile, b'r+') as fd:
174 with revlog.opener(datafile, b'r+') as fd:
175 fd.seek(target_docket.data_length)
175 fd.seek(target_docket.data_length)
176 fd.write(data)
176 fd.write(data)
177 if feed_data:
177 if feed_data:
178 if use_mmap:
178 if use_mmap:
179 fd.flush()
179 fd.flush()
180 new_data = util.buffer(util.mmapread(fd, new_length))
180 new_data = util.buffer(util.mmapread(fd, new_length))
181 else:
181 else:
182 fd.seek(0)
182 fd.seek(0)
183 new_data = fd.read(new_length)
183 new_data = fd.read(new_length)
184 target_docket.data_length = new_length
184 target_docket.data_length = new_length
185 target_docket.data_unused = new_unused
185 target_docket.data_unused = new_unused
186
186
187 if data is None:
187 if data is None:
188 # otherwise fallback to a full new export
188 # otherwise fallback to a full new export
189 target_docket = NodeMapDocket()
189 target_docket = NodeMapDocket()
190 datafile = _rawdata_filepath(revlog, target_docket)
190 datafile = _rawdata_filepath(revlog, target_docket)
191 if util.safehasattr(revlog.index, "nodemap_data_all"):
191 if util.safehasattr(revlog.index, "nodemap_data_all"):
192 data = revlog.index.nodemap_data_all()
192 data = revlog.index.nodemap_data_all()
193 else:
193 else:
194 data = persistent_data(revlog.index)
194 data = persistent_data(revlog.index)
195 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
195 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
196 # store vfs
196 # store vfs
197
197
198 tryunlink = revlog.opener.tryunlink
198 tryunlink = revlog.opener.tryunlink
199
199
200 def abortck(tr):
200 def abortck(tr):
201 tryunlink(datafile)
201 tryunlink(datafile)
202
202
203 callback_id = b"delete-%s" % datafile
203 callback_id = b"delete-%s" % datafile
204
204
205 # some flavors of transaction abort do not clean up new files; they
205 # some flavors of transaction abort do not clean up new files; they
206 # simply empty them.
206 # simply empty them.
207 tr.addabort(callback_id, abortck)
207 tr.addabort(callback_id, abortck)
208 with revlog.opener(datafile, b'w+') as fd:
208 with revlog.opener(datafile, b'w+') as fd:
209 fd.write(data)
209 fd.write(data)
210 if feed_data:
210 if feed_data:
211 if use_mmap:
211 if use_mmap:
212 fd.flush()
212 fd.flush()
213 new_data = util.buffer(util.mmapread(fd, len(data)))
213 new_data = util.buffer(util.mmapread(fd, len(data)))
214 else:
214 else:
215 new_data = data
215 new_data = data
216 target_docket.data_length = len(data)
216 target_docket.data_length = len(data)
217 target_docket.tip_rev = revlog.tiprev()
217 target_docket.tip_rev = revlog.tiprev()
218 target_docket.tip_node = revlog.node(target_docket.tip_rev)
218 target_docket.tip_node = revlog.node(target_docket.tip_rev)
219 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
219 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
220 # store vfs
220 # store vfs
221 file_path = revlog.nodemap_file
221 file_path = revlog.nodemap_file
222 if pending:
222 if pending:
223 file_path += b'.a'
223 file_path += b'.a'
224 tr.registertmp(file_path)
224 tr.registertmp(file_path)
225 else:
225 else:
226 tr.addbackup(file_path)
226 tr.addbackup(file_path)
227
227
228 with revlog.opener(file_path, b'w', atomictemp=True) as fp:
228 with revlog.opener(file_path, b'w', atomictemp=True) as fp:
229 fp.write(target_docket.serialize())
229 fp.write(target_docket.serialize())
230 revlog._nodemap_docket = target_docket
230 revlog._nodemap_docket = target_docket
231 if feed_data:
231 if feed_data:
232 revlog.index.update_nodemap_data(target_docket, new_data)
232 revlog.index.update_nodemap_data(target_docket, new_data)
233
233
234 # search for old data files in all cases: some older process might have
234 # search for old data files in all cases: some older process might have
235 # left one behind.
235 # left one behind.
236 olds = _other_rawdata_filepath(revlog, target_docket)
236 olds = _other_rawdata_filepath(revlog, target_docket)
237 if olds:
237 if olds:
238 realvfs = getattr(revlog, '_realopener', revlog.opener)
238 realvfs = getattr(revlog, '_realopener', revlog.opener)
239
239
240 def cleanup(tr):
240 def cleanup(tr):
241 for oldfile in olds:
241 for oldfile in olds:
242 realvfs.tryunlink(oldfile)
242 realvfs.tryunlink(oldfile)
243
243
244 callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file
244 callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file
245 tr.addpostclose(callback_id, cleanup)
245 tr.addpostclose(callback_id, cleanup)
246
246
247
247
248 ### Nodemap docket file
248 ### Nodemap docket file
249 #
249 #
250 # The nodemap data are stored on disk using 2 files:
250 # The nodemap data are stored on disk using 2 files:
251 #
251 #
252 # * a raw data file containing a persistent nodemap
252 # * a raw data file containing a persistent nodemap
253 # (see `Nodemap Trie` section)
253 # (see `Nodemap Trie` section)
254 #
254 #
255 # * a small "docket" file containing medatadata
255 # * a small "docket" file containing medatadata
256 #
256 #
257 # While the nodemap data can be multiple tens of megabytes, the "docket" is
257 # While the nodemap data can be multiple tens of megabytes, the "docket" is
258 # small, so it is easy to update it atomically or to duplicate its content
258 # small, so it is easy to update it atomically or to duplicate its content
259 # during a transaction.
259 # during a transaction.
260 #
260 #
261 # Multiple raw data files can exist at the same time (the currently valid one
261 # Multiple raw data files can exist at the same time (the currently valid one
262 # and a new one being used by an in-progress transaction). To accommodate this,
262 # and a new one being used by an in-progress transaction). To accommodate this,
263 # the filename hosting the raw data has a variable part. The exact filename is
263 # the filename hosting the raw data has a variable part. The exact filename is
264 # specified inside the "docket" file.
264 # specified inside the "docket" file.
265 #
265 #
266 # The docket file contains information to find, qualify and validate the raw
266 # The docket file contains information to find, qualify and validate the raw
267 # data. Its content is currently very light, but it will expand as the on disk
267 # data. Its content is currently very light, but it will expand as the on disk
268 # nodemap gains the necessary features to be used in production.
268 # nodemap gains the necessary features to be used in production.
269
269
270 ONDISK_VERSION = 1
270 ONDISK_VERSION = 1
271 S_VERSION = struct.Struct(">B")
271 S_VERSION = struct.Struct(">B")
272 S_HEADER = struct.Struct(">BQQQQ")
272 S_HEADER = struct.Struct(">BQQQQ")
273
273
274 ID_SIZE = 8
274 ID_SIZE = 8
275
275
276
276
277 def _make_uid():
277 def _make_uid():
278 """return a new unique identifier.
278 """return a new unique identifier.
279
279
280 The identifier is random and composed of ascii characters."""
280 The identifier is random and composed of ascii characters."""
281 return hex(os.urandom(ID_SIZE))
281 return hex(os.urandom(ID_SIZE))
282
282
283
283
284 class NodeMapDocket(object):
284 class NodeMapDocket(object):
285 """metadata associated with persistent nodemap data
285 """metadata associated with persistent nodemap data
286
286
287 The persistent data may come from disk or be on their way to disk.
287 The persistent data may come from disk or be on their way to disk.
288 """
288 """
289
289
290 def __init__(self, uid=None):
290 def __init__(self, uid=None):
291 if uid is None:
291 if uid is None:
292 uid = _make_uid()
292 uid = _make_uid()
293 # a unique identifier for the data file:
293 # a unique identifier for the data file:
294 # - When new data are appended, it is preserved.
294 # - When new data are appended, it is preserved.
295 # - When a new data file is created, a new identifier is generated.
295 # - When a new data file is created, a new identifier is generated.
296 self.uid = uid
296 self.uid = uid
297 # the tipmost revision stored in the data file. This revision and all
297 # the tipmost revision stored in the data file. This revision and all
298 # revisions before it are expected to be encoded in the data file.
298 # revisions before it are expected to be encoded in the data file.
299 self.tip_rev = None
299 self.tip_rev = None
300 # the node of that tipmost revision; if it mismatches the current index
300 # the node of that tipmost revision; if it mismatches the current index
301 # data, the docket is not valid for the current index and should be
301 # data, the docket is not valid for the current index and should be
302 # discarded.
302 # discarded.
303 #
303 #
304 # note: this check is not perfect, as some destructive operations could
304 # note: this check is not perfect, as some destructive operations could
305 # preserve the same tip_rev + tip_node while altering lower revisions.
305 # preserve the same tip_rev + tip_node while altering lower revisions.
306 # However, multiple other caches have the same vulnerability (e.g. the
306 # However, multiple other caches have the same vulnerability (e.g. the
307 # branchmap cache).
307 # branchmap cache).
308 self.tip_node = None
308 self.tip_node = None
309 # the size (in bytes) of the persisted data to encode the nodemap valid
309 # the size (in bytes) of the persisted data to encode the nodemap valid
310 # for `tip_rev`.
310 # for `tip_rev`.
311 # - data files shorter than this are corrupted,
311 # - data files shorter than this are corrupted,
312 # - any extra data should be ignored.
312 # - any extra data should be ignored.
313 self.data_length = None
313 self.data_length = None
314 # the amount (in bytes) of "dead" data, still in the data file but no
314 # the amount (in bytes) of "dead" data, still in the data file but no
315 # longer used for the nodemap.
315 # longer used for the nodemap.
316 self.data_unused = 0
316 self.data_unused = 0
317
317
318 def copy(self):
318 def copy(self):
319 new = NodeMapDocket(uid=self.uid)
319 new = NodeMapDocket(uid=self.uid)
320 new.tip_rev = self.tip_rev
320 new.tip_rev = self.tip_rev
321 new.tip_node = self.tip_node
321 new.tip_node = self.tip_node
322 new.data_length = self.data_length
322 new.data_length = self.data_length
323 new.data_unused = self.data_unused
323 new.data_unused = self.data_unused
324 return new
324 return new
325
325
326 def __cmp__(self, other):
326 def __cmp__(self, other):
327 if self.uid < other.uid:
327 if self.uid < other.uid:
328 return -1
328 return -1
329 if self.uid > other.uid:
329 if self.uid > other.uid:
330 return 1
330 return 1
331 elif self.data_length < other.data_length:
331 elif self.data_length < other.data_length:
332 return -1
332 return -1
333 elif self.data_length > other.data_length:
333 elif self.data_length > other.data_length:
334 return 1
334 return 1
335 return 0
335 return 0
336
336
337 def __eq__(self, other):
337 def __eq__(self, other):
338 return self.uid == other.uid and self.data_length == other.data_length
338 return self.uid == other.uid and self.data_length == other.data_length
339
339
340 def serialize(self):
340 def serialize(self):
341 """return serialized bytes for a docket using the passed uid"""
341 """return serialized bytes for a docket using the passed uid"""
342 data = []
342 data = []
343 data.append(S_VERSION.pack(ONDISK_VERSION))
343 data.append(S_VERSION.pack(ONDISK_VERSION))
344 headers = (
344 headers = (
345 len(self.uid),
345 len(self.uid),
346 self.tip_rev,
346 self.tip_rev,
347 self.data_length,
347 self.data_length,
348 self.data_unused,
348 self.data_unused,
349 len(self.tip_node),
349 len(self.tip_node),
350 )
350 )
351 data.append(S_HEADER.pack(*headers))
351 data.append(S_HEADER.pack(*headers))
352 data.append(self.uid)
352 data.append(self.uid)
353 data.append(self.tip_node)
353 data.append(self.tip_node)
354 return b''.join(data)
354 return b''.join(data)
355
355
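A round-trip sketch for the serialization above, with illustrative field values (to be read in this module's namespace):

    docket = NodeMapDocket(uid=b'0123456789abcdef')
    docket.tip_rev = 42
    docket.tip_node = b'\x00' * 20
    docket.data_length = 128
    docket.data_unused = 0
    raw = docket.serialize()
    (version,) = S_VERSION.unpack(raw[: S_VERSION.size])
    assert version == ONDISK_VERSION
    header_end = S_VERSION.size + S_HEADER.size
    headers = S_HEADER.unpack(raw[S_VERSION.size : header_end])
    assert headers == (16, 42, 128, 0, 20)
    assert raw[header_end : header_end + 16] == docket.uid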
356
356
357 def _rawdata_filepath(revlog, docket):
357 def _rawdata_filepath(revlog, docket):
358 """The (vfs relative) nodemap's rawdata file for a given uid"""
358 """The (vfs relative) nodemap's rawdata file for a given uid"""
359 if revlog.nodemap_file.endswith(b'.n.a'):
359 if revlog.nodemap_file.endswith(b'.n.a'):
360 prefix = revlog.nodemap_file[:-4]
360 prefix = revlog.nodemap_file[:-4]
361 else:
361 else:
362 prefix = revlog.nodemap_file[:-2]
362 prefix = revlog.nodemap_file[:-2]
363 return b"%s-%s.nd" % (prefix, docket.uid)
363 return b"%s-%s.nd" % (prefix, docket.uid)
364
364
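For example, a revlog whose docket file is `00changelog.n` and a docket with uid `0123456789abcdef` place the raw data next to the docket; the stand-in classes below are fabricated purely for illustration:

    class FakeRevlog(object):
        nodemap_file = b'00changelog.n'

    class FakeDocket(object):
        uid = b'0123456789abcdef'

    path = _rawdata_filepath(FakeRevlog(), FakeDocket())
    assert path == b'00changelog-0123456789abcdef.nd'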
365
365
366 def _other_rawdata_filepath(revlog, docket):
366 def _other_rawdata_filepath(revlog, docket):
367 prefix = revlog.nodemap_file[:-2]
367 prefix = revlog.nodemap_file[:-2]
368 pattern = re.compile(br"(^|/)%s-[0-9a-f]+\.nd$" % prefix)
368 pattern = re.compile(br"(^|/)%s-[0-9a-f]+\.nd$" % prefix)
369 new_file_path = _rawdata_filepath(revlog, docket)
369 new_file_path = _rawdata_filepath(revlog, docket)
370 new_file_name = revlog.opener.basename(new_file_path)
370 new_file_name = revlog.opener.basename(new_file_path)
371 dirpath = revlog.opener.dirname(new_file_path)
371 dirpath = revlog.opener.dirname(new_file_path)
372 others = []
372 others = []
373 for f in revlog.opener.listdir(dirpath):
373 for f in revlog.opener.listdir(dirpath):
374 if pattern.match(f) and f != new_file_name:
374 if pattern.match(f) and f != new_file_name:
375 others.append(f)
375 others.append(f)
376 return others
376 return others
377
377
378
378
379 ### Nodemap Trie
379 ### Nodemap Trie
380 #
380 #
381 # This is a simple reference implementation to compute and persist a nodemap
381 # This is a simple reference implementation to compute and persist a nodemap
382 # trie. This reference implementation is write-only. The python version of this
382 # trie. This reference implementation is write-only. The python version of this
383 # is not expected to be actually used, since it won't provide a performance
383 # is not expected to be actually used, since it won't provide a performance
384 # improvement over the existing non-persistent C implementation.
384 # improvement over the existing non-persistent C implementation.
385 #
385 #
386 # The nodemap is persisted as a trie using 4-bit addresses and 16-entry blocks.
386 # The nodemap is persisted as a trie using 4-bit addresses and 16-entry blocks.
387 # Each revision can be addressed using the shortest unique prefix of its node.
387 # Each revision can be addressed using the shortest unique prefix of its node.
388 #
388 #
389 # The trie is stored as a sequence of blocks. Each block contains 16 entries
389 # The trie is stored as a sequence of blocks. Each block contains 16 entries
390 # (signed 32-bit big-endian integers, cf. S_BLOCK). Each entry can be one of the following:
390 # (signed 32-bit big-endian integers, cf. S_BLOCK). Each entry can be one of the following:
391 #
391 #
392 # * value >= 0 -> index of sub-block
392 # * value >= 0 -> index of sub-block
393 # * value == -1 -> no value
393 # * value == -1 -> no value
394 # * value < -1 -> a revision value: rev = -(value+10)
394 # * value < -1 -> encoded revision: rev = -(value+2)
395 #
396 # See REV_OFFSET and _transform_rev below.
395 #
397 #
396 # The implementation focuses on simplicity, not on performance. A Rust
398 # The implementation focuses on simplicity, not on performance. A Rust
397 # implementation should provide an efficient version of the same binary
399 # implementation should provide an efficient version of the same binary
398 # persistence. This reference python implementation is never meant to be
400 # persistence. This reference python implementation is never meant to be
399 # extensively used in production.
401 # extensively used in production.
400
402
401
403
402 def persistent_data(index):
404 def persistent_data(index):
403 """return the persistent binary form for a nodemap for a given index"""
405 """return the persistent binary form for a nodemap for a given index"""
404 trie = _build_trie(index)
406 trie = _build_trie(index)
405 return _persist_trie(trie)
407 return _persist_trie(trie)
406
408
407
409
408 def update_persistent_data(index, root, max_idx, last_rev):
410 def update_persistent_data(index, root, max_idx, last_rev):
409 """return the incremental update for persistent nodemap from a given index"""
411 """return the incremental update for persistent nodemap from a given index"""
410 changed_block, trie = _update_trie(index, root, last_rev)
412 changed_block, trie = _update_trie(index, root, last_rev)
411 return (
413 return (
412 changed_block * S_BLOCK.size,
414 changed_block * S_BLOCK.size,
413 _persist_trie(trie, existing_idx=max_idx),
415 _persist_trie(trie, existing_idx=max_idx),
414 )
416 )
415
417
416
418
417 S_BLOCK = struct.Struct(">" + ("l" * 16))
419 S_BLOCK = struct.Struct(">" + ("l" * 16))
418
420
419 NO_ENTRY = -1
421 NO_ENTRY = -1
420 # rev 0 needs to be -2 because 0 is used by blocks and -1 is a special value.
422 # rev 0 needs to be -2 because 0 is used by blocks and -1 is a special value.
421 REV_OFFSET = 2
423 REV_OFFSET = 2
422
424
423
425
424 def _transform_rev(rev):
426 def _transform_rev(rev):
425 """Return the number used to represent the rev in the tree.
427 """Return the number used to represent the rev in the tree.
426
428
427 (or retrieve a rev number from such representation)
429 (or retrieve a rev number from such representation)
428
430
429 Note that this is an involution, a function equal to its inverse (i.e.
431 Note that this is an involution, a function equal to its inverse (i.e.
430 which gives the identity when applied to itself).
432 which gives the identity when applied to itself).
431 """
433 """
432 return -(rev + REV_OFFSET)
434 return -(rev + REV_OFFSET)
433
435
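Concretely, the encoding shifts revision numbers into the strictly-below-minus-one range and is its own inverse:

    assert _transform_rev(0) == -2  # rev 0 encodes as -2
    assert _transform_rev(41) == -43
    assert _transform_rev(_transform_rev(41)) == 41  # involution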
434
436
435 def _to_int(hex_digit):
437 def _to_int(hex_digit):
436 """turn an hexadecimal digit into a proper integer"""
438 """turn an hexadecimal digit into a proper integer"""
437 return int(hex_digit, 16)
439 return int(hex_digit, 16)
438
440
439
441
440 class Block(dict):
442 class Block(dict):
441 """represent a block of the Trie
443 """represent a block of the Trie
442
444
443 contains up to 16 entries indexed from 0 to 15"""
445 contains up to 16 entries indexed from 0 to 15"""
444
446
445 def __init__(self):
447 def __init__(self):
446 super(Block, self).__init__()
448 super(Block, self).__init__()
447 # If this block exists on disk, here is its ID
449 # If this block exists on disk, here is its ID
448 self.ondisk_id = None
450 self.ondisk_id = None
449
451
450 def __iter__(self):
452 def __iter__(self):
451 return iter(self.get(i) for i in range(16))
453 return iter(self.get(i) for i in range(16))
452
454
453
455
454 def _build_trie(index):
456 def _build_trie(index):
455 """build a nodemap trie
457 """build a nodemap trie
456
458
457 The nodemap stores the revision number associated with each unique prefix.
459 The nodemap stores the revision number associated with each unique prefix.
458
460
459 Each block is a dictionary with keys in `[0, 15]`. Values are either
461 Each block is a dictionary with keys in `[0, 15]`. Values are either
460 another block or a revision number.
462 another block or a revision number.
461 """
463 """
462 root = Block()
464 root = Block()
463 for rev in range(len(index)):
465 for rev in range(len(index)):
464 current_hex = hex(index[rev][7])
466 current_hex = hex(index[rev][7])
465 _insert_into_block(index, 0, root, rev, current_hex)
467 _insert_into_block(index, 0, root, rev, current_hex)
466 return root
468 return root
467
469
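A toy sketch of the structure this builds. Real callers pass a revlog index; for illustration, a list of tuples with the binary node in slot 7 is enough (the nodes are fabricated, and `bin` from `mercurial.node` is assumed):

    from mercurial.node import bin

    def _entry(node_hex):
        return (0, 0, 0, 0, 0, 0, 0, bin(node_hex))  # only slot 7 matters here

    toy_index = [
        _entry(b'12' * 20),  # rev 0
        _entry(b'13' * 20),  # rev 1, shares its first nibble with rev 0
    ]
    root = _build_trie(toy_index)
    # the shared nibble 0x1 leads to a sub-block holding both revisions
    sub = root[_to_int(b'1')]
    assert isinstance(sub, Block)
    assert sub[_to_int(b'2')] == 0 and sub[_to_int(b'3')] == 1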
468
470
469 def _update_trie(index, root, last_rev):
471 def _update_trie(index, root, last_rev):
470 """consume"""
472 """consume"""
471 changed = 0
473 changed = 0
472 for rev in range(last_rev + 1, len(index)):
474 for rev in range(last_rev + 1, len(index)):
473 current_hex = hex(index[rev][7])
475 current_hex = hex(index[rev][7])
474 changed += _insert_into_block(index, 0, root, rev, current_hex)
476 changed += _insert_into_block(index, 0, root, rev, current_hex)
475 return changed, root
477 return changed, root
476
478
477
479
478 def _insert_into_block(index, level, block, current_rev, current_hex):
480 def _insert_into_block(index, level, block, current_rev, current_hex):
479 """insert a new revision in a block
481 """insert a new revision in a block
480
482
481 index: the index we are adding revision for
483 index: the index we are adding revision for
482 level: the depth of the current block in the trie
484 level: the depth of the current block in the trie
483 block: the block currently being considered
485 block: the block currently being considered
484 current_rev: the revision number we are adding
486 current_rev: the revision number we are adding
485 current_hex: the hexadecimal representation of the of that revision
487 current_hex: the hexadecimal representation of the of that revision
486 """
488 """
487 changed = 1
489 changed = 1
488 if block.ondisk_id is not None:
490 if block.ondisk_id is not None:
489 block.ondisk_id = None
491 block.ondisk_id = None
490 hex_digit = _to_int(current_hex[level : level + 1])
492 hex_digit = _to_int(current_hex[level : level + 1])
491 entry = block.get(hex_digit)
493 entry = block.get(hex_digit)
492 if entry is None:
494 if entry is None:
493 # no entry, simply store the revision number
495 # no entry, simply store the revision number
494 block[hex_digit] = current_rev
496 block[hex_digit] = current_rev
495 elif isinstance(entry, dict):
497 elif isinstance(entry, dict):
496 # need to recurse to an underlying block
498 # need to recurse to an underlying block
497 changed += _insert_into_block(
499 changed += _insert_into_block(
498 index, level + 1, entry, current_rev, current_hex
500 index, level + 1, entry, current_rev, current_hex
499 )
501 )
500 else:
502 else:
501 # collision with a previously unique prefix; insert new
503 # collision with a previously unique prefix; insert new
502 # vertices to fit both entries.
504 # vertices to fit both entries.
503 other_hex = hex(index[entry][7])
505 other_hex = hex(index[entry][7])
504 other_rev = entry
506 other_rev = entry
505 new = Block()
507 new = Block()
506 block[hex_digit] = new
508 block[hex_digit] = new
507 _insert_into_block(index, level + 1, new, other_rev, other_hex)
509 _insert_into_block(index, level + 1, new, other_rev, other_hex)
508 _insert_into_block(index, level + 1, new, current_rev, current_hex)
510 _insert_into_block(index, level + 1, new, current_rev, current_hex)
509 return changed
511 return changed
510
512
511
513
512 def _persist_trie(root, existing_idx=None):
514 def _persist_trie(root, existing_idx=None):
513 """turn a nodemap trie into persistent binary data
515 """turn a nodemap trie into persistent binary data
514
516
515 See `_build_trie` for nodemap trie structure"""
517 See `_build_trie` for nodemap trie structure"""
516 block_map = {}
518 block_map = {}
517 if existing_idx is not None:
519 if existing_idx is not None:
518 base_idx = existing_idx + 1
520 base_idx = existing_idx + 1
519 else:
521 else:
520 base_idx = 0
522 base_idx = 0
521 chunks = []
523 chunks = []
522 for tn in _walk_trie(root):
524 for tn in _walk_trie(root):
523 if tn.ondisk_id is not None:
525 if tn.ondisk_id is not None:
524 block_map[id(tn)] = tn.ondisk_id
526 block_map[id(tn)] = tn.ondisk_id
525 else:
527 else:
526 block_map[id(tn)] = len(chunks) + base_idx
528 block_map[id(tn)] = len(chunks) + base_idx
527 chunks.append(_persist_block(tn, block_map))
529 chunks.append(_persist_block(tn, block_map))
528 return b''.join(chunks)
530 return b''.join(chunks)
529
531
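Each persisted block is thus a fixed 64 bytes: 16 slots of 4 bytes each (see `S_BLOCK` above), so the indexes stored in `block_map` translate directly into byte offsets:

    assert S_BLOCK.size == 16 * 4  # ">l" is a 4-byte big-endian integer
    # block number i starts at byte i * S_BLOCK.size in the data file
    empty = S_BLOCK.pack(*([NO_ENTRY] * 16))
    assert len(empty) == 64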
530
532
531 def _walk_trie(block):
533 def _walk_trie(block):
532 """yield all the block in a trie
534 """yield all the block in a trie
533
535
534 Children blocks are always yield before their parent block.
536 Children blocks are always yield before their parent block.
535 """
537 """
536 for (__, item) in sorted(block.items()):
538 for (__, item) in sorted(block.items()):
537 if isinstance(item, dict):
539 if isinstance(item, dict):
538 for sub_block in _walk_trie(item):
540 for sub_block in _walk_trie(item):
539 yield sub_block
541 yield sub_block
540 yield block
542 yield block
541
543
542
544
543 def _persist_block(block_node, block_map):
545 def _persist_block(block_node, block_map):
544 """produce persistent binary data for a single block
546 """produce persistent binary data for a single block
545
547
546 Children blocks are assumed to be already persisted and present in
548 Children blocks are assumed to be already persisted and present in
547 block_map.
549 block_map.
548 """
550 """
549 data = tuple(_to_value(v, block_map) for v in block_node)
551 data = tuple(_to_value(v, block_map) for v in block_node)
550 return S_BLOCK.pack(*data)
552 return S_BLOCK.pack(*data)
551
553
552
554
553 def _to_value(item, block_map):
555 def _to_value(item, block_map):
554 """persist any value as an integer"""
556 """persist any value as an integer"""
555 if item is None:
557 if item is None:
556 return NO_ENTRY
558 return NO_ENTRY
557 elif isinstance(item, dict):
559 elif isinstance(item, dict):
558 return block_map[id(item)]
560 return block_map[id(item)]
559 else:
561 else:
560 return _transform_rev(item)
562 return _transform_rev(item)
561
563
562
564
563 def parse_data(data):
565 def parse_data(data):
564 """parse parse nodemap data into a nodemap Trie"""
566 """parse parse nodemap data into a nodemap Trie"""
565 if (len(data) % S_BLOCK.size) != 0:
567 if (len(data) % S_BLOCK.size) != 0:
566 msg = "nodemap data size is not a multiple of block size (%d): %d"
568 msg = "nodemap data size is not a multiple of block size (%d): %d"
567 raise error.Abort(msg % (S_BLOCK.size, len(data)))
569 raise error.Abort(msg % (S_BLOCK.size, len(data)))
568 if not data:
570 if not data:
569 return Block(), None
571 return Block(), None
570 block_map = {}
572 block_map = {}
571 new_blocks = []
573 new_blocks = []
572 for i in range(0, len(data), S_BLOCK.size):
574 for i in range(0, len(data), S_BLOCK.size):
573 block = Block()
575 block = Block()
574 block.ondisk_id = len(block_map)
576 block.ondisk_id = len(block_map)
575 block_map[block.ondisk_id] = block
577 block_map[block.ondisk_id] = block
576 block_data = data[i : i + S_BLOCK.size]
578 block_data = data[i : i + S_BLOCK.size]
577 values = S_BLOCK.unpack(block_data)
579 values = S_BLOCK.unpack(block_data)
578 new_blocks.append((block, values))
580 new_blocks.append((block, values))
579 for block, values in new_blocks:
581 for block, values in new_blocks:
580 for idx, v in enumerate(values):
582 for idx, v in enumerate(values):
581 if v == NO_ENTRY:
583 if v == NO_ENTRY:
582 continue
584 continue
583 elif v >= 0:
585 elif v >= 0:
584 block[idx] = block_map[v]
586 block[idx] = block_map[v]
585 else:
587 else:
586 block[idx] = _transform_rev(v)
588 block[idx] = _transform_rev(v)
587 return block, i // S_BLOCK.size
589 return block, i // S_BLOCK.size
588
590
589
591
590 # debug utility
592 # debug utility
591
593
592
594
593 def check_data(ui, index, data):
595 def check_data(ui, index, data):
594 """verify that the provided nodemap data are valid for the given idex"""
596 """verify that the provided nodemap data are valid for the given idex"""
595 ret = 0
597 ret = 0
596 ui.status((b"revision in index: %d\n") % len(index))
598 ui.status((b"revision in index: %d\n") % len(index))
597 root, __ = parse_data(data)
599 root, __ = parse_data(data)
598 all_revs = set(_all_revisions(root))
600 all_revs = set(_all_revisions(root))
599 ui.status((b"revision in nodemap: %d\n") % len(all_revs))
601 ui.status((b"revision in nodemap: %d\n") % len(all_revs))
600 for r in range(len(index)):
602 for r in range(len(index)):
601 if r not in all_revs:
603 if r not in all_revs:
602 msg = b" revision missing from nodemap: %d\n" % r
604 msg = b" revision missing from nodemap: %d\n" % r
603 ui.write_err(msg)
605 ui.write_err(msg)
604 ret = 1
606 ret = 1
605 else:
607 else:
606 all_revs.remove(r)
608 all_revs.remove(r)
607 nm_rev = _find_node(root, hex(index[r][7]))
609 nm_rev = _find_node(root, hex(index[r][7]))
608 if nm_rev is None:
610 if nm_rev is None:
609 msg = b" revision node does not match any entries: %d\n" % r
611 msg = b" revision node does not match any entries: %d\n" % r
610 ui.write_err(msg)
612 ui.write_err(msg)
611 ret = 1
613 ret = 1
612 elif nm_rev != r:
614 elif nm_rev != r:
613 msg = (
615 msg = (
614 b" revision node does not match the expected revision: "
616 b" revision node does not match the expected revision: "
615 b"%d != %d\n" % (r, nm_rev)
617 b"%d != %d\n" % (r, nm_rev)
616 )
618 )
617 ui.write_err(msg)
619 ui.write_err(msg)
618 ret = 1
620 ret = 1
619
621
620 if all_revs:
622 if all_revs:
621 for r in sorted(all_revs):
623 for r in sorted(all_revs):
622 msg = b" extra revision in nodemap: %d\n" % r
624 msg = b" extra revision in nodemap: %d\n" % r
623 ui.write_err(msg)
625 ui.write_err(msg)
624 ret = 1
626 ret = 1
625 return ret
627 return ret
626
628
627
629
628 def _all_revisions(root):
630 def _all_revisions(root):
629 """return all revisions stored in a Trie"""
631 """return all revisions stored in a Trie"""
630 for block in _walk_trie(root):
632 for block in _walk_trie(root):
631 for v in block:
633 for v in block:
632 if v is None or isinstance(v, Block):
634 if v is None or isinstance(v, Block):
633 continue
635 continue
634 yield v
636 yield v
635
637
636
638
637 def _find_node(block, node):
639 def _find_node(block, node):
638 """find the revision associated with a given node"""
640 """find the revision associated with a given node"""
639 entry = block.get(_to_int(node[0:1]))
641 entry = block.get(_to_int(node[0:1]))
640 if isinstance(entry, dict):
642 if isinstance(entry, dict):
641 return _find_node(entry, node[1:])
643 return _find_node(entry, node[1:])
642 return entry
644 return entry
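Lookups descend one hex digit per level, so any prefix long enough to reach a leaf resolves; with the toy trie above, the two-digit prefix already identifies rev 0:

    assert _find_node(root, b'12') == 0
    # a prefix that stops at an inner block does not resolve to a revision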