persistent-nodemap: properly ignore non-existent `.nd` data file...
Simon Sapin
r46705:8ff2d835 default
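This change touches `persisted_data` in mercurial/revlogutils/nodemap.py. The docket names a raw `.nd` data file, and that file can legitimately be missing. With the old code, an `ENOENT` error escaped the `raise` but left `data` unassigned, so the following `len(data)` check crashed with `UnboundLocalError` instead of treating the persisted nodemap as absent. The new code returns `None` explicitly, which callers already interpret as "no usable persisted data". A minimal standalone sketch of the corrected pattern (hypothetical names, not Mercurial code):

    import errno

    def read_cache_file(open_fn, length):
        """read `length` bytes from a cache file, treating a missing
        file as "no cache" (the pattern adopted by this commit)"""
        try:
            with open_fn() as fd:
                data = fd.read(length)
        except OSError as e:
            if e.errno == errno.ENOENT:
                return None  # file does not exist: report "nothing persisted"
            else:
                raise  # any other I/O failure is still fatal
        # with the pre-fix variant (`if e.errno != errno.ENOENT: raise`), a
        # missing file left `data` unbound and the check below crashed
        if len(data) < length:
            return None
        return data

For example, `read_cache_file(lambda: open('some.nd', 'rb'), 64)` returns None when `some.nd` is absent. In the full listing below, only the lines marked `-`/`+` differ between the two revisions.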
@@ -1,640 +1,642
# nodemap.py - nodemap related code and utilities
#
# Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
# Copyright 2019 George Racinet <georges.racinet@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import errno
import os
import re
import struct

from ..i18n import _

from .. import (
    error,
    node as nodemod,
    util,
)


class NodeMap(dict):
    def __missing__(self, x):
        raise error.RevlogError(b'unknown node: %s' % x)


def persisted_data(revlog):
    """read the nodemap for a revlog from disk"""
    if revlog.nodemap_file is None:
        return None
    pdata = revlog.opener.tryread(revlog.nodemap_file)
    if not pdata:
        return None
    offset = 0
    (version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size])
    if version != ONDISK_VERSION:
        return None
    offset += S_VERSION.size
    headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
    uid_size, tip_rev, data_length, data_unused, tip_node_size = headers
    offset += S_HEADER.size
    docket = NodeMapDocket(pdata[offset : offset + uid_size])
    offset += uid_size
    docket.tip_rev = tip_rev
    docket.tip_node = pdata[offset : offset + tip_node_size]
    docket.data_length = data_length
    docket.data_unused = data_unused

    filename = _rawdata_filepath(revlog, docket)
    use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap")
    try:
        with revlog.opener(filename) as fd:
            if use_mmap:
                data = util.buffer(util.mmapread(fd, data_length))
            else:
                data = fd.read(data_length)
    except OSError as e:
-        if e.errno != errno.ENOENT:
+        if e.errno == errno.ENOENT:
+            return None
+        else:
            raise
    if len(data) < data_length:
        return None
    return docket, data


def setup_persistent_nodemap(tr, revlog):
    """Install whatever is needed, transaction-side, to persist a nodemap on disk

    (only actually persist the nodemap if this is relevant for this revlog)
    """
    if revlog._inline:
        return  # inlined revlogs are too small for this to be relevant
    if revlog.nodemap_file is None:
        return  # we do not use persistent_nodemap on this revlog

    # this needs to run after the changelog finalization (whose callback ids
    # use the "cl-" prefix and therefore sort before this "nm-" one)
    callback_id = b"nm-revlog-persistent-nodemap-%s" % revlog.nodemap_file
    if tr.hasfinalize(callback_id):
        return  # no need to register again
    tr.addpending(
        callback_id, lambda tr: _persist_nodemap(tr, revlog, pending=True)
    )
    tr.addfinalize(callback_id, lambda tr: _persist_nodemap(tr, revlog))


class _NoTransaction(object):
    """transaction-like object to update the nodemap outside a transaction"""

    def __init__(self):
        self._postclose = {}

    def addpostclose(self, callback_id, callback_func):
        self._postclose[callback_id] = callback_func

    def registertmp(self, *args, **kwargs):
        pass

    def addbackup(self, *args, **kwargs):
        pass

    def add(self, *args, **kwargs):
        pass

    def addabort(self, *args, **kwargs):
        pass

    def _report(self, *args):
        pass


def update_persistent_nodemap(revlog):
    """update the persistent nodemap right now

    To be used for updating the nodemap on disk outside of a normal transaction
    setup (eg, `debugupdatecache`).
    """
    if revlog._inline:
        return  # inlined revlogs are too small for this to be relevant
    if revlog.nodemap_file is None:
        return  # we do not use persistent_nodemap on this revlog

    notr = _NoTransaction()
    _persist_nodemap(notr, revlog)
    for k in sorted(notr._postclose):
        notr._postclose[k](None)


def _persist_nodemap(tr, revlog, pending=False):
    """Write nodemap data on disk for a given revlog"""
    if getattr(revlog, 'filteredrevs', ()):
        raise error.ProgrammingError(
            "cannot persist nodemap of a filtered changelog"
        )
    if revlog.nodemap_file is None:
        msg = "calling persist nodemap on a revlog without the feature enabled"
        raise error.ProgrammingError(msg)

    can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental")
    ondisk_docket = revlog._nodemap_docket
    feed_data = util.safehasattr(revlog.index, "update_nodemap_data")
    use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap")
    mode = revlog.opener.options.get(b"persistent-nodemap.mode")
    if not can_incremental:
        msg = _(b"persistent nodemap in strict mode without efficient method")
        if mode == b'warn':
            tr._report(b"%s\n" % msg)
        elif mode == b'strict':
            raise error.Abort(msg)

    data = None
    # first attempt an incremental update of the data
    if can_incremental and ondisk_docket is not None:
        target_docket = revlog._nodemap_docket.copy()
        (
            src_docket,
            data_changed_count,
            data,
        ) = revlog.index.nodemap_data_incremental()
        new_length = target_docket.data_length + len(data)
        new_unused = target_docket.data_unused + data_changed_count
        if src_docket != target_docket:
            data = None
        elif new_length <= (new_unused * 10):  # at least 10% is unused data
            data = None
        else:
            datafile = _rawdata_filepath(revlog, target_docket)
            # EXP-TODO: if this is a cache, this should use a cache vfs, not a
            # store vfs
            tr.add(datafile, target_docket.data_length)
            with revlog.opener(datafile, b'r+') as fd:
                fd.seek(target_docket.data_length)
                fd.write(data)
                if feed_data:
                    if use_mmap:
                        fd.seek(0)
                        new_data = fd.read(new_length)
                    else:
                        fd.flush()
                        new_data = util.buffer(util.mmapread(fd, new_length))
            target_docket.data_length = new_length
            target_docket.data_unused = new_unused

    if data is None:
        # otherwise fall back to a full new export
        target_docket = NodeMapDocket()
        datafile = _rawdata_filepath(revlog, target_docket)
        if util.safehasattr(revlog.index, "nodemap_data_all"):
            data = revlog.index.nodemap_data_all()
        else:
            data = persistent_data(revlog.index)
        # EXP-TODO: if this is a cache, this should use a cache vfs, not a
        # store vfs

        tryunlink = revlog.opener.tryunlink

        def abortck(tr):
            tryunlink(datafile)

        callback_id = b"delete-%s" % datafile

        # some flavors of the transaction abort do not clean up new files,
        # they simply empty them.
        tr.addabort(callback_id, abortck)
        with revlog.opener(datafile, b'w+') as fd:
            fd.write(data)
            if feed_data:
                if use_mmap:
                    new_data = data
                else:
                    fd.flush()
                    new_data = util.buffer(util.mmapread(fd, len(data)))
        target_docket.data_length = len(data)
    target_docket.tip_rev = revlog.tiprev()
    target_docket.tip_node = revlog.node(target_docket.tip_rev)
    # EXP-TODO: if this is a cache, this should use a cache vfs, not a
    # store vfs
    file_path = revlog.nodemap_file
    if pending:
        file_path += b'.a'
        tr.registertmp(file_path)
    else:
        tr.addbackup(file_path)

    with revlog.opener(file_path, b'w', atomictemp=True) as fp:
        fp.write(target_docket.serialize())
    revlog._nodemap_docket = target_docket
    if feed_data:
        revlog.index.update_nodemap_data(target_docket, new_data)

    # search for old data files in all cases, some older process might have
    # left one behind.
    olds = _other_rawdata_filepath(revlog, target_docket)
    if olds:
        realvfs = getattr(revlog, '_realopener', revlog.opener)

        def cleanup(tr):
            for oldfile in olds:
                realvfs.tryunlink(oldfile)

        callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file
        tr.addpostclose(callback_id, cleanup)


### Nodemap docket file
#
# The nodemap data are stored on disk using 2 files:
#
# * a raw data file containing a persistent nodemap
#   (see the `Nodemap Trie` section)
#
# * a small "docket" file containing metadata
#
# While the nodemap data can be multiple tens of megabytes, the "docket" is
# small, so it is easy to update it automatically or to duplicate its content
# during a transaction.
#
# Multiple raw data files can exist at the same time (the currently valid one
# and a new one being written by an in-progress transaction). To accommodate
# this, the filename hosting the raw data has a variable part. The exact
# filename is specified inside the "docket" file.
#
# The docket file contains information to find, qualify and validate the raw
# data. Its content is currently very light, but it will expand as the on-disk
# nodemap gains the necessary features to be used in production.

ONDISK_VERSION = 1
S_VERSION = struct.Struct(">B")
S_HEADER = struct.Struct(">BQQQQ")

ID_SIZE = 8
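
Taken together, these constants fully describe the docket layout: a 1-byte version, a 33-byte fixed header, then the variable-length uid and tip node. As a concrete illustration, here is a small standalone sketch (my names, not part of the module) that decodes a docket buffer the same way `persisted_data` does:

    import struct

    S_VERSION = struct.Struct(">B")     # 1 byte: on-disk format version
    S_HEADER = struct.Struct(">BQQQQ")  # uid size, tip rev, data length,
                                        # unused bytes, tip node size

    def decode_docket(pdata):
        """decode the byte string produced by NodeMapDocket.serialize()"""
        offset = 0
        (version,) = S_VERSION.unpack_from(pdata, offset)
        offset += S_VERSION.size
        uid_size, tip_rev, data_length, data_unused, tip_node_size = (
            S_HEADER.unpack_from(pdata, offset)
        )
        offset += S_HEADER.size
        uid = pdata[offset : offset + uid_size]
        offset += uid_size
        tip_node = pdata[offset : offset + tip_node_size]
        return version, uid, tip_rev, tip_node, data_length, data_unused

The raw data filename is then derived from the uid (`<prefix>-<uid>.nd`), which is why several generations of data files can coexist while only one docket points at the live one.
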
def _make_uid():
    """return a new unique identifier.

    The identifier is random and composed of ascii characters."""
    return nodemod.hex(os.urandom(ID_SIZE))


class NodeMapDocket(object):
    """metadata associated with persistent nodemap data

    The persistent data may come from disk or be on its way to disk.
    """

    def __init__(self, uid=None):
        if uid is None:
            uid = _make_uid()
        # a unique identifier for the data file:
        # - When new data are appended, it is preserved.
        # - When a new data file is created, a new identifier is generated.
        self.uid = uid
        # the tipmost revision stored in the data file. This revision and all
        # revisions before it are expected to be encoded in the data file.
        self.tip_rev = None
        # the node of that tipmost revision; if it mismatches the current
        # index data, the docket is not valid for the current index and
        # should be discarded.
        #
        # note: this check is not perfect, as some destructive operations
        # could preserve the same tip_rev + tip_node while altering lower
        # revisions. However, multiple other caches have the same
        # vulnerability (eg: branchmap cache).
        self.tip_node = None
        # the size (in bytes) of the persisted data to encode the nodemap
        # valid for `tip_rev`.
        # - a data file shorter than this is corrupted,
        # - any extra data should be ignored.
        self.data_length = None
        # the amount (in bytes) of "dead" data, still in the data file but no
        # longer used for the nodemap.
        self.data_unused = 0

    def copy(self):
        new = NodeMapDocket(uid=self.uid)
        new.tip_rev = self.tip_rev
        new.tip_node = self.tip_node
        new.data_length = self.data_length
        new.data_unused = self.data_unused
        return new

    def __cmp__(self, other):
        if self.uid < other.uid:
            return -1
        if self.uid > other.uid:
            return 1
        elif self.data_length < other.data_length:
            return -1
        elif self.data_length > other.data_length:
            return 1
        return 0

    def __eq__(self, other):
        return self.uid == other.uid and self.data_length == other.data_length

    def serialize(self):
        """return the serialized bytes for this docket"""
        data = []
        data.append(S_VERSION.pack(ONDISK_VERSION))
        headers = (
            len(self.uid),
            self.tip_rev,
            self.data_length,
            self.data_unused,
            len(self.tip_node),
        )
        data.append(S_HEADER.pack(*headers))
        data.append(self.uid)
        data.append(self.tip_node)
        return b''.join(data)


def _rawdata_filepath(revlog, docket):
    """The (vfs relative) nodemap's rawdata file for a given uid"""
    if revlog.nodemap_file.endswith(b'.n.a'):
        prefix = revlog.nodemap_file[:-4]
    else:
        prefix = revlog.nodemap_file[:-2]
    return b"%s-%s.nd" % (prefix, docket.uid)


def _other_rawdata_filepath(revlog, docket):
    """return the other (eg: stale) rawdata files for this revlog"""
    prefix = revlog.nodemap_file[:-2]
    pattern = re.compile(br"(^|/)%s-[0-9a-f]+\.nd$" % prefix)
    new_file_path = _rawdata_filepath(revlog, docket)
    new_file_name = revlog.opener.basename(new_file_path)
    dirpath = revlog.opener.dirname(new_file_path)
    others = []
    for f in revlog.opener.listdir(dirpath):
        if pattern.match(f) and f != new_file_name:
            others.append(f)
    return others


### Nodemap Trie
#
# This is a simple reference implementation to compute and persist a nodemap
# trie. This reference implementation is write only. The python version is
# not expected to be actually used, since it won't provide any performance
# improvement over the existing non-persistent C implementation.
#
# The nodemap is persisted as a Trie using 4-bit-address, 16-entry blocks.
# Each revision can be addressed using its node's shortest unique prefix.
#
# The trie is stored as a sequence of blocks. Each block contains 16 entries
# (signed 32bit integers, big endian; note that S_BLOCK below uses ">l",
# i.e. 4 bytes per entry). Each entry can be one of the following:
#
# * value >= 0  -> index of a sub-block
# * value == -1 -> no value
# * value < -1  -> a revision value: rev = -(value+2)
#
# The implementation focuses on simplicity, not on performance. A Rust
# implementation should provide an efficient version of the same binary
# persistence. This reference python implementation is never meant to be
# extensively used in production.
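
A short standalone sketch of the entry encoding just described (the names are mine; the module's own constants follow below):

    import struct

    S_BLOCK = struct.Struct(">" + ("l" * 16))  # one block: 16 big-endian int32

    def describe_entry(value):
        """interpret one raw trie entry per the encoding above"""
        if value >= 0:
            return "sub-block #%d" % value
        elif value == -1:
            return "empty slot"
        else:
            return "revision %d" % -(value + 2)  # inverse of rev -> -(rev + 2)

    # a block whose slot 0 points at block 1, whose slot 3 stores revision 0,
    # and whose other 14 slots are empty:
    raw = S_BLOCK.pack(*([1, -1, -1, -2] + [-1] * 12))
    print([describe_entry(v) for v in S_BLOCK.unpack(raw)])
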
def persistent_data(index):
    """return the persistent binary form for a nodemap for a given index"""
    trie = _build_trie(index)
    return _persist_trie(trie)


def update_persistent_data(index, root, max_idx, last_rev):
    """return the incremental update for persistent nodemap from a given index"""
    changed_block, trie = _update_trie(index, root, last_rev)
    return (
        changed_block * S_BLOCK.size,
        _persist_trie(trie, existing_idx=max_idx),
    )


S_BLOCK = struct.Struct(">" + ("l" * 16))

NO_ENTRY = -1
# rev 0 needs to be -2 because 0 is used by blocks and -1 is a special value.
REV_OFFSET = 2


def _transform_rev(rev):
    """Return the number used to represent the rev in the tree.

    (or retrieve a rev number from such a representation)

    Note that this is an involution, a function equal to its inverse (i.e.
    which gives the identity when applied to itself).
    """
    return -(rev + REV_OFFSET)


def _to_int(hex_digit):
    """turn a hexadecimal digit into a proper integer"""
    return int(hex_digit, 16)


class Block(dict):
    """represent a block of the Trie

    contains up to 16 entries indexed from 0 to 15"""

    def __init__(self):
        super(Block, self).__init__()
        # If this block exists on disk, here is its ID
        self.ondisk_id = None

    def __iter__(self):
        return iter(self.get(i) for i in range(16))


def _build_trie(index):
    """build a nodemap trie

    The nodemap stores a revision number for each unique prefix.

    Each block is a dictionary with keys in `[0, 15]`. Values are either
    another block or a revision number.
    """
    root = Block()
    for rev in range(len(index)):
        hex = nodemod.hex(index[rev][7])
        _insert_into_block(index, 0, root, rev, hex)
    return root


def _update_trie(index, root, last_rev):
    """update a trie with the revisions added to the index since `last_rev`"""
    changed = 0
    for rev in range(last_rev + 1, len(index)):
        hex = nodemod.hex(index[rev][7])
        changed += _insert_into_block(index, 0, root, rev, hex)
    return changed, root


def _insert_into_block(index, level, block, current_rev, current_hex):
    """insert a new revision in a block

    index: the index we are adding revision for
    level: the depth of the current block in the trie
    block: the block currently being considered
    current_rev: the revision number we are adding
    current_hex: the hexadecimal representation of the node of that revision
    """
    changed = 1
    if block.ondisk_id is not None:
        block.ondisk_id = None
    hex_digit = _to_int(current_hex[level : level + 1])
    entry = block.get(hex_digit)
    if entry is None:
        # no entry, simply store the revision number
        block[hex_digit] = current_rev
    elif isinstance(entry, dict):
        # need to recurse to an underlying block
        changed += _insert_into_block(
            index, level + 1, entry, current_rev, current_hex
        )
    else:
        # collision with a previously unique prefix, inserting new
        # vertices to fit both entries.
        other_hex = nodemod.hex(index[entry][7])
        other_rev = entry
        new = Block()
        block[hex_digit] = new
        _insert_into_block(index, level + 1, new, other_rev, other_hex)
        _insert_into_block(index, level + 1, new, current_rev, current_hex)
    return changed
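
To make the collision branch above concrete, here is a simplified standalone sketch (plain dicts and made-up hex strings instead of the module's Block and index types) showing how two nodes sharing a prefix end up in nested blocks:

    def insert(block, rev, hexnode, level=0):
        """simplified _insert_into_block: one hex digit per trie level"""
        digit = int(hexnode[level], 16)
        entry = block.get(digit)
        if entry is None:
            block[digit] = (rev, hexnode)  # prefix unique so far: store a leaf
        elif isinstance(entry, dict):
            insert(entry, rev, hexnode, level + 1)  # recurse into the sub-block
        else:
            sub = {}  # collision: push both leaves one level down
            block[digit] = sub
            insert(sub, entry[0], entry[1], level + 1)
            insert(sub, rev, hexnode, level + 1)

    root = {}
    insert(root, 0, "abc123")
    insert(root, 1, "abd456")  # shares the "ab" prefix with revision 0
    # root[0xa][0xb] is now a sub-block splitting on the third digit:
    assert root[0xa][0xb][0xc] == (0, "abc123")
    assert root[0xa][0xb][0xd] == (1, "abd456")
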
def _persist_trie(root, existing_idx=None):
    """turn a nodemap trie into persistent binary data

    See `_build_trie` for nodemap trie structure"""
    block_map = {}
    if existing_idx is not None:
        base_idx = existing_idx + 1
    else:
        base_idx = 0
    chunks = []
    for tn in _walk_trie(root):
        if tn.ondisk_id is not None:
            block_map[id(tn)] = tn.ondisk_id
        else:
            block_map[id(tn)] = len(chunks) + base_idx
            chunks.append(_persist_block(tn, block_map))
    return b''.join(chunks)


def _walk_trie(block):
    """yield all the blocks in a trie

    Children blocks are always yielded before their parent block.
    """
    for (__, item) in sorted(block.items()):
        if isinstance(item, dict):
            for sub_block in _walk_trie(item):
                yield sub_block
    yield block


def _persist_block(block_node, block_map):
    """produce persistent binary data for a single block

    Children blocks are assumed to be already persisted and present in
    block_map.
    """
    data = tuple(_to_value(v, block_map) for v in block_node)
    return S_BLOCK.pack(*data)


def _to_value(item, block_map):
    """persist any value as an integer"""
    if item is None:
        return NO_ENTRY
    elif isinstance(item, dict):
        return block_map[id(item)]
    else:
        return _transform_rev(item)


def parse_data(data):
    """parse nodemap data into a nodemap Trie"""
    if (len(data) % S_BLOCK.size) != 0:
        msg = "nodemap data size is not a multiple of block size (%d): %d"
        raise error.Abort(msg % (S_BLOCK.size, len(data)))
    if not data:
        return Block(), None
    block_map = {}
    new_blocks = []
    for i in range(0, len(data), S_BLOCK.size):
        block = Block()
        block.ondisk_id = len(block_map)
        block_map[block.ondisk_id] = block
        block_data = data[i : i + S_BLOCK.size]
        values = S_BLOCK.unpack(block_data)
        new_blocks.append((block, values))
    for b, values in new_blocks:
        for idx, v in enumerate(values):
            if v == NO_ENTRY:
                continue
            elif v >= 0:
                b[idx] = block_map[v]
            else:
                b[idx] = _transform_rev(v)
    # blocks are written children-first, so the last parsed block is the root
    return block, i // S_BLOCK.size


# debug utility


def check_data(ui, index, data):
    """verify that the provided nodemap data are valid for the given index"""
    ret = 0
    ui.status((b"revision in index: %d\n") % len(index))
    root, __ = parse_data(data)
    all_revs = set(_all_revisions(root))
    ui.status((b"revision in nodemap: %d\n") % len(all_revs))
    for r in range(len(index)):
        if r not in all_revs:
            msg = b" revision missing from nodemap: %d\n" % r
            ui.write_err(msg)
            ret = 1
        else:
            all_revs.remove(r)
            nm_rev = _find_node(root, nodemod.hex(index[r][7]))
            if nm_rev is None:
                msg = b" revision node does not match any entries: %d\n" % r
                ui.write_err(msg)
                ret = 1
            elif nm_rev != r:
                msg = (
                    b" revision node does not match the expected revision: "
                    b"%d != %d\n" % (r, nm_rev)
                )
                ui.write_err(msg)
                ret = 1

    if all_revs:
        for r in sorted(all_revs):
            msg = b" extra revision in nodemap: %d\n" % r
            ui.write_err(msg)
            ret = 1
    return ret


def _all_revisions(root):
    """return all revisions stored in a Trie"""
    for block in _walk_trie(root):
        for v in block:
            if v is None or isinstance(v, Block):
                continue
            yield v


def _find_node(block, node):
    """find the revision associated with a given node"""
    entry = block.get(_to_int(node[0:1]))
    if isinstance(entry, dict):
        return _find_node(entry, node[1:])
    return entry
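
Finally, a hedged end-to-end sketch of the reference implementation (assuming this module is importable as mercurial.revlogutils.nodemap, and that index entries store the binary node at position 7, as the code above does): build a trie from a fake index, persist it, parse it back and look every node up again.

    import hashlib

    from mercurial.node import hex as tohex
    from mercurial.revlogutils import nodemap

    # five fake index entries with only the node field (position 7) populated
    index = [(None,) * 7 + (hashlib.sha1(b'%d' % r).digest(),) for r in range(5)]

    data = nodemap.persistent_data(index)         # _build_trie + _persist_trie
    assert len(data) % nodemap.S_BLOCK.size == 0  # a whole number of blocks
    root, last_idx = nodemap.parse_data(data)     # root is the last block written
    for r in range(5):
        assert nodemap._find_node(root, tohex(index[r][7])) == r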