##// END OF EJS Templates
mmap: only use mmap to read revlog persistent nodemap if it is safe...
marmoute -
r52549:21442f87 stable
parent child Browse files
Show More
@@ -1,670 +1,670
1 # nodemap.py - nodemap related code and utilities
1 # nodemap.py - nodemap related code and utilities
2 #
2 #
3 # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
3 # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
4 # Copyright 2019 George Racinet <georges.racinet@octobus.net>
4 # Copyright 2019 George Racinet <georges.racinet@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9
9
10 import re
10 import re
11 import struct
11 import struct
12
12
13 from ..node import hex
13 from ..node import hex
14
14
15 from .. import (
15 from .. import (
16 error,
16 error,
17 requirements,
17 requirements,
18 util,
18 util,
19 )
19 )
20 from . import docket as docket_mod
20 from . import docket as docket_mod
21
21
22
22
23 class NodeMap(dict):
23 class NodeMap(dict):
24 def __missing__(self, x):
24 def __missing__(self, x):
25 raise error.RevlogError(b'unknown node: %s' % x)
25 raise error.RevlogError(b'unknown node: %s' % x)
26
26
27
27
28 def test_race_hook_1():
28 def test_race_hook_1():
29 """hook point for test
29 """hook point for test
30
30
31 This let tests to have things happens between the docket reading and the
31 This let tests to have things happens between the docket reading and the
32 data reading"""
32 data reading"""
33 pass
33 pass
34
34
35
35
36 def post_stream_cleanup(repo):
36 def post_stream_cleanup(repo):
37 """The stream clone might needs to remove some file if persisten nodemap
37 """The stream clone might needs to remove some file if persisten nodemap
38 was dropped while stream cloning
38 was dropped while stream cloning
39 """
39 """
40 if requirements.REVLOGV1_REQUIREMENT not in repo.requirements:
40 if requirements.REVLOGV1_REQUIREMENT not in repo.requirements:
41 return
41 return
42 if requirements.NODEMAP_REQUIREMENT in repo.requirements:
42 if requirements.NODEMAP_REQUIREMENT in repo.requirements:
43 return
43 return
44 unfi = repo.unfiltered()
44 unfi = repo.unfiltered()
45 delete_nodemap(None, unfi, unfi.changelog)
45 delete_nodemap(None, unfi, unfi.changelog)
46 delete_nodemap(None, repo, unfi.manifestlog._rootstore._revlog)
46 delete_nodemap(None, repo, unfi.manifestlog._rootstore._revlog)
47
47
48
48
49 def persisted_data(revlog):
49 def persisted_data(revlog):
50 """read the nodemap for a revlog from disk"""
50 """read the nodemap for a revlog from disk"""
51 if revlog._nodemap_file is None:
51 if revlog._nodemap_file is None:
52 return None
52 return None
53 pdata = revlog.opener.tryread(revlog._nodemap_file)
53 pdata = revlog.opener.tryread(revlog._nodemap_file)
54 if not pdata:
54 if not pdata:
55 return None
55 return None
56 offset = 0
56 offset = 0
57 (version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size])
57 (version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size])
58 if version != ONDISK_VERSION:
58 if version != ONDISK_VERSION:
59 return None
59 return None
60 offset += S_VERSION.size
60 offset += S_VERSION.size
61 headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
61 headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
62 uid_size, tip_rev, data_length, data_unused, tip_node_size = headers
62 uid_size, tip_rev, data_length, data_unused, tip_node_size = headers
63 offset += S_HEADER.size
63 offset += S_HEADER.size
64 docket = NodeMapDocket(pdata[offset : offset + uid_size])
64 docket = NodeMapDocket(pdata[offset : offset + uid_size])
65 offset += uid_size
65 offset += uid_size
66 docket.tip_rev = tip_rev
66 docket.tip_rev = tip_rev
67 docket.tip_node = pdata[offset : offset + tip_node_size]
67 docket.tip_node = pdata[offset : offset + tip_node_size]
68 docket.data_length = data_length
68 docket.data_length = data_length
69 docket.data_unused = data_unused
69 docket.data_unused = data_unused
70
70
71 filename = _rawdata_filepath(revlog, docket)
71 filename = _rawdata_filepath(revlog, docket)
72 use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap")
72 use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap")
73
73
74 test_race_hook_1()
74 test_race_hook_1()
75 try:
75 try:
76 with revlog.opener(filename) as fd:
76 with revlog.opener(filename) as fd:
77 if use_mmap:
77 if use_mmap and revlog.opener.is_mmap_safe(filename):
78 try:
78 try:
79 data = util.buffer(util.mmapread(fd, data_length))
79 data = util.buffer(util.mmapread(fd, data_length))
80 except ValueError:
80 except ValueError:
81 # raised when the read file is too small
81 # raised when the read file is too small
82 data = b''
82 data = b''
83 else:
83 else:
84 data = fd.read(data_length)
84 data = fd.read(data_length)
85 except FileNotFoundError:
85 except FileNotFoundError:
86 return None
86 return None
87 if len(data) < data_length:
87 if len(data) < data_length:
88 return None
88 return None
89 return docket, data
89 return docket, data
90
90
91
91
92 def setup_persistent_nodemap(tr, revlog):
92 def setup_persistent_nodemap(tr, revlog):
93 """Install whatever is needed transaction side to persist a nodemap on disk
93 """Install whatever is needed transaction side to persist a nodemap on disk
94
94
95 (only actually persist the nodemap if this is relevant for this revlog)
95 (only actually persist the nodemap if this is relevant for this revlog)
96 """
96 """
97 if revlog._inline:
97 if revlog._inline:
98 return # inlined revlog are too small for this to be relevant
98 return # inlined revlog are too small for this to be relevant
99 if revlog._nodemap_file is None:
99 if revlog._nodemap_file is None:
100 return # we do not use persistent_nodemap on this revlog
100 return # we do not use persistent_nodemap on this revlog
101
101
102 # we need to happen after the changelog finalization, in that use "cl-"
102 # we need to happen after the changelog finalization, in that use "cl-"
103 callback_id = b"nm-revlog-persistent-nodemap-%s" % revlog._nodemap_file
103 callback_id = b"nm-revlog-persistent-nodemap-%s" % revlog._nodemap_file
104 if tr.hasfinalize(callback_id):
104 if tr.hasfinalize(callback_id):
105 return # no need to register again
105 return # no need to register again
106 tr.addpending(
106 tr.addpending(
107 callback_id, lambda tr: persist_nodemap(tr, revlog, pending=True)
107 callback_id, lambda tr: persist_nodemap(tr, revlog, pending=True)
108 )
108 )
109 tr.addfinalize(callback_id, lambda tr: persist_nodemap(tr, revlog))
109 tr.addfinalize(callback_id, lambda tr: persist_nodemap(tr, revlog))
110
110
111
111
112 class _NoTransaction:
112 class _NoTransaction:
113 """transaction like object to update the nodemap outside a transaction"""
113 """transaction like object to update the nodemap outside a transaction"""
114
114
115 def __init__(self):
115 def __init__(self):
116 self._postclose = {}
116 self._postclose = {}
117
117
118 def addpostclose(self, callback_id, callback_func):
118 def addpostclose(self, callback_id, callback_func):
119 self._postclose[callback_id] = callback_func
119 self._postclose[callback_id] = callback_func
120
120
121 def registertmp(self, *args, **kwargs):
121 def registertmp(self, *args, **kwargs):
122 pass
122 pass
123
123
124 def addbackup(self, *args, **kwargs):
124 def addbackup(self, *args, **kwargs):
125 pass
125 pass
126
126
127 def add(self, *args, **kwargs):
127 def add(self, *args, **kwargs):
128 pass
128 pass
129
129
130 def addabort(self, *args, **kwargs):
130 def addabort(self, *args, **kwargs):
131 pass
131 pass
132
132
133 def _report(self, *args):
133 def _report(self, *args):
134 pass
134 pass
135
135
136
136
137 def update_persistent_nodemap(revlog):
137 def update_persistent_nodemap(revlog):
138 """update the persistent nodemap right now
138 """update the persistent nodemap right now
139
139
140 To be used for updating the nodemap on disk outside of a normal transaction
140 To be used for updating the nodemap on disk outside of a normal transaction
141 setup (eg, `debugupdatecache`).
141 setup (eg, `debugupdatecache`).
142 """
142 """
143 if revlog._inline:
143 if revlog._inline:
144 return # inlined revlog are too small for this to be relevant
144 return # inlined revlog are too small for this to be relevant
145 if revlog._nodemap_file is None:
145 if revlog._nodemap_file is None:
146 return # we do not use persistent_nodemap on this revlog
146 return # we do not use persistent_nodemap on this revlog
147
147
148 notr = _NoTransaction()
148 notr = _NoTransaction()
149 persist_nodemap(notr, revlog)
149 persist_nodemap(notr, revlog)
150 for k in sorted(notr._postclose):
150 for k in sorted(notr._postclose):
151 notr._postclose[k](None)
151 notr._postclose[k](None)
152
152
153
153
154 def delete_nodemap(tr, repo, revlog):
154 def delete_nodemap(tr, repo, revlog):
155 """Delete nodemap data on disk for a given revlog"""
155 """Delete nodemap data on disk for a given revlog"""
156 prefix = revlog.radix
156 prefix = revlog.radix
157 pattern = re.compile(br"(^|/)%s(-[0-9a-f]+\.nd|\.n(\.a)?)$" % prefix)
157 pattern = re.compile(br"(^|/)%s(-[0-9a-f]+\.nd|\.n(\.a)?)$" % prefix)
158 dirpath = revlog.opener.dirname(revlog._indexfile)
158 dirpath = revlog.opener.dirname(revlog._indexfile)
159 for f in revlog.opener.listdir(dirpath):
159 for f in revlog.opener.listdir(dirpath):
160 if pattern.match(f):
160 if pattern.match(f):
161 repo.svfs.tryunlink(f)
161 repo.svfs.tryunlink(f)
162
162
163
163
164 def persist_nodemap(tr, revlog, pending=False, force=False):
164 def persist_nodemap(tr, revlog, pending=False, force=False):
165 """Write nodemap data on disk for a given revlog"""
165 """Write nodemap data on disk for a given revlog"""
166 if len(revlog.index) <= 0:
166 if len(revlog.index) <= 0:
167 return
167 return
168 if getattr(revlog, 'filteredrevs', ()):
168 if getattr(revlog, 'filteredrevs', ()):
169 raise error.ProgrammingError(
169 raise error.ProgrammingError(
170 "cannot persist nodemap of a filtered changelog"
170 "cannot persist nodemap of a filtered changelog"
171 )
171 )
172 if revlog._nodemap_file is None:
172 if revlog._nodemap_file is None:
173 if force:
173 if force:
174 revlog._nodemap_file = get_nodemap_file(revlog)
174 revlog._nodemap_file = get_nodemap_file(revlog)
175 else:
175 else:
176 msg = "calling persist nodemap on a revlog without the feature enabled"
176 msg = "calling persist nodemap on a revlog without the feature enabled"
177 raise error.ProgrammingError(msg)
177 raise error.ProgrammingError(msg)
178
178
179 can_incremental = hasattr(revlog.index, "nodemap_data_incremental")
179 can_incremental = hasattr(revlog.index, "nodemap_data_incremental")
180 ondisk_docket = revlog._nodemap_docket
180 ondisk_docket = revlog._nodemap_docket
181 feed_data = hasattr(revlog.index, "update_nodemap_data")
181 feed_data = hasattr(revlog.index, "update_nodemap_data")
182 use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap")
182 use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap")
183
183
184 data = None
184 data = None
185 # first attemp an incremental update of the data
185 # first attemp an incremental update of the data
186 if can_incremental and ondisk_docket is not None:
186 if can_incremental and ondisk_docket is not None:
187 target_docket = revlog._nodemap_docket.copy()
187 target_docket = revlog._nodemap_docket.copy()
188 (
188 (
189 src_docket,
189 src_docket,
190 data_changed_count,
190 data_changed_count,
191 data,
191 data,
192 ) = revlog.index.nodemap_data_incremental()
192 ) = revlog.index.nodemap_data_incremental()
193 new_length = target_docket.data_length + len(data)
193 new_length = target_docket.data_length + len(data)
194 new_unused = target_docket.data_unused + data_changed_count
194 new_unused = target_docket.data_unused + data_changed_count
195 if src_docket != target_docket:
195 if src_docket != target_docket:
196 data = None
196 data = None
197 elif new_length <= (new_unused * 10): # under 10% of unused data
197 elif new_length <= (new_unused * 10): # under 10% of unused data
198 data = None
198 data = None
199 else:
199 else:
200 datafile = _rawdata_filepath(revlog, target_docket)
200 datafile = _rawdata_filepath(revlog, target_docket)
201 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
201 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
202 # store vfs
202 # store vfs
203 tr.add(datafile, target_docket.data_length)
203 tr.add(datafile, target_docket.data_length)
204 with revlog.opener(datafile, b'r+') as fd:
204 with revlog.opener(datafile, b'r+') as fd:
205 fd.seek(target_docket.data_length)
205 fd.seek(target_docket.data_length)
206 fd.write(data)
206 fd.write(data)
207 if feed_data:
207 if feed_data:
208 if use_mmap:
208 if use_mmap and revlog.opener.is_mmap_safe(datafile):
209 fd.flush()
209 fd.flush()
210 new_data = util.buffer(util.mmapread(fd, new_length))
210 new_data = util.buffer(util.mmapread(fd, new_length))
211 else:
211 else:
212 fd.seek(0)
212 fd.seek(0)
213 new_data = fd.read(new_length)
213 new_data = fd.read(new_length)
214 target_docket.data_length = new_length
214 target_docket.data_length = new_length
215 target_docket.data_unused = new_unused
215 target_docket.data_unused = new_unused
216
216
217 if data is None:
217 if data is None:
218 # otherwise fallback to a full new export
218 # otherwise fallback to a full new export
219 target_docket = NodeMapDocket()
219 target_docket = NodeMapDocket()
220 datafile = _rawdata_filepath(revlog, target_docket)
220 datafile = _rawdata_filepath(revlog, target_docket)
221 if hasattr(revlog.index, "nodemap_data_all"):
221 if hasattr(revlog.index, "nodemap_data_all"):
222 data = revlog.index.nodemap_data_all()
222 data = revlog.index.nodemap_data_all()
223 else:
223 else:
224 data = persistent_data(revlog.index)
224 data = persistent_data(revlog.index)
225 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
225 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
226 # store vfs
226 # store vfs
227
227
228 tryunlink = revlog.opener.tryunlink
228 tryunlink = revlog.opener.tryunlink
229
229
230 def abortck(tr):
230 def abortck(tr):
231 tryunlink(datafile)
231 tryunlink(datafile)
232
232
233 callback_id = b"delete-%s" % datafile
233 callback_id = b"delete-%s" % datafile
234
234
235 # some flavor of the transaction abort does not cleanup new file, it
235 # some flavor of the transaction abort does not cleanup new file, it
236 # simply empty them.
236 # simply empty them.
237 tr.addabort(callback_id, abortck)
237 tr.addabort(callback_id, abortck)
238 with revlog.opener(datafile, b'w+') as fd:
238 with revlog.opener(datafile, b'w+') as fd:
239 fd.write(data)
239 fd.write(data)
240 if feed_data:
240 if feed_data:
241 if use_mmap:
241 if use_mmap and revlog.opener.is_mmap_safe(datafile):
242 fd.flush()
242 fd.flush()
243 new_data = util.buffer(util.mmapread(fd, len(data)))
243 new_data = util.buffer(util.mmapread(fd, len(data)))
244 else:
244 else:
245 new_data = data
245 new_data = data
246 target_docket.data_length = len(data)
246 target_docket.data_length = len(data)
247 target_docket.tip_rev = revlog.tiprev()
247 target_docket.tip_rev = revlog.tiprev()
248 target_docket.tip_node = revlog.node(target_docket.tip_rev)
248 target_docket.tip_node = revlog.node(target_docket.tip_rev)
249 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
249 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
250 # store vfs
250 # store vfs
251 file_path = revlog._nodemap_file
251 file_path = revlog._nodemap_file
252 if pending:
252 if pending:
253 file_path += b'.a'
253 file_path += b'.a'
254 tr.registertmp(file_path)
254 tr.registertmp(file_path)
255 else:
255 else:
256 tr.addbackup(file_path)
256 tr.addbackup(file_path)
257
257
258 with revlog.opener(file_path, b'w', atomictemp=True) as fp:
258 with revlog.opener(file_path, b'w', atomictemp=True) as fp:
259 fp.write(target_docket.serialize())
259 fp.write(target_docket.serialize())
260 revlog._nodemap_docket = target_docket
260 revlog._nodemap_docket = target_docket
261 if feed_data:
261 if feed_data:
262 revlog.index.update_nodemap_data(target_docket, new_data)
262 revlog.index.update_nodemap_data(target_docket, new_data)
263
263
264 # search for old index file in all cases, some older process might have
264 # search for old index file in all cases, some older process might have
265 # left one behind.
265 # left one behind.
266 olds = _other_rawdata_filepath(revlog, target_docket)
266 olds = _other_rawdata_filepath(revlog, target_docket)
267 if olds:
267 if olds:
268 realvfs = getattr(revlog, '_realopener', revlog.opener)
268 realvfs = getattr(revlog, '_realopener', revlog.opener)
269
269
270 def cleanup(tr):
270 def cleanup(tr):
271 for oldfile in olds:
271 for oldfile in olds:
272 realvfs.tryunlink(oldfile)
272 realvfs.tryunlink(oldfile)
273
273
274 callback_id = b"revlog-cleanup-nodemap-%s" % revlog._nodemap_file
274 callback_id = b"revlog-cleanup-nodemap-%s" % revlog._nodemap_file
275 tr.addpostclose(callback_id, cleanup)
275 tr.addpostclose(callback_id, cleanup)
276
276
277
277
278 ### Nodemap docket file
278 ### Nodemap docket file
279 #
279 #
280 # The nodemap data are stored on disk using 2 files:
280 # The nodemap data are stored on disk using 2 files:
281 #
281 #
282 # * a raw data files containing a persistent nodemap
282 # * a raw data files containing a persistent nodemap
283 # (see `Nodemap Trie` section)
283 # (see `Nodemap Trie` section)
284 #
284 #
285 # * a small "docket" file containing medatadata
285 # * a small "docket" file containing medatadata
286 #
286 #
287 # While the nodemap data can be multiple tens of megabytes, the "docket" is
287 # While the nodemap data can be multiple tens of megabytes, the "docket" is
288 # small, it is easy to update it automatically or to duplicated its content
288 # small, it is easy to update it automatically or to duplicated its content
289 # during a transaction.
289 # during a transaction.
290 #
290 #
291 # Multiple raw data can exist at the same time (The currently valid one and a
291 # Multiple raw data can exist at the same time (The currently valid one and a
292 # new one beind used by an in progress transaction). To accomodate this, the
292 # new one beind used by an in progress transaction). To accomodate this, the
293 # filename hosting the raw data has a variable parts. The exact filename is
293 # filename hosting the raw data has a variable parts. The exact filename is
294 # specified inside the "docket" file.
294 # specified inside the "docket" file.
295 #
295 #
296 # The docket file contains information to find, qualify and validate the raw
296 # The docket file contains information to find, qualify and validate the raw
297 # data. Its content is currently very light, but it will expand as the on disk
297 # data. Its content is currently very light, but it will expand as the on disk
298 # nodemap gains the necessary features to be used in production.
298 # nodemap gains the necessary features to be used in production.
299
299
300 ONDISK_VERSION = 1
300 ONDISK_VERSION = 1
301 S_VERSION = struct.Struct(">B")
301 S_VERSION = struct.Struct(">B")
302 S_HEADER = struct.Struct(">BQQQQ")
302 S_HEADER = struct.Struct(">BQQQQ")
303
303
304
304
305 class NodeMapDocket:
305 class NodeMapDocket:
306 """metadata associated with persistent nodemap data
306 """metadata associated with persistent nodemap data
307
307
308 The persistent data may come from disk or be on their way to disk.
308 The persistent data may come from disk or be on their way to disk.
309 """
309 """
310
310
311 def __init__(self, uid=None):
311 def __init__(self, uid=None):
312 if uid is None:
312 if uid is None:
313 uid = docket_mod.make_uid()
313 uid = docket_mod.make_uid()
314 # a unique identifier for the data file:
314 # a unique identifier for the data file:
315 # - When new data are appended, it is preserved.
315 # - When new data are appended, it is preserved.
316 # - When a new data file is created, a new identifier is generated.
316 # - When a new data file is created, a new identifier is generated.
317 self.uid = uid
317 self.uid = uid
318 # the tipmost revision stored in the data file. This revision and all
318 # the tipmost revision stored in the data file. This revision and all
319 # revision before it are expected to be encoded in the data file.
319 # revision before it are expected to be encoded in the data file.
320 self.tip_rev = None
320 self.tip_rev = None
321 # the node of that tipmost revision, if it mismatch the current index
321 # the node of that tipmost revision, if it mismatch the current index
322 # data the docket is not valid for the current index and should be
322 # data the docket is not valid for the current index and should be
323 # discarded.
323 # discarded.
324 #
324 #
325 # note: this method is not perfect as some destructive operation could
325 # note: this method is not perfect as some destructive operation could
326 # preserve the same tip_rev + tip_node while altering lower revision.
326 # preserve the same tip_rev + tip_node while altering lower revision.
327 # However this multiple other caches have the same vulnerability (eg:
327 # However this multiple other caches have the same vulnerability (eg:
328 # brancmap cache).
328 # brancmap cache).
329 self.tip_node = None
329 self.tip_node = None
330 # the size (in bytes) of the persisted data to encode the nodemap valid
330 # the size (in bytes) of the persisted data to encode the nodemap valid
331 # for `tip_rev`.
331 # for `tip_rev`.
332 # - data file shorter than this are corrupted,
332 # - data file shorter than this are corrupted,
333 # - any extra data should be ignored.
333 # - any extra data should be ignored.
334 self.data_length = None
334 self.data_length = None
335 # the amount (in bytes) of "dead" data, still in the data file but no
335 # the amount (in bytes) of "dead" data, still in the data file but no
336 # longer used for the nodemap.
336 # longer used for the nodemap.
337 self.data_unused = 0
337 self.data_unused = 0
338
338
339 def copy(self):
339 def copy(self):
340 new = NodeMapDocket(uid=self.uid)
340 new = NodeMapDocket(uid=self.uid)
341 new.tip_rev = self.tip_rev
341 new.tip_rev = self.tip_rev
342 new.tip_node = self.tip_node
342 new.tip_node = self.tip_node
343 new.data_length = self.data_length
343 new.data_length = self.data_length
344 new.data_unused = self.data_unused
344 new.data_unused = self.data_unused
345 return new
345 return new
346
346
347 def __cmp__(self, other):
347 def __cmp__(self, other):
348 if self.uid < other.uid:
348 if self.uid < other.uid:
349 return -1
349 return -1
350 if self.uid > other.uid:
350 if self.uid > other.uid:
351 return 1
351 return 1
352 elif self.data_length < other.data_length:
352 elif self.data_length < other.data_length:
353 return -1
353 return -1
354 elif self.data_length > other.data_length:
354 elif self.data_length > other.data_length:
355 return 1
355 return 1
356 return 0
356 return 0
357
357
358 def __eq__(self, other):
358 def __eq__(self, other):
359 return self.uid == other.uid and self.data_length == other.data_length
359 return self.uid == other.uid and self.data_length == other.data_length
360
360
361 def serialize(self):
361 def serialize(self):
362 """return serialized bytes for a docket using the passed uid"""
362 """return serialized bytes for a docket using the passed uid"""
363 data = []
363 data = []
364 data.append(S_VERSION.pack(ONDISK_VERSION))
364 data.append(S_VERSION.pack(ONDISK_VERSION))
365 headers = (
365 headers = (
366 len(self.uid),
366 len(self.uid),
367 self.tip_rev,
367 self.tip_rev,
368 self.data_length,
368 self.data_length,
369 self.data_unused,
369 self.data_unused,
370 len(self.tip_node),
370 len(self.tip_node),
371 )
371 )
372 data.append(S_HEADER.pack(*headers))
372 data.append(S_HEADER.pack(*headers))
373 data.append(self.uid)
373 data.append(self.uid)
374 data.append(self.tip_node)
374 data.append(self.tip_node)
375 return b''.join(data)
375 return b''.join(data)
376
376
377
377
378 def _rawdata_filepath(revlog, docket):
378 def _rawdata_filepath(revlog, docket):
379 """The (vfs relative) nodemap's rawdata file for a given uid"""
379 """The (vfs relative) nodemap's rawdata file for a given uid"""
380 prefix = revlog.radix
380 prefix = revlog.radix
381 return b"%s-%s.nd" % (prefix, docket.uid)
381 return b"%s-%s.nd" % (prefix, docket.uid)
382
382
383
383
384 def _other_rawdata_filepath(revlog, docket):
384 def _other_rawdata_filepath(revlog, docket):
385 prefix = revlog.radix
385 prefix = revlog.radix
386 pattern = re.compile(br"(^|/)%s-[0-9a-f]+\.nd$" % prefix)
386 pattern = re.compile(br"(^|/)%s-[0-9a-f]+\.nd$" % prefix)
387 new_file_path = _rawdata_filepath(revlog, docket)
387 new_file_path = _rawdata_filepath(revlog, docket)
388 new_file_name = revlog.opener.basename(new_file_path)
388 new_file_name = revlog.opener.basename(new_file_path)
389 dirpath = revlog.opener.dirname(new_file_path)
389 dirpath = revlog.opener.dirname(new_file_path)
390 others = []
390 others = []
391 for f in revlog.opener.listdir(dirpath):
391 for f in revlog.opener.listdir(dirpath):
392 if pattern.match(f) and f != new_file_name:
392 if pattern.match(f) and f != new_file_name:
393 others.append(f)
393 others.append(f)
394 return others
394 return others
395
395
396
396
397 ### Nodemap Trie
397 ### Nodemap Trie
398 #
398 #
399 # This is a simple reference implementation to compute and persist a nodemap
399 # This is a simple reference implementation to compute and persist a nodemap
400 # trie. This reference implementation is write only. The python version of this
400 # trie. This reference implementation is write only. The python version of this
401 # is not expected to be actually used, since it wont provide performance
401 # is not expected to be actually used, since it wont provide performance
402 # improvement over existing non-persistent C implementation.
402 # improvement over existing non-persistent C implementation.
403 #
403 #
404 # The nodemap is persisted as Trie using 4bits-address/16-entries block. each
404 # The nodemap is persisted as Trie using 4bits-address/16-entries block. each
405 # revision can be adressed using its node shortest prefix.
405 # revision can be adressed using its node shortest prefix.
406 #
406 #
407 # The trie is stored as a sequence of block. Each block contains 16 entries
407 # The trie is stored as a sequence of block. Each block contains 16 entries
408 # (signed 64bit integer, big endian). Each entry can be one of the following:
408 # (signed 64bit integer, big endian). Each entry can be one of the following:
409 #
409 #
410 # * value >= 0 -> index of sub-block
410 # * value >= 0 -> index of sub-block
411 # * value == -1 -> no value
411 # * value == -1 -> no value
412 # * value < -1 -> encoded revision: rev = -(value+2)
412 # * value < -1 -> encoded revision: rev = -(value+2)
413 #
413 #
414 # See REV_OFFSET and _transform_rev below.
414 # See REV_OFFSET and _transform_rev below.
415 #
415 #
416 # The implementation focus on simplicity, not on performance. A Rust
416 # The implementation focus on simplicity, not on performance. A Rust
417 # implementation should provide a efficient version of the same binary
417 # implementation should provide a efficient version of the same binary
418 # persistence. This reference python implementation is never meant to be
418 # persistence. This reference python implementation is never meant to be
419 # extensively use in production.
419 # extensively use in production.
420
420
421
421
422 def persistent_data(index):
422 def persistent_data(index):
423 """return the persistent binary form for a nodemap for a given index"""
423 """return the persistent binary form for a nodemap for a given index"""
424 trie = _build_trie(index)
424 trie = _build_trie(index)
425 return _persist_trie(trie)
425 return _persist_trie(trie)
426
426
427
427
428 def update_persistent_data(index, root, max_idx, last_rev):
428 def update_persistent_data(index, root, max_idx, last_rev):
429 """return the incremental update for persistent nodemap from a given index"""
429 """return the incremental update for persistent nodemap from a given index"""
430 changed_block, trie = _update_trie(index, root, last_rev)
430 changed_block, trie = _update_trie(index, root, last_rev)
431 return (
431 return (
432 changed_block * S_BLOCK.size,
432 changed_block * S_BLOCK.size,
433 _persist_trie(trie, existing_idx=max_idx),
433 _persist_trie(trie, existing_idx=max_idx),
434 )
434 )
435
435
436
436
437 S_BLOCK = struct.Struct(">" + ("l" * 16))
437 S_BLOCK = struct.Struct(">" + ("l" * 16))
438
438
439 NO_ENTRY = -1
439 NO_ENTRY = -1
440 # rev 0 need to be -2 because 0 is used by block, -1 is a special value.
440 # rev 0 need to be -2 because 0 is used by block, -1 is a special value.
441 REV_OFFSET = 2
441 REV_OFFSET = 2
442
442
443
443
444 def _transform_rev(rev):
444 def _transform_rev(rev):
445 """Return the number used to represent the rev in the tree.
445 """Return the number used to represent the rev in the tree.
446
446
447 (or retrieve a rev number from such representation)
447 (or retrieve a rev number from such representation)
448
448
449 Note that this is an involution, a function equal to its inverse (i.e.
449 Note that this is an involution, a function equal to its inverse (i.e.
450 which gives the identity when applied to itself).
450 which gives the identity when applied to itself).
451 """
451 """
452 return -(rev + REV_OFFSET)
452 return -(rev + REV_OFFSET)
453
453
454
454
455 def _to_int(hex_digit):
455 def _to_int(hex_digit):
456 """turn an hexadecimal digit into a proper integer"""
456 """turn an hexadecimal digit into a proper integer"""
457 return int(hex_digit, 16)
457 return int(hex_digit, 16)
458
458
459
459
460 class Block(dict):
460 class Block(dict):
461 """represent a block of the Trie
461 """represent a block of the Trie
462
462
463 contains up to 16 entry indexed from 0 to 15"""
463 contains up to 16 entry indexed from 0 to 15"""
464
464
465 def __init__(self):
465 def __init__(self):
466 super(Block, self).__init__()
466 super(Block, self).__init__()
467 # If this block exist on disk, here is its ID
467 # If this block exist on disk, here is its ID
468 self.ondisk_id = None
468 self.ondisk_id = None
469
469
470 def __iter__(self):
470 def __iter__(self):
471 return iter(self.get(i) for i in range(16))
471 return iter(self.get(i) for i in range(16))
472
472
473
473
474 def _build_trie(index):
474 def _build_trie(index):
475 """build a nodemap trie
475 """build a nodemap trie
476
476
477 The nodemap stores revision number for each unique prefix.
477 The nodemap stores revision number for each unique prefix.
478
478
479 Each block is a dictionary with keys in `[0, 15]`. Values are either
479 Each block is a dictionary with keys in `[0, 15]`. Values are either
480 another block or a revision number.
480 another block or a revision number.
481 """
481 """
482 root = Block()
482 root = Block()
483 for rev in range(len(index)):
483 for rev in range(len(index)):
484 current_hex = hex(index[rev][7])
484 current_hex = hex(index[rev][7])
485 _insert_into_block(index, 0, root, rev, current_hex)
485 _insert_into_block(index, 0, root, rev, current_hex)
486 return root
486 return root
487
487
488
488
489 def _update_trie(index, root, last_rev):
489 def _update_trie(index, root, last_rev):
490 """consume"""
490 """consume"""
491 changed = 0
491 changed = 0
492 for rev in range(last_rev + 1, len(index)):
492 for rev in range(last_rev + 1, len(index)):
493 current_hex = hex(index[rev][7])
493 current_hex = hex(index[rev][7])
494 changed += _insert_into_block(index, 0, root, rev, current_hex)
494 changed += _insert_into_block(index, 0, root, rev, current_hex)
495 return changed, root
495 return changed, root
496
496
497
497
498 def _insert_into_block(index, level, block, current_rev, current_hex):
498 def _insert_into_block(index, level, block, current_rev, current_hex):
499 """insert a new revision in a block
499 """insert a new revision in a block
500
500
501 index: the index we are adding revision for
501 index: the index we are adding revision for
502 level: the depth of the current block in the trie
502 level: the depth of the current block in the trie
503 block: the block currently being considered
503 block: the block currently being considered
504 current_rev: the revision number we are adding
504 current_rev: the revision number we are adding
505 current_hex: the hexadecimal representation of the of that revision
505 current_hex: the hexadecimal representation of the of that revision
506 """
506 """
507 changed = 1
507 changed = 1
508 if block.ondisk_id is not None:
508 if block.ondisk_id is not None:
509 block.ondisk_id = None
509 block.ondisk_id = None
510 hex_digit = _to_int(current_hex[level : level + 1])
510 hex_digit = _to_int(current_hex[level : level + 1])
511 entry = block.get(hex_digit)
511 entry = block.get(hex_digit)
512 if entry is None:
512 if entry is None:
513 # no entry, simply store the revision number
513 # no entry, simply store the revision number
514 block[hex_digit] = current_rev
514 block[hex_digit] = current_rev
515 elif isinstance(entry, dict):
515 elif isinstance(entry, dict):
516 # need to recurse to an underlying block
516 # need to recurse to an underlying block
517 changed += _insert_into_block(
517 changed += _insert_into_block(
518 index, level + 1, entry, current_rev, current_hex
518 index, level + 1, entry, current_rev, current_hex
519 )
519 )
520 else:
520 else:
521 # collision with a previously unique prefix, inserting new
521 # collision with a previously unique prefix, inserting new
522 # vertices to fit both entry.
522 # vertices to fit both entry.
523 other_hex = hex(index[entry][7])
523 other_hex = hex(index[entry][7])
524 other_rev = entry
524 other_rev = entry
525 new = Block()
525 new = Block()
526 block[hex_digit] = new
526 block[hex_digit] = new
527 _insert_into_block(index, level + 1, new, other_rev, other_hex)
527 _insert_into_block(index, level + 1, new, other_rev, other_hex)
528 _insert_into_block(index, level + 1, new, current_rev, current_hex)
528 _insert_into_block(index, level + 1, new, current_rev, current_hex)
529 return changed
529 return changed
530
530
531
531
532 def _persist_trie(root, existing_idx=None):
532 def _persist_trie(root, existing_idx=None):
533 """turn a nodemap trie into persistent binary data
533 """turn a nodemap trie into persistent binary data
534
534
535 See `_build_trie` for nodemap trie structure"""
535 See `_build_trie` for nodemap trie structure"""
536 block_map = {}
536 block_map = {}
537 if existing_idx is not None:
537 if existing_idx is not None:
538 base_idx = existing_idx + 1
538 base_idx = existing_idx + 1
539 else:
539 else:
540 base_idx = 0
540 base_idx = 0
541 chunks = []
541 chunks = []
542 for tn in _walk_trie(root):
542 for tn in _walk_trie(root):
543 if tn.ondisk_id is not None:
543 if tn.ondisk_id is not None:
544 block_map[id(tn)] = tn.ondisk_id
544 block_map[id(tn)] = tn.ondisk_id
545 else:
545 else:
546 block_map[id(tn)] = len(chunks) + base_idx
546 block_map[id(tn)] = len(chunks) + base_idx
547 chunks.append(_persist_block(tn, block_map))
547 chunks.append(_persist_block(tn, block_map))
548 return b''.join(chunks)
548 return b''.join(chunks)
549
549
550
550
551 def _walk_trie(block):
551 def _walk_trie(block):
552 """yield all the block in a trie
552 """yield all the block in a trie
553
553
554 Children blocks are always yield before their parent block.
554 Children blocks are always yield before their parent block.
555 """
555 """
556 for (__, item) in sorted(block.items()):
556 for (__, item) in sorted(block.items()):
557 if isinstance(item, dict):
557 if isinstance(item, dict):
558 for sub_block in _walk_trie(item):
558 for sub_block in _walk_trie(item):
559 yield sub_block
559 yield sub_block
560 yield block
560 yield block
561
561
562
562
563 def _persist_block(block_node, block_map):
563 def _persist_block(block_node, block_map):
564 """produce persistent binary data for a single block
564 """produce persistent binary data for a single block
565
565
566 Children block are assumed to be already persisted and present in
566 Children block are assumed to be already persisted and present in
567 block_map.
567 block_map.
568 """
568 """
569 data = tuple(_to_value(v, block_map) for v in block_node)
569 data = tuple(_to_value(v, block_map) for v in block_node)
570 return S_BLOCK.pack(*data)
570 return S_BLOCK.pack(*data)
571
571
572
572
573 def _to_value(item, block_map):
573 def _to_value(item, block_map):
574 """persist any value as an integer"""
574 """persist any value as an integer"""
575 if item is None:
575 if item is None:
576 return NO_ENTRY
576 return NO_ENTRY
577 elif isinstance(item, dict):
577 elif isinstance(item, dict):
578 return block_map[id(item)]
578 return block_map[id(item)]
579 else:
579 else:
580 return _transform_rev(item)
580 return _transform_rev(item)
581
581
582
582
583 def parse_data(data):
583 def parse_data(data):
584 """parse parse nodemap data into a nodemap Trie"""
584 """parse parse nodemap data into a nodemap Trie"""
585 if (len(data) % S_BLOCK.size) != 0:
585 if (len(data) % S_BLOCK.size) != 0:
586 msg = b"nodemap data size is not a multiple of block size (%d): %d"
586 msg = b"nodemap data size is not a multiple of block size (%d): %d"
587 raise error.Abort(msg % (S_BLOCK.size, len(data)))
587 raise error.Abort(msg % (S_BLOCK.size, len(data)))
588 if not data:
588 if not data:
589 return Block(), None
589 return Block(), None
590 block_map = {}
590 block_map = {}
591 new_blocks = []
591 new_blocks = []
592 for i in range(0, len(data), S_BLOCK.size):
592 for i in range(0, len(data), S_BLOCK.size):
593 block = Block()
593 block = Block()
594 block.ondisk_id = len(block_map)
594 block.ondisk_id = len(block_map)
595 block_map[block.ondisk_id] = block
595 block_map[block.ondisk_id] = block
596 block_data = data[i : i + S_BLOCK.size]
596 block_data = data[i : i + S_BLOCK.size]
597 values = S_BLOCK.unpack(block_data)
597 values = S_BLOCK.unpack(block_data)
598 new_blocks.append((block, values))
598 new_blocks.append((block, values))
599 for b, values in new_blocks:
599 for b, values in new_blocks:
600 for idx, v in enumerate(values):
600 for idx, v in enumerate(values):
601 if v == NO_ENTRY:
601 if v == NO_ENTRY:
602 continue
602 continue
603 elif v >= 0:
603 elif v >= 0:
604 b[idx] = block_map[v]
604 b[idx] = block_map[v]
605 else:
605 else:
606 b[idx] = _transform_rev(v)
606 b[idx] = _transform_rev(v)
607 return block, i // S_BLOCK.size
607 return block, i // S_BLOCK.size
608
608
609
609
610 # debug utility
610 # debug utility
611
611
612
612
613 def check_data(ui, index, data):
613 def check_data(ui, index, data):
614 """verify that the provided nodemap data are valid for the given idex"""
614 """verify that the provided nodemap data are valid for the given idex"""
615 ret = 0
615 ret = 0
616 ui.status((b"revisions in index: %d\n") % len(index))
616 ui.status((b"revisions in index: %d\n") % len(index))
617 root, __ = parse_data(data)
617 root, __ = parse_data(data)
618 all_revs = set(_all_revisions(root))
618 all_revs = set(_all_revisions(root))
619 ui.status((b"revisions in nodemap: %d\n") % len(all_revs))
619 ui.status((b"revisions in nodemap: %d\n") % len(all_revs))
620 for r in range(len(index)):
620 for r in range(len(index)):
621 if r not in all_revs:
621 if r not in all_revs:
622 msg = b" revision missing from nodemap: %d\n" % r
622 msg = b" revision missing from nodemap: %d\n" % r
623 ui.write_err(msg)
623 ui.write_err(msg)
624 ret = 1
624 ret = 1
625 else:
625 else:
626 all_revs.remove(r)
626 all_revs.remove(r)
627 nm_rev = _find_node(root, hex(index[r][7]))
627 nm_rev = _find_node(root, hex(index[r][7]))
628 if nm_rev is None:
628 if nm_rev is None:
629 msg = b" revision node does not match any entries: %d\n" % r
629 msg = b" revision node does not match any entries: %d\n" % r
630 ui.write_err(msg)
630 ui.write_err(msg)
631 ret = 1
631 ret = 1
632 elif nm_rev != r:
632 elif nm_rev != r:
633 msg = (
633 msg = (
634 b" revision node does not match the expected revision: "
634 b" revision node does not match the expected revision: "
635 b"%d != %d\n" % (r, nm_rev)
635 b"%d != %d\n" % (r, nm_rev)
636 )
636 )
637 ui.write_err(msg)
637 ui.write_err(msg)
638 ret = 1
638 ret = 1
639
639
640 if all_revs:
640 if all_revs:
641 for r in sorted(all_revs):
641 for r in sorted(all_revs):
642 msg = b" extra revisions in nodemap: %d\n" % r
642 msg = b" extra revisions in nodemap: %d\n" % r
643 ui.write_err(msg)
643 ui.write_err(msg)
644 ret = 1
644 ret = 1
645 return ret
645 return ret
646
646
647
647
648 def _all_revisions(root):
648 def _all_revisions(root):
649 """return all revisions stored in a Trie"""
649 """return all revisions stored in a Trie"""
650 for block in _walk_trie(root):
650 for block in _walk_trie(root):
651 for v in block:
651 for v in block:
652 if v is None or isinstance(v, Block):
652 if v is None or isinstance(v, Block):
653 continue
653 continue
654 yield v
654 yield v
655
655
656
656
657 def _find_node(block, node):
657 def _find_node(block, node):
658 """find the revision associated with a given node"""
658 """find the revision associated with a given node"""
659 entry = block.get(_to_int(node[0:1]))
659 entry = block.get(_to_int(node[0:1]))
660 if isinstance(entry, dict):
660 if isinstance(entry, dict):
661 return _find_node(entry, node[1:])
661 return _find_node(entry, node[1:])
662 return entry
662 return entry
663
663
664
664
665 def get_nodemap_file(revlog):
665 def get_nodemap_file(revlog):
666 if revlog._trypending:
666 if revlog._trypending:
667 pending_path = revlog.radix + b".n.a"
667 pending_path = revlog.radix + b".n.a"
668 if revlog.opener.exists(pending_path):
668 if revlog.opener.exists(pending_path):
669 return pending_path
669 return pending_path
670 return revlog.radix + b".n"
670 return revlog.radix + b".n"
General Comments 0
You need to be logged in to leave comments. Login now