persistent-nodemap: avoid writing nodemap for empty revlog...
marmoute
r52068:1486d8c6 stable
@@ -1,668 +1,670 @@
# nodemap.py - nodemap related code and utilities
#
# Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
# Copyright 2019 George Racinet <georges.racinet@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.


import re
import struct

from ..node import hex

from .. import (
    error,
    requirements,
    util,
)
from . import docket as docket_mod


class NodeMap(dict):
    def __missing__(self, x):
        raise error.RevlogError(b'unknown node: %s' % x)


def test_race_hook_1():
    """hook point for tests

    This lets tests make things happen between the docket reading and the
    data reading"""
    pass


def post_stream_cleanup(repo):
    """The stream clone might need to remove some files if persistent nodemap
    was dropped while stream cloning
    """
    if requirements.REVLOGV1_REQUIREMENT not in repo.requirements:
        return
    if requirements.NODEMAP_REQUIREMENT in repo.requirements:
        return
    unfi = repo.unfiltered()
    delete_nodemap(None, unfi, unfi.changelog)
    delete_nodemap(None, repo, unfi.manifestlog._rootstore._revlog)


def persisted_data(revlog):
    """read the nodemap for a revlog from disk"""
    if revlog._nodemap_file is None:
        return None
    pdata = revlog.opener.tryread(revlog._nodemap_file)
    if not pdata:
        return None
    offset = 0
    (version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size])
    if version != ONDISK_VERSION:
        return None
    offset += S_VERSION.size
    headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
    uid_size, tip_rev, data_length, data_unused, tip_node_size = headers
    offset += S_HEADER.size
    docket = NodeMapDocket(pdata[offset : offset + uid_size])
    offset += uid_size
    docket.tip_rev = tip_rev
    docket.tip_node = pdata[offset : offset + tip_node_size]
    docket.data_length = data_length
    docket.data_unused = data_unused

    filename = _rawdata_filepath(revlog, docket)
    use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap")

    test_race_hook_1()
    try:
        with revlog.opener(filename) as fd:
            if use_mmap:
                try:
                    data = util.buffer(util.mmapread(fd, data_length))
                except ValueError:
                    # raised when the read file is too small
                    data = b''
            else:
                data = fd.read(data_length)
    except FileNotFoundError:
        return None
    if len(data) < data_length:
        return None
    return docket, data


def setup_persistent_nodemap(tr, revlog):
    """Install whatever is needed transaction side to persist a nodemap on disk

    (only actually persist the nodemap if this is relevant for this revlog)
    """
    if revlog._inline:
        return  # inlined revlogs are too small for this to be relevant
    if revlog._nodemap_file is None:
        return  # we do not use persistent_nodemap on this revlog

    # we need to happen after the changelog finalization, in that case use "cl-"
    callback_id = b"nm-revlog-persistent-nodemap-%s" % revlog._nodemap_file
    if tr.hasfinalize(callback_id):
        return  # no need to register again
    tr.addpending(
        callback_id, lambda tr: persist_nodemap(tr, revlog, pending=True)
    )
    tr.addfinalize(callback_id, lambda tr: persist_nodemap(tr, revlog))


class _NoTransaction:
    """transaction like object to update the nodemap outside a transaction"""

    def __init__(self):
        self._postclose = {}

    def addpostclose(self, callback_id, callback_func):
        self._postclose[callback_id] = callback_func

    def registertmp(self, *args, **kwargs):
        pass

    def addbackup(self, *args, **kwargs):
        pass

    def add(self, *args, **kwargs):
        pass

    def addabort(self, *args, **kwargs):
        pass

    def _report(self, *args):
        pass


def update_persistent_nodemap(revlog):
    """update the persistent nodemap right now

    To be used for updating the nodemap on disk outside of a normal transaction
    setup (eg, `debugupdatecache`).
    """
    if revlog._inline:
        return  # inlined revlogs are too small for this to be relevant
    if revlog._nodemap_file is None:
        return  # we do not use persistent_nodemap on this revlog

    notr = _NoTransaction()
    persist_nodemap(notr, revlog)
    for k in sorted(notr._postclose):
        notr._postclose[k](None)


def delete_nodemap(tr, repo, revlog):
    """Delete nodemap data on disk for a given revlog"""
    prefix = revlog.radix
    pattern = re.compile(br"(^|/)%s(-[0-9a-f]+\.nd|\.n(\.a)?)$" % prefix)
    dirpath = revlog.opener.dirname(revlog._indexfile)
    for f in revlog.opener.listdir(dirpath):
        if pattern.match(f):
            repo.svfs.tryunlink(f)


def persist_nodemap(tr, revlog, pending=False, force=False):
    """Write nodemap data on disk for a given revlog"""
    if len(revlog.index) <= 0:
        return
    if getattr(revlog, 'filteredrevs', ()):
        raise error.ProgrammingError(
            "cannot persist nodemap of a filtered changelog"
        )
    if revlog._nodemap_file is None:
        if force:
            revlog._nodemap_file = get_nodemap_file(revlog)
        else:
            msg = "calling persist nodemap on a revlog without the feature enabled"
            raise error.ProgrammingError(msg)

    can_incremental = hasattr(revlog.index, "nodemap_data_incremental")
    ondisk_docket = revlog._nodemap_docket
    feed_data = hasattr(revlog.index, "update_nodemap_data")
    use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap")

    data = None
    # first attempt an incremental update of the data
    if can_incremental and ondisk_docket is not None:
        target_docket = revlog._nodemap_docket.copy()
        (
            src_docket,
            data_changed_count,
            data,
        ) = revlog.index.nodemap_data_incremental()
        new_length = target_docket.data_length + len(data)
        new_unused = target_docket.data_unused + data_changed_count
        if src_docket != target_docket:
            data = None
        elif new_length <= (new_unused * 10):  # under 10% of unused data
            data = None
        else:
            datafile = _rawdata_filepath(revlog, target_docket)
            # EXP-TODO: if this is a cache, this should use a cache vfs, not a
            # store vfs
            tr.add(datafile, target_docket.data_length)
            with revlog.opener(datafile, b'r+') as fd:
                fd.seek(target_docket.data_length)
                fd.write(data)
                if feed_data:
                    if use_mmap:
                        fd.seek(0)
                        new_data = fd.read(new_length)
                    else:
                        fd.flush()
                        new_data = util.buffer(util.mmapread(fd, new_length))
            target_docket.data_length = new_length
            target_docket.data_unused = new_unused

    if data is None:
        # otherwise fallback to a full new export
        target_docket = NodeMapDocket()
        datafile = _rawdata_filepath(revlog, target_docket)
        if hasattr(revlog.index, "nodemap_data_all"):
            data = revlog.index.nodemap_data_all()
        else:
            data = persistent_data(revlog.index)
        # EXP-TODO: if this is a cache, this should use a cache vfs, not a
        # store vfs

        tryunlink = revlog.opener.tryunlink

        def abortck(tr):
            tryunlink(datafile)

        callback_id = b"delete-%s" % datafile

        # some flavors of the transaction abort do not clean up new files;
        # they simply empty them.
        tr.addabort(callback_id, abortck)
        with revlog.opener(datafile, b'w+') as fd:
            fd.write(data)
            if feed_data:
                if use_mmap:
                    new_data = data
                else:
                    fd.flush()
                    new_data = util.buffer(util.mmapread(fd, len(data)))
        target_docket.data_length = len(data)
    target_docket.tip_rev = revlog.tiprev()
    target_docket.tip_node = revlog.node(target_docket.tip_rev)
    # EXP-TODO: if this is a cache, this should use a cache vfs, not a
    # store vfs
    file_path = revlog._nodemap_file
    if pending:
        file_path += b'.a'
        tr.registertmp(file_path)
    else:
        tr.addbackup(file_path)

    with revlog.opener(file_path, b'w', atomictemp=True) as fp:
        fp.write(target_docket.serialize())
    revlog._nodemap_docket = target_docket
    if feed_data:
        revlog.index.update_nodemap_data(target_docket, new_data)

    # search for old data files in all cases; some older process might have
    # left one behind.
    olds = _other_rawdata_filepath(revlog, target_docket)
    if olds:
        realvfs = getattr(revlog, '_realopener', revlog.opener)

        def cleanup(tr):
            for oldfile in olds:
                realvfs.tryunlink(oldfile)

        callback_id = b"revlog-cleanup-nodemap-%s" % revlog._nodemap_file
        tr.addpostclose(callback_id, cleanup)


### Nodemap docket file
#
# The nodemap data are stored on disk using 2 files:
#
# * a raw data file containing a persistent nodemap
#   (see `Nodemap Trie` section)
#
# * a small "docket" file containing metadata
#
# While the nodemap data can be multiple tens of megabytes, the "docket" is
# small, so it is easy to update it atomically or to duplicate its content
# during a transaction.
#
# Multiple raw data files can exist at the same time (the currently valid one
# and a new one being used by an in-progress transaction). To accommodate
# this, the filename hosting the raw data has a variable part. The exact
# filename is specified inside the "docket" file.
#
# The docket file contains information to find, qualify and validate the raw
# data. Its content is currently very light, but it will expand as the on-disk
# nodemap gains the necessary features to be used in production.

ONDISK_VERSION = 1
S_VERSION = struct.Struct(">B")
S_HEADER = struct.Struct(">BQQQQ")


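# Illustrative sketch, not part of the original module: decode a raw docket
# file into its header fields, assuming `raw` holds the full content of a
# ".n" docket file. The name `_example_decode_docket` is hypothetical; this
# simply mirrors the layout consumed by `persisted_data` above: a 1-byte
# version, a 33-byte header, then the uid and the tip node back to back.
def _example_decode_docket(raw):
    (version,) = S_VERSION.unpack(raw[0 : S_VERSION.size])  # ">B", 1 byte
    offset = S_VERSION.size
    headers = S_HEADER.unpack(raw[offset : offset + S_HEADER.size])  # 33 bytes
    uid_size, tip_rev, data_length, data_unused, tip_node_size = headers
    offset += S_HEADER.size
    uid = raw[offset : offset + uid_size]  # variable-size data-file identifier
    offset += uid_size
    tip_node = raw[offset : offset + tip_node_size]
    return version, uid, tip_rev, tip_node, data_length, data_unused

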
class NodeMapDocket:
    """metadata associated with persistent nodemap data

    The persistent data may come from disk or be on their way to disk.
    """

    def __init__(self, uid=None):
        if uid is None:
            uid = docket_mod.make_uid()
        # a unique identifier for the data file:
        # - When new data are appended, it is preserved.
        # - When a new data file is created, a new identifier is generated.
        self.uid = uid
        # the tipmost revision stored in the data file. This revision and all
        # revisions before it are expected to be encoded in the data file.
        self.tip_rev = None
        # the node of that tipmost revision; if it mismatches the current
        # index data, the docket is not valid for the current index and
        # should be discarded.
        #
        # note: this check is not perfect as some destructive operation could
        # preserve the same tip_rev + tip_node while altering lower revisions.
        # However, multiple other caches have the same vulnerability (eg:
        # branchmap cache).
        self.tip_node = None
        # the size (in bytes) of the persisted data to encode the nodemap
        # valid for `tip_rev`.
        # - a data file shorter than this is corrupted,
        # - any extra data should be ignored.
        self.data_length = None
        # the amount (in bytes) of "dead" data, still in the data file but no
        # longer used for the nodemap.
        self.data_unused = 0

    def copy(self):
        new = NodeMapDocket(uid=self.uid)
        new.tip_rev = self.tip_rev
        new.tip_node = self.tip_node
        new.data_length = self.data_length
        new.data_unused = self.data_unused
        return new

    def __cmp__(self, other):
        if self.uid < other.uid:
            return -1
        if self.uid > other.uid:
            return 1
        elif self.data_length < other.data_length:
            return -1
        elif self.data_length > other.data_length:
            return 1
        return 0

    def __eq__(self, other):
        return self.uid == other.uid and self.data_length == other.data_length

    def serialize(self):
        """return serialized bytes for this docket"""
        data = []
        data.append(S_VERSION.pack(ONDISK_VERSION))
        headers = (
            len(self.uid),
            self.tip_rev,
            self.data_length,
            self.data_unused,
            len(self.tip_node),
        )
        data.append(S_HEADER.pack(*headers))
        data.append(self.uid)
        data.append(self.tip_node)
        return b''.join(data)


def _rawdata_filepath(revlog, docket):
    """The (vfs relative) nodemap's rawdata file for a given uid"""
    prefix = revlog.radix
    return b"%s-%s.nd" % (prefix, docket.uid)


def _other_rawdata_filepath(revlog, docket):
    prefix = revlog.radix
    pattern = re.compile(br"(^|/)%s-[0-9a-f]+\.nd$" % prefix)
    new_file_path = _rawdata_filepath(revlog, docket)
    new_file_name = revlog.opener.basename(new_file_path)
    dirpath = revlog.opener.dirname(new_file_path)
    others = []
    for f in revlog.opener.listdir(dirpath):
        if pattern.match(f) and f != new_file_name:
            others.append(f)
    return others


### Nodemap Trie
#
# This is a simple reference implementation to compute and persist a nodemap
# trie. This reference implementation is write only. The python version of
# this is not expected to be actually used, since it won't provide any
# performance improvement over the existing non-persistent C implementation.
#
# The nodemap is persisted as a trie using 4-bit addressing and 16-entry
# blocks. Each revision can be addressed using the shortest unique prefix of
# its node.
#
# The trie is stored as a sequence of blocks. Each block contains 16 entries
# (signed 32bit integers, big endian; see S_BLOCK below). Each entry can be
# one of the following:
#
# * value >= 0 -> index of sub-block
# * value == -1 -> no value
# * value < -1 -> encoded revision: rev = -(value+2)
#
# See REV_OFFSET and _transform_rev below.
#
# The implementation focuses on simplicity, not on performance. A Rust
# implementation should provide an efficient version of the same binary
# persistence. This reference python implementation is never meant to be
# used extensively in production.
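
# Illustrative sketch, not part of the original module: a repository whose
# only revision is rev 0, with a node whose first nibble is 0x1, persists as
# a single root block of 16 big-endian entries:
#
#   [-1, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1]
#        ^ slot 1 holds _transform_rev(0) == -(0 + 2) == -2
#
# while every other slot holds NO_ENTRY (-1).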


def persistent_data(index):
    """return the persistent binary form for a nodemap for a given index"""
    trie = _build_trie(index)
    return _persist_trie(trie)


def update_persistent_data(index, root, max_idx, last_rev):
    """return the incremental update for persistent nodemap from a given index"""
    changed_block, trie = _update_trie(index, root, last_rev)
    return (
        changed_block * S_BLOCK.size,
        _persist_trie(trie, existing_idx=max_idx),
    )


S_BLOCK = struct.Struct(">" + ("l" * 16))

NO_ENTRY = -1
# rev 0 needs to be -2 because 0 is used by blocks, and -1 is a special value.
REV_OFFSET = 2


def _transform_rev(rev):
    """Return the number used to represent the rev in the tree.

    (or retrieve a rev number from such a representation)

    Note that this is an involution, a function equal to its inverse (i.e.
    which gives the identity when applied to itself).
    """
    return -(rev + REV_OFFSET)
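

# Illustrative sketch, not part of the original module: because the encoding
# is its own inverse, the same function both encodes and decodes revisions.
#
#   assert _transform_rev(0) == -2
#   assert _transform_rev(-2) == 0
#   assert _transform_rev(_transform_rev(5)) == 5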


def _to_int(hex_digit):
    """turn a hexadecimal digit into a proper integer"""
    return int(hex_digit, 16)


class Block(dict):
    """represent a block of the Trie

    contains up to 16 entries indexed from 0 to 15"""

    def __init__(self):
        super(Block, self).__init__()
        # If this block exists on disk, here is its ID
        self.ondisk_id = None

    def __iter__(self):
        return iter(self.get(i) for i in range(16))


def _build_trie(index):
    """build a nodemap trie

    The nodemap stores a revision number for each unique prefix.

    Each block is a dictionary with keys in `[0, 15]`. Values are either
    another block or a revision number.
    """
    root = Block()
    for rev in range(len(index)):
        current_hex = hex(index[rev][7])
        _insert_into_block(index, 0, root, rev, current_hex)
    return root


def _update_trie(index, root, last_rev):
    """update a trie with the revisions added to the index since `last_rev`"""
    changed = 0
    for rev in range(last_rev + 1, len(index)):
        current_hex = hex(index[rev][7])
        changed += _insert_into_block(index, 0, root, rev, current_hex)
    return changed, root


def _insert_into_block(index, level, block, current_rev, current_hex):
    """insert a new revision in a block

    index: the index we are adding revisions for
    level: the depth of the current block in the trie
    block: the block currently being considered
    current_rev: the revision number we are adding
    current_hex: the hexadecimal representation of the node of that revision
    """
    changed = 1
    if block.ondisk_id is not None:
        block.ondisk_id = None
    hex_digit = _to_int(current_hex[level : level + 1])
    entry = block.get(hex_digit)
    if entry is None:
        # no entry, simply store the revision number
        block[hex_digit] = current_rev
    elif isinstance(entry, dict):
        # need to recurse to an underlying block
        changed += _insert_into_block(
            index, level + 1, entry, current_rev, current_hex
        )
    else:
        # collision with a previously unique prefix, inserting new
        # vertices to fit both entries.
        other_hex = hex(index[entry][7])
        other_rev = entry
        new = Block()
        block[hex_digit] = new
        _insert_into_block(index, level + 1, new, other_rev, other_hex)
        _insert_into_block(index, level + 1, new, current_rev, current_hex)
    return changed
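

# Illustrative sketch, not part of the original module: a collision between
# nodes starting with b"12" and b"15" produces a sub-block at root slot 1.
# Assuming a hypothetical two-entry index whose nodes start with those
# prefixes:
#
#   root = _build_trie(index)
#   root[1]      # -> a Block (both nodes share the first nibble, 0x1)
#   root[1][2]   # -> 0 (revision whose node starts with b"12")
#   root[1][5]   # -> 1 (revision whose node starts with b"15")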


def _persist_trie(root, existing_idx=None):
    """turn a nodemap trie into persistent binary data

    See `_build_trie` for nodemap trie structure"""
    block_map = {}
    if existing_idx is not None:
        base_idx = existing_idx + 1
    else:
        base_idx = 0
    chunks = []
    for tn in _walk_trie(root):
        if tn.ondisk_id is not None:
            block_map[id(tn)] = tn.ondisk_id
        else:
            block_map[id(tn)] = len(chunks) + base_idx
            chunks.append(_persist_block(tn, block_map))
    return b''.join(chunks)


def _walk_trie(block):
    """yield all the blocks in a trie

    Children blocks are always yielded before their parent block.
    """
    for (__, item) in sorted(block.items()):
        if isinstance(item, dict):
            for sub_block in _walk_trie(item):
                yield sub_block
    yield block


def _persist_block(block_node, block_map):
    """produce persistent binary data for a single block

    Children blocks are assumed to be already persisted and present in
    block_map.
    """
    data = tuple(_to_value(v, block_map) for v in block_node)
    return S_BLOCK.pack(*data)


def _to_value(item, block_map):
    """persist any value as an integer"""
    if item is None:
        return NO_ENTRY
    elif isinstance(item, dict):
        return block_map[id(item)]
    else:
        return _transform_rev(item)


def parse_data(data):
    """parse nodemap data into a nodemap Trie"""
    if (len(data) % S_BLOCK.size) != 0:
        msg = b"nodemap data size is not a multiple of block size (%d): %d"
        raise error.Abort(msg % (S_BLOCK.size, len(data)))
    if not data:
        return Block(), None
    block_map = {}
    new_blocks = []
    for i in range(0, len(data), S_BLOCK.size):
        block = Block()
        block.ondisk_id = len(block_map)
        block_map[block.ondisk_id] = block
        block_data = data[i : i + S_BLOCK.size]
        values = S_BLOCK.unpack(block_data)
        new_blocks.append((block, values))
    for b, values in new_blocks:
        for idx, v in enumerate(values):
            if v == NO_ENTRY:
                continue
            elif v >= 0:
                b[idx] = block_map[v]
            else:
                b[idx] = _transform_rev(v)
    return block, i // S_BLOCK.size


# debug utility


def check_data(ui, index, data):
    """verify that the provided nodemap data are valid for the given index"""
    ret = 0
    ui.status((b"revisions in index: %d\n") % len(index))
    root, __ = parse_data(data)
    all_revs = set(_all_revisions(root))
    ui.status((b"revisions in nodemap: %d\n") % len(all_revs))
    for r in range(len(index)):
        if r not in all_revs:
            msg = b"  revision missing from nodemap: %d\n" % r
            ui.write_err(msg)
            ret = 1
        else:
            all_revs.remove(r)
            nm_rev = _find_node(root, hex(index[r][7]))
            if nm_rev is None:
                msg = b"  revision node does not match any entries: %d\n" % r
                ui.write_err(msg)
                ret = 1
            elif nm_rev != r:
                msg = (
                    b"  revision node does not match the expected revision: "
                    b"%d != %d\n" % (r, nm_rev)
                )
                ui.write_err(msg)
                ret = 1

    if all_revs:
        for r in sorted(all_revs):
            msg = b"  extra revisions in nodemap: %d\n" % r
            ui.write_err(msg)
            ret = 1
    return ret


def _all_revisions(root):
    """return all revisions stored in a Trie"""
    for block in _walk_trie(root):
        for v in block:
            if v is None or isinstance(v, Block):
                continue
            yield v


def _find_node(block, node):
    """find the revision associated with a given node"""
    entry = block.get(_to_int(node[0:1]))
    if isinstance(entry, dict):
        return _find_node(entry, node[1:])
    return entry
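

# Illustrative sketch, not part of the original module: round-trip a tiny
# fake index through the reference implementation. The name
# `_example_roundtrip` is hypothetical; a fake index entry only needs a
# 20-byte node in slot 7, which is all the trie code reads from it.
def _example_roundtrip():
    fake_node = b'\x12\x34' + b'\x00' * 18
    fake_index = [(None,) * 7 + (fake_node,)]
    data = persistent_data(fake_index)  # serialize the trie for one revision
    root, last_block_idx = parse_data(data)  # and read it back
    # the single revision is reachable through its node's hex prefix
    assert _find_node(root, hex(fake_node)) == 0
    return last_block_idx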


def get_nodemap_file(revlog):
    if revlog._trypending:
        pending_path = revlog.radix + b".n.a"
        if revlog.opener.exists(pending_path):
            return pending_path
    return revlog.radix + b".n"