downgrade: don't assume existence of nodemap files when downgrading...
Raphaël Gomès
r49253:897aa0c4 stable
@@ -1,657 +1,657 @@
# nodemap.py - nodemap related code and utilities
#
# Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
# Copyright 2019 George Racinet <georges.racinet@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import errno
import re
import struct

from ..node import hex

from .. import (
    error,
    util,
)
from . import docket as docket_mod


class NodeMap(dict):
    def __missing__(self, x):
        raise error.RevlogError(b'unknown node: %s' % x)


def test_race_hook_1():
    """hook point for tests

    This lets tests make things happen between the docket reading and the
    data reading."""
    pass


def persisted_data(revlog):
    """read the nodemap for a revlog from disk"""
    if revlog._nodemap_file is None:
        return None
    pdata = revlog.opener.tryread(revlog._nodemap_file)
    if not pdata:
        return None
    offset = 0
    (version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size])
    if version != ONDISK_VERSION:
        return None
    offset += S_VERSION.size
    headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
    uid_size, tip_rev, data_length, data_unused, tip_node_size = headers
    offset += S_HEADER.size
    docket = NodeMapDocket(pdata[offset : offset + uid_size])
    offset += uid_size
    docket.tip_rev = tip_rev
    docket.tip_node = pdata[offset : offset + tip_node_size]
    docket.data_length = data_length
    docket.data_unused = data_unused

    filename = _rawdata_filepath(revlog, docket)
    use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap")

    test_race_hook_1()
    try:
        with revlog.opener(filename) as fd:
            if use_mmap:
                try:
                    data = util.buffer(util.mmapread(fd, data_length))
                except ValueError:
                    # raised when the read file is too small
                    data = b''
            else:
                data = fd.read(data_length)
    except (IOError, OSError) as e:
        if e.errno == errno.ENOENT:
            return None
        else:
            raise
    if len(data) < data_length:
        return None
    return docket, data


def setup_persistent_nodemap(tr, revlog):
    """Install whatever is needed transaction side to persist a nodemap on disk

    (only actually persist the nodemap if this is relevant for this revlog)
    """
    if revlog._inline:
        return  # inlined revlogs are too small for this to be relevant
    if revlog._nodemap_file is None:
        return  # we do not use persistent_nodemap on this revlog

    # this needs to run after the changelog finalization (callback ids run in
    # sorted order, and the changelog's "cl-" prefix sorts before our "nm-")
    callback_id = b"nm-revlog-persistent-nodemap-%s" % revlog._nodemap_file
    if tr.hasfinalize(callback_id):
        return  # no need to register again
    tr.addpending(
        callback_id, lambda tr: persist_nodemap(tr, revlog, pending=True)
    )
    tr.addfinalize(callback_id, lambda tr: persist_nodemap(tr, revlog))


class _NoTransaction(object):
    """transaction-like object to update the nodemap outside a transaction"""

    def __init__(self):
        self._postclose = {}

    def addpostclose(self, callback_id, callback_func):
        self._postclose[callback_id] = callback_func

    def registertmp(self, *args, **kwargs):
        pass

    def addbackup(self, *args, **kwargs):
        pass

    def add(self, *args, **kwargs):
        pass

    def addabort(self, *args, **kwargs):
        pass

    def _report(self, *args):
        pass


def update_persistent_nodemap(revlog):
    """update the persistent nodemap right now

    To be used for updating the nodemap on disk outside of a normal transaction
    setup (eg, `debugupdatecache`).
    """
    if revlog._inline:
        return  # inlined revlogs are too small for this to be relevant
    if revlog._nodemap_file is None:
        return  # we do not use persistent_nodemap on this revlog

    notr = _NoTransaction()
    persist_nodemap(notr, revlog)
    for k in sorted(notr._postclose):
        notr._postclose[k](None)


def delete_nodemap(tr, repo, revlog):
    """Delete nodemap data on disk for a given revlog"""
    if revlog._nodemap_file is None:
        msg = "calling persist nodemap on a revlog without the feature enabled"
        raise error.ProgrammingError(msg)
-    repo.svfs.unlink(revlog._nodemap_file)
+    repo.svfs.tryunlink(revlog._nodemap_file)

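
# --- editor's illustration (not part of nodemap.py) --------------------------
# The one-line change above (unlink -> tryunlink) is the point of this commit:
# `vfs.tryunlink` tolerates a missing file, so downgrading a repository that
# never materialized its nodemap files no longer aborts. A minimal sketch of
# that semantic, assuming a plain filesystem path rather than Mercurial's
# actual vfs layer (`_tryunlink_sketch` is a hypothetical name):

import os


def _tryunlink_sketch(path):
    """remove `path`, silently ignoring a missing file"""
    try:
        os.unlink(path)
    except OSError as e:
        if e.errno != errno.ENOENT:  # re-raise anything but "no such file"
            raise
# ------------------------------------------------------------------------------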


def persist_nodemap(tr, revlog, pending=False, force=False):
    """Write nodemap data on disk for a given revlog"""
    if getattr(revlog, 'filteredrevs', ()):
        raise error.ProgrammingError(
            "cannot persist nodemap of a filtered changelog"
        )
    if revlog._nodemap_file is None:
        if force:
            revlog._nodemap_file = get_nodemap_file(revlog)
        else:
            msg = "calling persist nodemap on a revlog without the feature enabled"
            raise error.ProgrammingError(msg)

    can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental")
    ondisk_docket = revlog._nodemap_docket
    feed_data = util.safehasattr(revlog.index, "update_nodemap_data")
    use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap")

    data = None
    # first attempt an incremental update of the data
    if can_incremental and ondisk_docket is not None:
        target_docket = revlog._nodemap_docket.copy()
        (
            src_docket,
            data_changed_count,
            data,
        ) = revlog.index.nodemap_data_incremental()
        new_length = target_docket.data_length + len(data)
        new_unused = target_docket.data_unused + data_changed_count
        if src_docket != target_docket:
            data = None
        elif new_length <= (new_unused * 10):
            # at least 10% of the data would be unused; do a full rewrite
            data = None
        else:
            datafile = _rawdata_filepath(revlog, target_docket)
            # EXP-TODO: if this is a cache, this should use a cache vfs, not a
            # store vfs
            tr.add(datafile, target_docket.data_length)
            with revlog.opener(datafile, b'r+') as fd:
                fd.seek(target_docket.data_length)
                fd.write(data)
                if feed_data:
                    if use_mmap:
                        fd.seek(0)
                        new_data = fd.read(new_length)
                    else:
                        fd.flush()
                        new_data = util.buffer(util.mmapread(fd, new_length))
            target_docket.data_length = new_length
            target_docket.data_unused = new_unused

    if data is None:
        # otherwise fall back to a full new export
        target_docket = NodeMapDocket()
        datafile = _rawdata_filepath(revlog, target_docket)
        if util.safehasattr(revlog.index, "nodemap_data_all"):
            data = revlog.index.nodemap_data_all()
        else:
            data = persistent_data(revlog.index)
        # EXP-TODO: if this is a cache, this should use a cache vfs, not a
        # store vfs

        tryunlink = revlog.opener.tryunlink

        def abortck(tr):
            tryunlink(datafile)

        callback_id = b"delete-%s" % datafile

        # some flavors of the transaction abort do not clean up new files;
        # they simply empty them.
        tr.addabort(callback_id, abortck)
        with revlog.opener(datafile, b'w+') as fd:
            fd.write(data)
            if feed_data:
                if use_mmap:
                    new_data = data
                else:
                    fd.flush()
                    new_data = util.buffer(util.mmapread(fd, len(data)))
        target_docket.data_length = len(data)
    target_docket.tip_rev = revlog.tiprev()
    target_docket.tip_node = revlog.node(target_docket.tip_rev)
    # EXP-TODO: if this is a cache, this should use a cache vfs, not a
    # store vfs
    file_path = revlog._nodemap_file
    if pending:
        file_path += b'.a'
        tr.registertmp(file_path)
    else:
        tr.addbackup(file_path)

    with revlog.opener(file_path, b'w', atomictemp=True) as fp:
        fp.write(target_docket.serialize())
    revlog._nodemap_docket = target_docket
    if feed_data:
        revlog.index.update_nodemap_data(target_docket, new_data)

    # search for old data files in all cases; some older process might have
    # left one behind.
    olds = _other_rawdata_filepath(revlog, target_docket)
    if olds:
        realvfs = getattr(revlog, '_realopener', revlog.opener)

        def cleanup(tr):
            for oldfile in olds:
                realvfs.tryunlink(oldfile)

        callback_id = b"revlog-cleanup-nodemap-%s" % revlog._nodemap_file
        tr.addpostclose(callback_id, cleanup)


### Nodemap docket file
#
# The nodemap data are stored on disk using 2 files:
#
# * a raw data file containing a persistent nodemap
#   (see the `Nodemap Trie` section)
#
# * a small "docket" file containing metadata
#
# While the nodemap data can be multiple tens of megabytes, the "docket" is
# small, so it is easy to update it automatically or to duplicate its content
# during a transaction.
#
# Multiple raw data files can exist at the same time (the currently valid one
# and a new one being written by an in-progress transaction). To accommodate
# this, the filename hosting the raw data has a variable part. The exact
# filename is specified inside the "docket" file.
#
# The docket file contains information to find, qualify and validate the raw
# data. Its content is currently very light, but it will expand as the on-disk
# nodemap gains the necessary features to be used in production.

ONDISK_VERSION = 1
S_VERSION = struct.Struct(">B")
S_HEADER = struct.Struct(">BQQQQ")
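
# --- editor's illustration (not part of nodemap.py) --------------------------
# A worked view of the docket layout implied by the two structs above; the
# offsets match the parsing in `persisted_data` and the writing in
# `NodeMapDocket.serialize` (a sketch only):

def _docket_layout_sketch():
    assert S_VERSION.size == 1  # ">B": byte 0 is the format version
    assert S_HEADER.size == 33  # ">BQQQQ": uid_size, tip_rev, data_length,
    #                                       data_unused, tip_node_size
    # byte  0       : version (must equal ONDISK_VERSION)
    # bytes 1..33   : the five header fields, big endian
    # bytes 34..    : uid (uid_size bytes), then tip_node (tip_node_size bytes)
# ------------------------------------------------------------------------------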


class NodeMapDocket(object):
    """metadata associated with persistent nodemap data

    The persistent data may come from disk or be on their way to disk.
    """

    def __init__(self, uid=None):
        if uid is None:
            uid = docket_mod.make_uid()
        # a unique identifier for the data file:
        # - When new data are appended, it is preserved.
        # - When a new data file is created, a new identifier is generated.
        self.uid = uid
        # the tipmost revision stored in the data file. This revision and all
        # revisions before it are expected to be encoded in the data file.
        self.tip_rev = None
        # the node of that tipmost revision; if it mismatches the current
        # index data, the docket is not valid for the current index and
        # should be discarded.
        #
        # note: this check is not perfect, as some destructive operation
        # could preserve the same tip_rev + tip_node while altering lower
        # revisions. However, multiple other caches have the same
        # vulnerability (eg: branchmap cache).
        self.tip_node = None
        # the size (in bytes) of the persisted data to encode the nodemap
        # valid for `tip_rev`.
        # - data files shorter than this are corrupted,
        # - any extra data should be ignored.
        self.data_length = None
        # the amount (in bytes) of "dead" data, still in the data file but no
        # longer used for the nodemap.
        self.data_unused = 0

    def copy(self):
        new = NodeMapDocket(uid=self.uid)
        new.tip_rev = self.tip_rev
        new.tip_node = self.tip_node
        new.data_length = self.data_length
        new.data_unused = self.data_unused
        return new

    def __cmp__(self, other):
        if self.uid < other.uid:
            return -1
        if self.uid > other.uid:
            return 1
        elif self.data_length < other.data_length:
            return -1
        elif self.data_length > other.data_length:
            return 1
        return 0

    def __eq__(self, other):
        return self.uid == other.uid and self.data_length == other.data_length

    def serialize(self):
        """return serialized bytes for a docket using the passed uid"""
        data = []
        data.append(S_VERSION.pack(ONDISK_VERSION))
        headers = (
            len(self.uid),
            self.tip_rev,
            self.data_length,
            self.data_unused,
            len(self.tip_node),
        )
        data.append(S_HEADER.pack(*headers))
        data.append(self.uid)
        data.append(self.tip_node)
        return b''.join(data)
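
# --- editor's illustration (not part of nodemap.py) --------------------------
# `serialize` is the writer counterpart of the parsing in `persisted_data`.
# A round-trip sketch with made-up field values:

def _docket_roundtrip_sketch():
    docket = NodeMapDocket()
    docket.tip_rev = 41
    docket.tip_node = b'\x00' * 20  # 20-byte binary node, sha1-sized
    docket.data_length = 320
    docket.data_unused = 0
    raw = docket.serialize()
    assert S_VERSION.unpack(raw[:1]) == (ONDISK_VERSION,)
    uid_size, tip_rev = S_HEADER.unpack(raw[1:34])[:2]
    assert uid_size == len(docket.uid) and tip_rev == 41
# ------------------------------------------------------------------------------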


def _rawdata_filepath(revlog, docket):
    """The (vfs relative) nodemap's rawdata file for a given uid"""
    prefix = revlog.radix
    return b"%s-%s.nd" % (prefix, docket.uid)


def _other_rawdata_filepath(revlog, docket):
    prefix = revlog.radix
    pattern = re.compile(br"(^|/)%s-[0-9a-f]+\.nd$" % prefix)
    new_file_path = _rawdata_filepath(revlog, docket)
    new_file_name = revlog.opener.basename(new_file_path)
    dirpath = revlog.opener.dirname(new_file_path)
    others = []
    for f in revlog.opener.listdir(dirpath):
        if pattern.match(f) and f != new_file_name:
            others.append(f)
    return others


### Nodemap Trie
#
# This is a simple reference implementation to compute and persist a nodemap
# trie. This reference implementation is write-only. The Python version is
# not expected to be actually used, since it won't provide a performance
# improvement over the existing non-persistent C implementation.
#
# The nodemap is persisted as a trie using 4-bit-address/16-entry blocks.
# Each revision can be addressed using its node's shortest unique prefix.
#
# The trie is stored as a sequence of blocks. Each block contains 16 entries
# (signed 32-bit integers, big endian; see S_BLOCK below). Each entry can be
# one of the following:
#
# * value >= 0 -> index of sub-block
# * value == -1 -> no value
# * value < -1 -> encoded revision: rev = -(value+2)
#
# See REV_OFFSET and _transform_rev below.
#
# The implementation focuses on simplicity, not on performance. A Rust
# implementation should provide an efficient version of the same binary
# persistence. This reference Python implementation is never meant to be
# used extensively in production.


def persistent_data(index):
    """return the persistent binary form for a nodemap for a given index"""
    trie = _build_trie(index)
    return _persist_trie(trie)


def update_persistent_data(index, root, max_idx, last_rev):
    """return the incremental update for persistent nodemap from a given index"""
    changed_block, trie = _update_trie(index, root, last_rev)
    return (
        changed_block * S_BLOCK.size,
        _persist_trie(trie, existing_idx=max_idx),
    )


S_BLOCK = struct.Struct(">" + ("l" * 16))

NO_ENTRY = -1
# rev 0 needs to be -2 because 0 is used by blocks and -1 is a special value.
REV_OFFSET = 2


def _transform_rev(rev):
    """Return the number used to represent the rev in the tree.

    (or retrieve a rev number from such a representation)

    Note that this is an involution, a function equal to its inverse (i.e.
    which gives the identity when composed with itself).
    """
    return -(rev + REV_OFFSET)
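
# --- editor's illustration (not part of nodemap.py) --------------------------
# Worked example of the entry encoding described in the section comment:
# revisions are mapped below -1 so they cannot collide with sub-block
# indexes (>= 0) or with NO_ENTRY (-1), and the mapping is its own inverse:

def _transform_rev_sketch():
    assert _transform_rev(0) == -2  # rev 0 encodes as -2 ...
    assert _transform_rev(3) == -5  # ... rev 3 as -5 ...
    assert _transform_rev(-5) == 3  # ... and the same function decodes
    assert NO_ENTRY == -1           # -1 stays reserved for "no value"
# ------------------------------------------------------------------------------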


def _to_int(hex_digit):
    """turn a hexadecimal digit into a proper integer"""
    return int(hex_digit, 16)


class Block(dict):
    """represent a block of the Trie

    contains up to 16 entries indexed from 0 to 15"""

    def __init__(self):
        super(Block, self).__init__()
        # If this block exists on disk, here is its ID
        self.ondisk_id = None

    def __iter__(self):
        return iter(self.get(i) for i in range(16))


def _build_trie(index):
    """build a nodemap trie

    The nodemap stores revision numbers for each unique prefix.

    Each block is a dictionary with keys in `[0, 15]`. Values are either
    another block or a revision number.
    """
    root = Block()
    for rev in range(len(index)):
        current_hex = hex(index[rev][7])
        _insert_into_block(index, 0, root, rev, current_hex)
    return root
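
# --- editor's illustration (not part of nodemap.py) --------------------------
# Tiny example of the trie shape, using a hypothetical two-revision index
# whose nodes are `ab00...` (rev 0) and `ac00...` (rev 1). The two nodes
# collide on their first hex digit, so a child block is created and each
# revision ends up addressed by its shortest unique prefix:

def _build_trie_sketch():
    # entry [7] holds the binary node, mirroring how _build_trie reads
    # index[rev][7]; everything else is irrelevant padding here
    fake_index = [
        (None,) * 7 + (b'\xab' + b'\x00' * 19,),
        (None,) * 7 + (b'\xac' + b'\x00' * 19,),
    ]
    root = _build_trie(fake_index)
    child = root[0xA]  # digit 'a' leads to a sub-block, not a revision
    assert child[0xB] == 0 and child[0xC] == 1
# ------------------------------------------------------------------------------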


def _update_trie(index, root, last_rev):
    """update a trie with the revisions the index gained since last_rev"""
    changed = 0
    for rev in range(last_rev + 1, len(index)):
        current_hex = hex(index[rev][7])
        changed += _insert_into_block(index, 0, root, rev, current_hex)
    return changed, root


def _insert_into_block(index, level, block, current_rev, current_hex):
    """insert a new revision in a block

    index: the index we are adding revision for
    level: the depth of the current block in the trie
    block: the block currently being considered
    current_rev: the revision number we are adding
    current_hex: the hexadecimal representation of the node of that revision
    """
    changed = 1
    if block.ondisk_id is not None:
        block.ondisk_id = None
    hex_digit = _to_int(current_hex[level : level + 1])
    entry = block.get(hex_digit)
    if entry is None:
        # no entry, simply store the revision number
        block[hex_digit] = current_rev
    elif isinstance(entry, dict):
        # need to recurse to an underlying block
        changed += _insert_into_block(
            index, level + 1, entry, current_rev, current_hex
        )
    else:
        # collision with a previously unique prefix, inserting new
        # vertices to fit both entries.
        other_hex = hex(index[entry][7])
        other_rev = entry
        new = Block()
        block[hex_digit] = new
        _insert_into_block(index, level + 1, new, other_rev, other_hex)
        _insert_into_block(index, level + 1, new, current_rev, current_hex)
    return changed


def _persist_trie(root, existing_idx=None):
    """turn a nodemap trie into persistent binary data

    See `_build_trie` for the nodemap trie structure"""
    block_map = {}
    if existing_idx is not None:
        base_idx = existing_idx + 1
    else:
        base_idx = 0
    chunks = []
    for tn in _walk_trie(root):
        if tn.ondisk_id is not None:
            block_map[id(tn)] = tn.ondisk_id
        else:
            block_map[id(tn)] = len(chunks) + base_idx
            chunks.append(_persist_block(tn, block_map))
    return b''.join(chunks)


def _walk_trie(block):
    """yield all the blocks in a trie

    Children blocks are always yielded before their parent block.
    """
    for (__, item) in sorted(block.items()):
        if isinstance(item, dict):
            for sub_block in _walk_trie(item):
                yield sub_block
    yield block


def _persist_block(block_node, block_map):
    """produce persistent binary data for a single block

    Children blocks are assumed to be already persisted and present in
    block_map.
    """
    data = tuple(_to_value(v, block_map) for v in block_node)
    return S_BLOCK.pack(*data)


def _to_value(item, block_map):
    """persist any value as an integer"""
    if item is None:
        return NO_ENTRY
    elif isinstance(item, dict):
        return block_map[id(item)]
    else:
        return _transform_rev(item)


def parse_data(data):
    """parse nodemap data into a nodemap Trie"""
    if (len(data) % S_BLOCK.size) != 0:
        msg = b"nodemap data size is not a multiple of block size (%d): %d"
        raise error.Abort(msg % (S_BLOCK.size, len(data)))
    if not data:
        return Block(), None
    block_map = {}
    new_blocks = []
    for i in range(0, len(data), S_BLOCK.size):
        block = Block()
        block.ondisk_id = len(block_map)
        block_map[block.ondisk_id] = block
        block_data = data[i : i + S_BLOCK.size]
        values = S_BLOCK.unpack(block_data)
        new_blocks.append((block, values))
    for b, values in new_blocks:
        for idx, v in enumerate(values):
            if v == NO_ENTRY:
                continue
            elif v >= 0:
                b[idx] = block_map[v]
            else:
                b[idx] = _transform_rev(v)
    return block, i // S_BLOCK.size
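
# --- editor's illustration (not part of nodemap.py) --------------------------
# `_persist_trie` and `parse_data` are inverses of each other. A round-trip
# sketch reusing the hypothetical two-revision index from the sketch above:

def _persist_parse_roundtrip_sketch():
    fake_index = [
        (None,) * 7 + (b'\xab' + b'\x00' * 19,),
        (None,) * 7 + (b'\xac' + b'\x00' * 19,),
    ]
    data = persistent_data(fake_index)
    assert len(data) == 2 * S_BLOCK.size  # the child block, then the root
    root, last_block_idx = parse_data(data)
    assert last_block_idx == 1            # the root block was written last
    assert _find_node(root, b'ab') == 0
    assert _find_node(root, b'ac') == 1
# ------------------------------------------------------------------------------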


# debug utility


def check_data(ui, index, data):
    """verify that the provided nodemap data are valid for the given index"""
    ret = 0
    ui.status((b"revision in index: %d\n") % len(index))
    root, __ = parse_data(data)
    all_revs = set(_all_revisions(root))
    ui.status((b"revision in nodemap: %d\n") % len(all_revs))
    for r in range(len(index)):
        if r not in all_revs:
            msg = b" revision missing from nodemap: %d\n" % r
            ui.write_err(msg)
            ret = 1
        else:
            all_revs.remove(r)
            nm_rev = _find_node(root, hex(index[r][7]))
            if nm_rev is None:
                msg = b" revision node does not match any entries: %d\n" % r
                ui.write_err(msg)
                ret = 1
            elif nm_rev != r:
                msg = (
                    b" revision node does not match the expected revision: "
                    b"%d != %d\n" % (r, nm_rev)
                )
                ui.write_err(msg)
                ret = 1

    if all_revs:
        for r in sorted(all_revs):
            msg = b" extra revision in nodemap: %d\n" % r
            ui.write_err(msg)
            ret = 1
    return ret


def _all_revisions(root):
    """return all revisions stored in a Trie"""
    for block in _walk_trie(root):
        for v in block:
            if v is None or isinstance(v, Block):
                continue
            yield v


def _find_node(block, node):
    """find the revision associated with a given node"""
    entry = block.get(_to_int(node[0:1]))
    if isinstance(entry, dict):
        return _find_node(entry, node[1:])
    return entry


def get_nodemap_file(revlog):
    if revlog._trypending:
        pending_path = revlog.radix + b".n.a"
        if revlog.opener.exists(pending_path):
            return pending_path
    return revlog.radix + b".n"