nodemap: fix some comment formatting...
marmoute
r48244:f70ca39d default
@@ -1,647 +1,647 @@
# nodemap.py - nodemap related code and utilities
#
# Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
# Copyright 2019 George Racinet <georges.racinet@octobus.net>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import errno
import re
import struct

from ..node import hex

from .. import (
    error,
    util,
)
from . import docket as docket_mod


class NodeMap(dict):
    def __missing__(self, x):
        raise error.RevlogError(b'unknown node: %s' % x)


def persisted_data(revlog):
    """read the nodemap for a revlog from disk"""
    if revlog._nodemap_file is None:
        return None
    pdata = revlog.opener.tryread(revlog._nodemap_file)
    if not pdata:
        return None
    offset = 0
    (version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size])
    if version != ONDISK_VERSION:
        return None
    offset += S_VERSION.size
    headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
    uid_size, tip_rev, data_length, data_unused, tip_node_size = headers
    offset += S_HEADER.size
    docket = NodeMapDocket(pdata[offset : offset + uid_size])
    offset += uid_size
    docket.tip_rev = tip_rev
    docket.tip_node = pdata[offset : offset + tip_node_size]
    docket.data_length = data_length
    docket.data_unused = data_unused

    filename = _rawdata_filepath(revlog, docket)
    use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap")
    try:
        with revlog.opener(filename) as fd:
            if use_mmap:
                try:
                    data = util.buffer(util.mmapread(fd, data_length))
                except ValueError:
                    # raised when the read file is too small
                    data = b''
            else:
                data = fd.read(data_length)
    except (IOError, OSError) as e:
        if e.errno == errno.ENOENT:
            return None
        else:
            raise
    if len(data) < data_length:
        return None
    return docket, data


def setup_persistent_nodemap(tr, revlog):
    """Install whatever is needed transaction side to persist a nodemap on disk

    (only actually persist the nodemap if this is relevant for this revlog)
    """
    if revlog._inline:
        return  # inlined revlogs are too small for this to be relevant
    if revlog._nodemap_file is None:
        return  # we do not use persistent_nodemap on this revlog

    # we need to run after the changelog finalization, whose callback ids are
    # prefixed with "cl-"; our "nm-" prefix sorts after it
    callback_id = b"nm-revlog-persistent-nodemap-%s" % revlog._nodemap_file
    if tr.hasfinalize(callback_id):
        return  # no need to register again
    tr.addpending(
        callback_id, lambda tr: persist_nodemap(tr, revlog, pending=True)
    )
    tr.addfinalize(callback_id, lambda tr: persist_nodemap(tr, revlog))


class _NoTransaction(object):
    """transaction-like object to update the nodemap outside a transaction"""

    def __init__(self):
        self._postclose = {}

    def addpostclose(self, callback_id, callback_func):
        self._postclose[callback_id] = callback_func

    def registertmp(self, *args, **kwargs):
        pass

    def addbackup(self, *args, **kwargs):
        pass

    def add(self, *args, **kwargs):
        pass

    def addabort(self, *args, **kwargs):
        pass

    def _report(self, *args):
        pass


def update_persistent_nodemap(revlog):
    """update the persistent nodemap right now

    To be used for updating the nodemap on disk outside of a normal transaction
    setup (eg, `debugupdatecache`).
    """
    if revlog._inline:
        return  # inlined revlogs are too small for this to be relevant
    if revlog._nodemap_file is None:
        return  # we do not use persistent_nodemap on this revlog

    notr = _NoTransaction()
    persist_nodemap(notr, revlog)
    for k in sorted(notr._postclose):
        notr._postclose[k](None)


def delete_nodemap(tr, repo, revlog):
-    """ Delete nodemap data on disk for a given revlog"""
+    """Delete nodemap data on disk for a given revlog"""
    if revlog._nodemap_file is None:
        msg = "calling persist nodemap on a revlog without the feature enabled"
        raise error.ProgrammingError(msg)
    repo.svfs.unlink(revlog._nodemap_file)


def persist_nodemap(tr, revlog, pending=False, force=False):
    """Write nodemap data on disk for a given revlog"""
    if getattr(revlog, 'filteredrevs', ()):
        raise error.ProgrammingError(
            "cannot persist nodemap of a filtered changelog"
        )
    if revlog._nodemap_file is None:
        if force:
            revlog._nodemap_file = get_nodemap_file(revlog)
        else:
            msg = "calling persist nodemap on a revlog without the feature enabled"
            raise error.ProgrammingError(msg)

    can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental")
    ondisk_docket = revlog._nodemap_docket
    feed_data = util.safehasattr(revlog.index, "update_nodemap_data")
    use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap")

    data = None
    # first attempt an incremental update of the data
    if can_incremental and ondisk_docket is not None:
        target_docket = revlog._nodemap_docket.copy()
        (
            src_docket,
            data_changed_count,
            data,
        ) = revlog.index.nodemap_data_incremental()
        new_length = target_docket.data_length + len(data)
        new_unused = target_docket.data_unused + data_changed_count
        if src_docket != target_docket:
            data = None
        elif new_length <= (new_unused * 10):
            # at least 10% of the data would be unused: rewrite from scratch
            data = None
        else:
            datafile = _rawdata_filepath(revlog, target_docket)
            # EXP-TODO: if this is a cache, this should use a cache vfs, not a
            # store vfs
            tr.add(datafile, target_docket.data_length)
            with revlog.opener(datafile, b'r+') as fd:
                fd.seek(target_docket.data_length)
                fd.write(data)
                if feed_data:
                    # feed the updated data back to the index, going through
                    # mmap only when mmap use was requested
                    if use_mmap:
                        fd.flush()
                        new_data = util.buffer(util.mmapread(fd, new_length))
                    else:
                        fd.seek(0)
                        new_data = fd.read(new_length)
            target_docket.data_length = new_length
            target_docket.data_unused = new_unused

    if data is None:
        # otherwise fallback to a full new export
        target_docket = NodeMapDocket()
        datafile = _rawdata_filepath(revlog, target_docket)
        if util.safehasattr(revlog.index, "nodemap_data_all"):
            data = revlog.index.nodemap_data_all()
        else:
            data = persistent_data(revlog.index)
        # EXP-TODO: if this is a cache, this should use a cache vfs, not a
        # store vfs

        tryunlink = revlog.opener.tryunlink

        def abortck(tr):
            tryunlink(datafile)

        callback_id = b"delete-%s" % datafile

        # some flavors of the transaction abort do not clean up new files,
        # they simply empty them.
        tr.addabort(callback_id, abortck)
        with revlog.opener(datafile, b'w+') as fd:
            fd.write(data)
            if feed_data:
                if use_mmap:
                    fd.flush()
                    new_data = util.buffer(util.mmapread(fd, len(data)))
                else:
                    new_data = data
        target_docket.data_length = len(data)
    target_docket.tip_rev = revlog.tiprev()
    target_docket.tip_node = revlog.node(target_docket.tip_rev)
    # EXP-TODO: if this is a cache, this should use a cache vfs, not a
    # store vfs
    file_path = revlog._nodemap_file
    if pending:
        file_path += b'.a'
        tr.registertmp(file_path)
    else:
        tr.addbackup(file_path)

    with revlog.opener(file_path, b'w', atomictemp=True) as fp:
        fp.write(target_docket.serialize())
    revlog._nodemap_docket = target_docket
    if feed_data:
        revlog.index.update_nodemap_data(target_docket, new_data)

    # search for old data files in all cases; some older process might have
    # left one behind.
    olds = _other_rawdata_filepath(revlog, target_docket)
    if olds:
        realvfs = getattr(revlog, '_realopener', revlog.opener)

        def cleanup(tr):
            for oldfile in olds:
                realvfs.tryunlink(oldfile)

        callback_id = b"revlog-cleanup-nodemap-%s" % revlog._nodemap_file
        tr.addpostclose(callback_id, cleanup)


### Nodemap docket file
#
# The nodemap data are stored on disk using 2 files:
#
# * a raw data file containing a persistent nodemap
#   (see `Nodemap Trie` section)
#
# * a small "docket" file containing metadata
#
# While the nodemap data can be multiple tens of megabytes, the "docket" is
# small, so it is easy to update it automatically or to duplicate its content
# during a transaction.
#
# Multiple raw data files can exist at the same time (the currently valid one
# and a new one being used by an in-progress transaction). To accommodate
# this, the filename hosting the raw data has a variable part. The exact
# filename is specified inside the "docket" file.
#
# The docket file contains information to find, qualify and validate the raw
# data. Its content is currently very light, but it will expand as the on-disk
# nodemap gains the necessary features to be used in production.
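#
# As a sketch (derived from S_VERSION and S_HEADER below), the serialized
# docket is laid out as:
#
#   offset         size           field
#   0              1              on-disk version
#   1              1              uid_size
#   2              8              tip_rev
#   10             8              data_length
#   18             8              data_unused
#   26             8              tip_node_size
#   34             uid_size       uid
#   34 + uid_size  tip_node_size  tip_node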


ONDISK_VERSION = 1
S_VERSION = struct.Struct(">B")
S_HEADER = struct.Struct(">BQQQQ")


class NodeMapDocket(object):
    """metadata associated with persistent nodemap data

    The persistent data may come from disk or be on their way to disk.
    """

    def __init__(self, uid=None):
        if uid is None:
            uid = docket_mod.make_uid()
        # a unique identifier for the data file:
        # - When new data are appended, it is preserved.
        # - When a new data file is created, a new identifier is generated.
        self.uid = uid
        # the tipmost revision stored in the data file. This revision and all
        # revisions before it are expected to be encoded in the data file.
        self.tip_rev = None
        # the node of that tipmost revision; if it mismatches the current index
        # data, the docket is not valid for the current index and should be
        # discarded.
        #
        # note: this check is not perfect as some destructive operations could
        # preserve the same tip_rev + tip_node while altering lower revisions.
        # However, multiple other caches have the same vulnerability (eg:
        # branchmap cache).
        self.tip_node = None
        # the size (in bytes) of the persisted data to encode the nodemap valid
        # for `tip_rev`.
        # - data files shorter than this are corrupted,
        # - any extra data should be ignored.
        self.data_length = None
        # the amount (in bytes) of "dead" data, still in the data file but no
        # longer used for the nodemap.
        self.data_unused = 0

    def copy(self):
        new = NodeMapDocket(uid=self.uid)
        new.tip_rev = self.tip_rev
        new.tip_node = self.tip_node
        new.data_length = self.data_length
        new.data_unused = self.data_unused
        return new

    def __cmp__(self, other):
        if self.uid < other.uid:
            return -1
        if self.uid > other.uid:
            return 1
        elif self.data_length < other.data_length:
            return -1
        elif self.data_length > other.data_length:
            return 1
        return 0

    def __eq__(self, other):
        return self.uid == other.uid and self.data_length == other.data_length

    def serialize(self):
        """return serialized bytes for a docket using the passed uid"""
        data = []
        data.append(S_VERSION.pack(ONDISK_VERSION))
        headers = (
            len(self.uid),
            self.tip_rev,
            self.data_length,
            self.data_unused,
            len(self.tip_node),
        )
        data.append(S_HEADER.pack(*headers))
        data.append(self.uid)
        data.append(self.tip_node)
        return b''.join(data)
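
# For a concrete size (illustrative, assuming the 8-character uid produced by
# docket_mod.make_uid and a 20-byte sha1 tip node): 1 version byte + 33 header
# bytes + 8 + 20 = 62 bytes per docket.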


def _rawdata_filepath(revlog, docket):
    """The (vfs relative) nodemap's rawdata file for a given uid"""
    prefix = revlog.radix
    return b"%s-%s.nd" % (prefix, docket.uid)


def _other_rawdata_filepath(revlog, docket):
    prefix = revlog.radix
    pattern = re.compile(br"(^|/)%s-[0-9a-f]+\.nd$" % prefix)
    new_file_path = _rawdata_filepath(revlog, docket)
    new_file_name = revlog.opener.basename(new_file_path)
    dirpath = revlog.opener.dirname(new_file_path)
    others = []
    for f in revlog.opener.listdir(dirpath):
        if pattern.match(f) and f != new_file_name:
            others.append(f)
    return others


### Nodemap Trie
#
# This is a simple reference implementation to compute and persist a nodemap
# trie. This reference implementation is write only. The python version of
# this is not expected to be actually used, since it won't provide a
# performance improvement over the existing non-persistent C implementation.
#
# The nodemap is persisted as a trie using 4-bit addresses and 16-entry
# blocks. Each revision can be addressed using the shortest unique prefix of
# its node.
#
# The trie is stored as a sequence of blocks. Each block contains 16 entries
# (signed 32-bit integers, big endian, matching S_BLOCK below). Each entry
# can be one of the following:
#
# * value >= 0 -> index of sub-block
# * value == -1 -> no value
# * value < -1 -> encoded revision: rev = -(value+2)
#
# See REV_OFFSET and _transform_rev below.
#
# The implementation focuses on simplicity, not on performance. A Rust
# implementation should provide an efficient version of the same binary
# persistence. This reference python implementation is never meant to be
# used extensively in production.
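#
# For example (illustrative): resolving a node whose hex form starts with
# "1f" reads entry 0x1 of the root block; if that entry points to a
# sub-block, the lookup continues with entry 0xf of that sub-block, and so on
# until an entry encoding a revision (value < -1) is reached. This is what
# `_find_node` below implements for the in-memory form.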


def persistent_data(index):
    """return the persistent binary form for a nodemap for a given index"""
    trie = _build_trie(index)
    return _persist_trie(trie)


def update_persistent_data(index, root, max_idx, last_rev):
    """return the incremental update for persistent nodemap from a given index"""
    changed_block, trie = _update_trie(index, root, last_rev)
    return (
        changed_block * S_BLOCK.size,
        _persist_trie(trie, existing_idx=max_idx),
    )


S_BLOCK = struct.Struct(">" + ("l" * 16))

NO_ENTRY = -1
# rev 0 needs to be -2 because 0 is used by blocks and -1 is a special value.
REV_OFFSET = 2


def _transform_rev(rev):
    """Return the number used to represent the rev in the tree.

    (or retrieve a rev number from such representation)

    Note that this is an involution, a function equal to its inverse (i.e.
    which gives the identity when applied to itself).
    """
    return -(rev + REV_OFFSET)
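
# A quick sanity check of the encoding (illustrative values):
#
#   _transform_rev(0)  == -2   # rev 0 is stored as -2
#   _transform_rev(5)  == -7
#   _transform_rev(-7) == 5    # involution: applying it twice is the identity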


def _to_int(hex_digit):
    """turn a hexadecimal digit into a proper integer"""
    return int(hex_digit, 16)


class Block(dict):
    """represent a block of the Trie

    contains up to 16 entries indexed from 0 to 15"""

    def __init__(self):
        super(Block, self).__init__()
        # If this block exists on disk, here is its ID
        self.ondisk_id = None

    def __iter__(self):
        return iter(self.get(i) for i in range(16))


def _build_trie(index):
    """build a nodemap trie

    The nodemap stores a revision number for each unique prefix.

    Each block is a dictionary with keys in `[0, 15]`. Values are either
    another block or a revision number.
    """
    root = Block()
    for rev in range(len(index)):
        current_hex = hex(index[rev][7])
        _insert_into_block(index, 0, root, rev, current_hex)
    return root


def _update_trie(index, root, last_rev):
    """update an existing trie with the revisions added since `last_rev`"""
    changed = 0
    for rev in range(last_rev + 1, len(index)):
        current_hex = hex(index[rev][7])
        changed += _insert_into_block(index, 0, root, rev, current_hex)
    return changed, root


def _insert_into_block(index, level, block, current_rev, current_hex):
    """insert a new revision in a block

    index: the index we are adding revisions for
    level: the depth of the current block in the trie
    block: the block currently being considered
    current_rev: the revision number we are adding
    current_hex: the hexadecimal representation of the node of that revision
    """
    changed = 1
    if block.ondisk_id is not None:
        block.ondisk_id = None
    hex_digit = _to_int(current_hex[level : level + 1])
    entry = block.get(hex_digit)
    if entry is None:
        # no entry, simply store the revision number
        block[hex_digit] = current_rev
    elif isinstance(entry, dict):
        # need to recurse to an underlying block
        changed += _insert_into_block(
            index, level + 1, entry, current_rev, current_hex
        )
    else:
        # collision with a previously unique prefix, inserting new
        # vertices to fit both entries.
        other_hex = hex(index[entry][7])
        other_rev = entry
        new = Block()
        block[hex_digit] = new
        _insert_into_block(index, level + 1, new, other_rev, other_hex)
        _insert_into_block(index, level + 1, new, current_rev, current_hex)
    return changed
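
# Worked example with hypothetical nodes: if rev 1 (node "ab12...") is
# already stored as root[0xa] = 1 and rev 2 (node "ab34...") is inserted,
# the collision at level 0 allocates a new block B with root[0xa] = B;
# re-inserting both revisions one level deeper collides again on digit 0xb,
# producing B[0xb] = C with C[0x1] = 1 and C[0x3] = 2.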


def _persist_trie(root, existing_idx=None):
    """turn a nodemap trie into persistent binary data

    See `_build_trie` for nodemap trie structure"""
    block_map = {}
    if existing_idx is not None:
        base_idx = existing_idx + 1
    else:
        base_idx = 0
    chunks = []
    for tn in _walk_trie(root):
        if tn.ondisk_id is not None:
            block_map[id(tn)] = tn.ondisk_id
        else:
            block_map[id(tn)] = len(chunks) + base_idx
            chunks.append(_persist_block(tn, block_map))
    return b''.join(chunks)


def _walk_trie(block):
    """yield all the blocks in a trie

    Children blocks are always yielded before their parent block, so that
    `_persist_block` can find their on-disk index in `block_map`.
    """
    for (__, item) in sorted(block.items()):
        if isinstance(item, dict):
            for sub_block in _walk_trie(item):
                yield sub_block
    yield block


def _persist_block(block_node, block_map):
    """produce persistent binary data for a single block

    Children blocks are assumed to be already persisted and present in
    block_map.
    """
    data = tuple(_to_value(v, block_map) for v in block_node)
    return S_BLOCK.pack(*data)


def _to_value(item, block_map):
    """persist any value as an integer"""
    if item is None:
        return NO_ENTRY
    elif isinstance(item, dict):
        return block_map[id(item)]
    else:
        return _transform_rev(item)


def parse_data(data):
    """parse nodemap data into a nodemap Trie"""
    if (len(data) % S_BLOCK.size) != 0:
        msg = b"nodemap data size is not a multiple of block size (%d): %d"
        raise error.Abort(msg % (S_BLOCK.size, len(data)))
    if not data:
        return Block(), None
    block_map = {}
    new_blocks = []
    for i in range(0, len(data), S_BLOCK.size):
        block = Block()
        block.ondisk_id = len(block_map)
        block_map[block.ondisk_id] = block
        block_data = data[i : i + S_BLOCK.size]
        values = S_BLOCK.unpack(block_data)
        new_blocks.append((block, values))
    for b, values in new_blocks:
        for idx, v in enumerate(values):
            if v == NO_ENTRY:
                continue
            elif v >= 0:
                b[idx] = block_map[v]
            else:
                b[idx] = _transform_rev(v)
    return block, i // S_BLOCK.size
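
# Round-trip sketch (illustrative): for an index `idx`,
#
#   root, last_block_idx = parse_data(persistent_data(idx))
#
# rebuilds the in-memory trie; since `_walk_trie` emits the root last, the
# returned index is the root block's position, which matches the
# `existing_idx`/`max_idx` parameter used by `update_persistent_data` above.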


# debug utility


def check_data(ui, index, data):
    """verify that the provided nodemap data are valid for the given index"""
    ret = 0
    ui.status((b"revision in index: %d\n") % len(index))
    root, __ = parse_data(data)
    all_revs = set(_all_revisions(root))
    ui.status((b"revision in nodemap: %d\n") % len(all_revs))
    for r in range(len(index)):
        if r not in all_revs:
            msg = b" revision missing from nodemap: %d\n" % r
            ui.write_err(msg)
            ret = 1
        else:
            all_revs.remove(r)
            nm_rev = _find_node(root, hex(index[r][7]))
            if nm_rev is None:
                msg = b" revision node does not match any entries: %d\n" % r
                ui.write_err(msg)
                ret = 1
            elif nm_rev != r:
                msg = (
                    b" revision node does not match the expected revision: "
                    b"%d != %d\n" % (r, nm_rev)
                )
                ui.write_err(msg)
                ret = 1

    if all_revs:
        for r in sorted(all_revs):
            msg = b" extra revision in nodemap: %d\n" % r
            ui.write_err(msg)
            ret = 1
    return ret


def _all_revisions(root):
    """return all revisions stored in a Trie"""
    for block in _walk_trie(root):
        for v in block:
            if v is None or isinstance(v, Block):
                continue
            yield v


def _find_node(block, node):
    """find the revision associated with a given node"""
    entry = block.get(_to_int(node[0:1]))
    if isinstance(entry, dict):
        return _find_node(entry, node[1:])
    return entry


def get_nodemap_file(revlog):
    if revlog._trypending:
        pending_path = revlog.radix + b".n.a"
        if revlog.opener.exists(pending_path):
            return pending_path
    return revlog.radix + b".n"