##// END OF EJS Templates
nodemap: make `_persist_nodemap` a public function...
Pulkit Goyal -
r47195:83f037ac default
parent child Browse files
Show More
@@ -1,636 +1,636 b''
1 # nodemap.py - nodemap related code and utilities
1 # nodemap.py - nodemap related code and utilities
2 #
2 #
3 # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
3 # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
4 # Copyright 2019 George Racinet <georges.racinet@octobus.net>
4 # Copyright 2019 George Racinet <georges.racinet@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 import errno
11 import errno
12 import os
12 import os
13 import re
13 import re
14 import struct
14 import struct
15
15
16 from ..node import hex
16 from ..node import hex
17
17
18 from .. import (
18 from .. import (
19 error,
19 error,
20 util,
20 util,
21 )
21 )
22
22
23
23
class NodeMap(dict):
    """dict subclass mapping node -> rev that errors out on unknown nodes.

    Looking up a missing node raises ``error.RevlogError`` instead of the
    usual ``KeyError`` so callers get a revlog-level failure.
    """

    def __missing__(self, x):
        raise error.RevlogError(b'unknown node: %s' % x)
27
27
28
28
def persisted_data(revlog):
    """read the nodemap for a revlog from disk

    Returns ``None`` when no usable on-disk nodemap exists (feature disabled,
    missing/empty docket, version mismatch, missing or truncated data file);
    otherwise returns a ``(docket, data)`` pair.
    """
    if revlog.nodemap_file is None:
        return None
    pdata = revlog.opener.tryread(revlog.nodemap_file)
    if not pdata:
        return None
    offset = 0
    # docket layout: version byte, fixed header, uid, tip node
    (version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size])
    if version != ONDISK_VERSION:
        return None
    offset += S_VERSION.size
    headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
    uid_size, tip_rev, data_length, data_unused, tip_node_size = headers
    offset += S_HEADER.size
    docket = NodeMapDocket(pdata[offset : offset + uid_size])
    offset += uid_size
    docket.tip_rev = tip_rev
    docket.tip_node = pdata[offset : offset + tip_node_size]
    docket.data_length = data_length
    docket.data_unused = data_unused

    filename = _rawdata_filepath(revlog, docket)
    use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap")
    try:
        with revlog.opener(filename) as fd:
            if use_mmap:
                data = util.buffer(util.mmapread(fd, data_length))
            else:
                data = fd.read(data_length)
    except (IOError, OSError) as e:
        # a vanished data file is treated like an absent nodemap
        if e.errno == errno.ENOENT:
            return None
        else:
            raise
    if len(data) < data_length:
        # truncated data file: shorter than the docket advertises
        return None
    return docket, data
67
67
68
68
def setup_persistent_nodemap(tr, revlog):
    """Install whatever is needed transaction side to persist a nodemap on disk

    (only actually persist the nodemap if this is relevant for this revlog)
    """
    if revlog._inline:
        return  # inlined revlog are too small for this to be relevant
    if revlog.nodemap_file is None:
        return  # we do not use persistent_nodemap on this revlog

    # we need to happen after the changelog finalization, in that use "cl-"
    callback_id = b"nm-revlog-persistent-nodemap-%s" % revlog.nodemap_file
    if tr.hasfinalize(callback_id):
        return  # no need to register again
    # write pending data (suffixed `.a`) while the transaction is open, and
    # the final version when it is finalized
    tr.addpending(
        callback_id, lambda tr: persist_nodemap(tr, revlog, pending=True)
    )
    tr.addfinalize(callback_id, lambda tr: persist_nodemap(tr, revlog))
87
87
88
88
89 class _NoTransaction(object):
89 class _NoTransaction(object):
90 """transaction like object to update the nodemap outside a transaction"""
90 """transaction like object to update the nodemap outside a transaction"""
91
91
92 def __init__(self):
92 def __init__(self):
93 self._postclose = {}
93 self._postclose = {}
94
94
95 def addpostclose(self, callback_id, callback_func):
95 def addpostclose(self, callback_id, callback_func):
96 self._postclose[callback_id] = callback_func
96 self._postclose[callback_id] = callback_func
97
97
98 def registertmp(self, *args, **kwargs):
98 def registertmp(self, *args, **kwargs):
99 pass
99 pass
100
100
101 def addbackup(self, *args, **kwargs):
101 def addbackup(self, *args, **kwargs):
102 pass
102 pass
103
103
104 def add(self, *args, **kwargs):
104 def add(self, *args, **kwargs):
105 pass
105 pass
106
106
107 def addabort(self, *args, **kwargs):
107 def addabort(self, *args, **kwargs):
108 pass
108 pass
109
109
110 def _report(self, *args):
110 def _report(self, *args):
111 pass
111 pass
112
112
113
113
def update_persistent_nodemap(revlog):
    """update the persistent nodemap right now

    To be used for updating the nodemap on disk outside of a normal transaction
    setup (eg, `debugupdatecache`).
    """
    if revlog._inline:
        return  # inlined revlog are too small for this to be relevant
    if revlog.nodemap_file is None:
        return  # we do not use persistent_nodemap on this revlog

    # drive the write through a stub transaction, then replay the
    # post-close callbacks it accumulated (eg, old-file cleanup)
    notr = _NoTransaction()
    persist_nodemap(notr, revlog)
    for k in sorted(notr._postclose):
        notr._postclose[k](None)
129
129
130
130
def persist_nodemap(tr, revlog, pending=False):
    """Write nodemap data on disk for a given revlog

    ``tr`` may be a real transaction or a ``_NoTransaction`` stub.  When
    ``pending`` is True the docket is written to a ``.a`` suffixed file so an
    open transaction's data stays separate from the final version.

    Raises ``error.ProgrammingError`` when called on a filtered changelog or
    on a revlog without the persistent-nodemap feature.
    """
    if getattr(revlog, 'filteredrevs', ()):
        raise error.ProgrammingError(
            "cannot persist nodemap of a filtered changelog"
        )
    if revlog.nodemap_file is None:
        # BUGFIX: the message used to read "enableb"
        msg = "calling persist nodemap on a revlog without the feature enabled"
        raise error.ProgrammingError(msg)

    can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental")
    ondisk_docket = revlog._nodemap_docket
    feed_data = util.safehasattr(revlog.index, "update_nodemap_data")
    use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap")

    data = None
    # first attempt an incremental update of the data
    if can_incremental and ondisk_docket is not None:
        target_docket = revlog._nodemap_docket.copy()
        (
            src_docket,
            data_changed_count,
            data,
        ) = revlog.index.nodemap_data_incremental()
        new_length = target_docket.data_length + len(data)
        new_unused = target_docket.data_unused + data_changed_count
        if src_docket != target_docket:
            # the index data diverged from the on-disk docket, a full
            # rewrite is needed
            data = None
        elif new_length <= (new_unused * 10):  # under 10% of unused data
            # too much dead data would accumulate, rewrite from scratch
            data = None
        else:
            datafile = _rawdata_filepath(revlog, target_docket)
            # EXP-TODO: if this is a cache, this should use a cache vfs, not a
            # store vfs
            tr.add(datafile, target_docket.data_length)
            with revlog.opener(datafile, b'r+') as fd:
                fd.seek(target_docket.data_length)
                fd.write(data)
                if feed_data:
                    # NOTE(review): this branch uses fd.read when mmap is
                    # enabled and mmapread when it is not — the mirror image
                    # of the full-export branch below.  Confirm the inversion
                    # is intentional before "fixing" it.
                    if use_mmap:
                        fd.seek(0)
                        new_data = fd.read(new_length)
                    else:
                        fd.flush()
                        new_data = util.buffer(util.mmapread(fd, new_length))
            target_docket.data_length = new_length
            target_docket.data_unused = new_unused

    if data is None:
        # otherwise fallback to a full new export
        target_docket = NodeMapDocket()
        datafile = _rawdata_filepath(revlog, target_docket)
        if util.safehasattr(revlog.index, "nodemap_data_all"):
            data = revlog.index.nodemap_data_all()
        else:
            data = persistent_data(revlog.index)
        # EXP-TODO: if this is a cache, this should use a cache vfs, not a
        # store vfs

        tryunlink = revlog.opener.tryunlink

        def abortck(tr):
            # remove the freshly written data file if the transaction aborts
            tryunlink(datafile)

        callback_id = b"delete-%s" % datafile

        # some flavor of the transaction abort does not cleanup new file, it
        # simply empty them.
        tr.addabort(callback_id, abortck)
        with revlog.opener(datafile, b'w+') as fd:
            fd.write(data)
            if feed_data:
                if use_mmap:
                    new_data = data
                else:
                    fd.flush()
                    new_data = util.buffer(util.mmapread(fd, len(data)))
        target_docket.data_length = len(data)
    target_docket.tip_rev = revlog.tiprev()
    target_docket.tip_node = revlog.node(target_docket.tip_rev)
    # EXP-TODO: if this is a cache, this should use a cache vfs, not a
    # store vfs
    file_path = revlog.nodemap_file
    if pending:
        file_path += b'.a'
        tr.registertmp(file_path)
    else:
        tr.addbackup(file_path)

    with revlog.opener(file_path, b'w', atomictemp=True) as fp:
        fp.write(target_docket.serialize())
    revlog._nodemap_docket = target_docket
    if feed_data:
        revlog.index.update_nodemap_data(target_docket, new_data)

    # search for old index file in all cases, some older process might have
    # left one behind.
    olds = _other_rawdata_filepath(revlog, target_docket)
    if olds:
        realvfs = getattr(revlog, '_realopener', revlog.opener)

        def cleanup(tr):
            for oldfile in olds:
                realvfs.tryunlink(oldfile)

        callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file
        tr.addpostclose(callback_id, cleanup)
238
238
239
239
240 ### Nodemap docket file
240 ### Nodemap docket file
241 #
241 #
242 # The nodemap data are stored on disk using 2 files:
242 # The nodemap data are stored on disk using 2 files:
243 #
243 #
244 # * a raw data files containing a persistent nodemap
244 # * a raw data files containing a persistent nodemap
245 # (see `Nodemap Trie` section)
245 # (see `Nodemap Trie` section)
246 #
246 #
247 # * a small "docket" file containing metadata
247 # * a small "docket" file containing metadata
248 #
248 #
249 # While the nodemap data can be multiple tens of megabytes, the "docket" is
249 # While the nodemap data can be multiple tens of megabytes, the "docket" is
250 # small, it is easy to update it automatically or to duplicate its content
250 # small, it is easy to update it automatically or to duplicate its content
251 # during a transaction.
251 # during a transaction.
252 #
252 #
253 # Multiple raw data can exist at the same time (The currently valid one and a
253 # Multiple raw data can exist at the same time (The currently valid one and a
254 # new one being used by an in progress transaction). To accommodate this, the
254 # new one being used by an in progress transaction). To accommodate this, the
255 # filename hosting the raw data has variable parts. The exact filename is
255 # filename hosting the raw data has variable parts. The exact filename is
256 # specified inside the "docket" file.
256 # specified inside the "docket" file.
257 #
257 #
258 # The docket file contains information to find, qualify and validate the raw
258 # The docket file contains information to find, qualify and validate the raw
259 # data. Its content is currently very light, but it will expand as the on disk
259 # data. Its content is currently very light, but it will expand as the on disk
260 # nodemap gains the necessary features to be used in production.
260 # nodemap gains the necessary features to be used in production.
261
261
# version byte written at the start of the docket file
ONDISK_VERSION = 1
S_VERSION = struct.Struct(">B")
# fixed docket header: uid size, tip rev, data length, unused bytes,
# tip node size (big endian: one byte then four unsigned 64bit integers)
S_HEADER = struct.Struct(">BQQQQ")

# number of random bytes drawn for a data-file identifier
ID_SIZE = 8
267
267
268
268
def _make_uid():
    """return a new unique identifier.

    The identifier is random and composed of ascii characters."""
    # ID_SIZE random bytes, hex-encoded so the uid is filesystem-safe
    return hex(os.urandom(ID_SIZE))
274
274
275
275
class NodeMapDocket(object):
    """metadata associated with persistent nodemap data

    The persistent data may come from disk or be on their way to disk.
    """

    def __init__(self, uid=None):
        if uid is None:
            uid = _make_uid()
        # a unique identifier for the data file:
        # - When new data are appended, it is preserved.
        # - When a new data file is created, a new identifier is generated.
        self.uid = uid
        # the tipmost revision stored in the data file. This revision and all
        # revision before it are expected to be encoded in the data file.
        self.tip_rev = None
        # the node of that tipmost revision, if it mismatch the current index
        # data the docket is not valid for the current index and should be
        # discarded.
        #
        # note: this method is not perfect as some destructive operation could
        # preserve the same tip_rev + tip_node while altering lower revision.
        # However this multiple other caches have the same vulnerability (eg:
        # branchmap cache).
        self.tip_node = None
        # the size (in bytes) of the persisted data to encode the nodemap valid
        # for `tip_rev`.
        # - data file shorter than this are corrupted,
        # - any extra data should be ignored.
        self.data_length = None
        # the amount (in bytes) of "dead" data, still in the data file but no
        # longer used for the nodemap.
        self.data_unused = 0

    def copy(self):
        # duplicate the docket, sharing the same uid
        new = NodeMapDocket(uid=self.uid)
        new.tip_rev = self.tip_rev
        new.tip_node = self.tip_node
        new.data_length = self.data_length
        new.data_unused = self.data_unused
        return new

    def __cmp__(self, other):
        # ordering: uid first, then data_length (py2 compatibility only)
        if self.uid < other.uid:
            return -1
        if self.uid > other.uid:
            return 1
        elif self.data_length < other.data_length:
            return -1
        elif self.data_length > other.data_length:
            return 1
        return 0

    def __eq__(self, other):
        # equality only looks at uid + data_length; other fields are derived
        return self.uid == other.uid and self.data_length == other.data_length

    def serialize(self):
        """return serialized bytes for a docket using the passed uid"""
        headers = (
            len(self.uid),
            self.tip_rev,
            self.data_length,
            self.data_unused,
            len(self.tip_node),
        )
        parts = [
            S_VERSION.pack(ONDISK_VERSION),
            S_HEADER.pack(*headers),
            self.uid,
            self.tip_node,
        ]
        return b''.join(parts)
347
347
348
348
349 def _rawdata_filepath(revlog, docket):
349 def _rawdata_filepath(revlog, docket):
350 """The (vfs relative) nodemap's rawdata file for a given uid"""
350 """The (vfs relative) nodemap's rawdata file for a given uid"""
351 if revlog.nodemap_file.endswith(b'.n.a'):
351 if revlog.nodemap_file.endswith(b'.n.a'):
352 prefix = revlog.nodemap_file[:-4]
352 prefix = revlog.nodemap_file[:-4]
353 else:
353 else:
354 prefix = revlog.nodemap_file[:-2]
354 prefix = revlog.nodemap_file[:-2]
355 return b"%s-%s.nd" % (prefix, docket.uid)
355 return b"%s-%s.nd" % (prefix, docket.uid)
356
356
357
357
def _other_rawdata_filepath(revlog, docket):
    """list stale rawdata files for this revlog (any uid but the docket's)"""
    prefix = revlog.nodemap_file[:-2]
    pattern = re.compile(br"(^|/)%s-[0-9a-f]+\.nd$" % prefix)
    new_file_path = _rawdata_filepath(revlog, docket)
    new_file_name = revlog.opener.basename(new_file_path)
    dirpath = revlog.opener.dirname(new_file_path)
    # every sibling that matches the rawdata naming scheme but is not the
    # currently valid file
    return [
        f
        for f in revlog.opener.listdir(dirpath)
        if pattern.match(f) and f != new_file_name
    ]
369
369
370
370
371 ### Nodemap Trie
371 ### Nodemap Trie
372 #
372 #
373 # This is a simple reference implementation to compute and persist a nodemap
373 # This is a simple reference implementation to compute and persist a nodemap
374 # trie. This reference implementation is write only. The python version of this
374 # trie. This reference implementation is write only. The python version of this
375 # is not expected to be actually used, since it won't provide performance
375 # is not expected to be actually used, since it won't provide performance
376 # improvement over existing non-persistent C implementation.
376 # improvement over existing non-persistent C implementation.
377 #
377 #
378 # The nodemap is persisted as Trie using 4bits-address/16-entries block. each
378 # The nodemap is persisted as Trie using 4bits-address/16-entries block. each
379 # revision can be addressed using its node shortest prefix.
379 # revision can be addressed using its node shortest prefix.
380 #
380 #
381 # The trie is stored as a sequence of block. Each block contains 16 entries
381 # The trie is stored as a sequence of block. Each block contains 16 entries
382 # (signed 64bit integer, big endian). Each entry can be one of the following:
382 # (signed 64bit integer, big endian). Each entry can be one of the following:
383 #
383 #
384 # * value >= 0 -> index of sub-block
384 # * value >= 0 -> index of sub-block
385 # * value == -1 -> no value
385 # * value == -1 -> no value
386 # * value < -1 -> encoded revision: rev = -(value+2)
386 # * value < -1 -> encoded revision: rev = -(value+2)
387 #
387 #
388 # See REV_OFFSET and _transform_rev below.
388 # See REV_OFFSET and _transform_rev below.
389 #
389 #
390 # The implementation focus on simplicity, not on performance. A Rust
390 # The implementation focus on simplicity, not on performance. A Rust
391 # implementation should provide a efficient version of the same binary
391 # implementation should provide a efficient version of the same binary
392 # persistence. This reference python implementation is never meant to be
392 # persistence. This reference python implementation is never meant to be
393 # extensively used in production.
393 # extensively used in production.
394
394
395
395
def persistent_data(index):
    """return the persistent binary form for a nodemap for a given index"""
    # build the full trie in memory, then serialize it
    return _persist_trie(_build_trie(index))
400
400
401
401
def update_persistent_data(index, root, max_idx, last_rev):
    """return the incremental update for persistent nodemap from a given index

    Returns ``(dead_bytes, new_data)``: the number of bytes invalidated in
    the existing file and the serialized bytes to append after it.
    """
    changed_block, trie = _update_trie(index, root, last_rev)
    dead_bytes = changed_block * S_BLOCK.size
    return (dead_bytes, _persist_trie(trie, existing_idx=max_idx))
409
409
410
410
# one trie block: 16 signed 32bit big-endian slots
S_BLOCK = struct.Struct(">" + ("l" * 16))

# slot value meaning "no entry"
NO_ENTRY = -1
# rev 0 need to be -2 because 0 is used by block, -1 is a special value.
REV_OFFSET = 2
416
416
417
417
def _transform_rev(rev):
    """Return the number used to represent the rev in the tree.

    (or retrieve a rev number from such representation)

    Note that this is an involution, a function equal to its inverse (i.e.
    which gives the identity when applied to itself).
    """
    return -(rev + REV_OFFSET)
427
427
428
428
429 def _to_int(hex_digit):
429 def _to_int(hex_digit):
430 """turn an hexadecimal digit into a proper integer"""
430 """turn an hexadecimal digit into a proper integer"""
431 return int(hex_digit, 16)
431 return int(hex_digit, 16)
432
432
433
433
class Block(dict):
    """represent a block of the Trie

    contains up to 16 entry indexed from 0 to 15"""

    def __init__(self):
        super(Block, self).__init__()
        # If this block exist on disk, here is its ID
        self.ondisk_id = None

    def __iter__(self):
        # always yield exactly 16 slots; absent entries come out as None
        return iter(self.get(i) for i in range(16))
446
446
447
447
def _build_trie(index):
    """build a nodemap trie

    The nodemap stores revision number for each unique prefix.

    Each block is a dictionary with keys in `[0, 15]`. Values are either
    another block or a revision number.
    """
    root = Block()
    for rev in range(len(index)):
        # index[rev][7] holds the node of that revision
        current_hex = hex(index[rev][7])
        _insert_into_block(index, 0, root, rev, current_hex)
    return root
461
461
462
462
def _update_trie(index, root, last_rev):
    """insert revisions newer than ``last_rev`` into an existing trie

    Returns ``(changed, root)`` where ``changed`` counts blocks touched by
    the insertions.
    """
    changed = 0
    for rev in range(last_rev + 1, len(index)):
        current_hex = hex(index[rev][7])
        changed += _insert_into_block(index, 0, root, rev, current_hex)
    return changed, root
470
470
471
471
def _insert_into_block(index, level, block, current_rev, current_hex):
    """insert a new revision in a block

    index: the index we are adding revision for
    level: the depth of the current block in the trie
    block: the block currently being considered
    current_rev: the revision number we are adding
    current_hex: the hexadecimal representation of the node of that revision

    Returns the number of blocks touched (this one plus any deeper ones).
    """
    changed = 1
    if block.ondisk_id is not None:
        # the block is being mutated: its on-disk copy is now stale
        block.ondisk_id = None
    hex_digit = _to_int(current_hex[level : level + 1])
    entry = block.get(hex_digit)
    if entry is None:
        # no entry, simply store the revision number
        block[hex_digit] = current_rev
    elif isinstance(entry, dict):
        # need to recurse to an underlying block
        changed += _insert_into_block(
            index, level + 1, entry, current_rev, current_hex
        )
    else:
        # collision with a previously unique prefix, inserting new
        # vertices to fit both entry.
        other_rev = entry
        other_hex = hex(index[other_rev][7])
        new = Block()
        block[hex_digit] = new
        _insert_into_block(index, level + 1, new, other_rev, other_hex)
        _insert_into_block(index, level + 1, new, current_rev, current_hex)
    return changed
504
504
505
505
def _persist_trie(root, existing_idx=None):
    """serialize a nodemap trie as persistent binary data

    See `_build_trie` for nodemap trie structure

    When ``existing_idx`` is not None, blocks that already have an on-disk
    id are skipped and freshly serialized blocks are numbered starting
    right after ``existing_idx`` (incremental append).
    """
    if existing_idx is None:
        next_base = 0
    else:
        next_base = existing_idx + 1
    block_map = {}
    chunks = []
    # children are walked before their parent, so every block id a parent
    # refers to is already present in block_map when the parent is packed
    for node in _walk_trie(root):
        if node.ondisk_id is not None:
            # unchanged since the last write: reuse the existing location
            block_map[id(node)] = node.ondisk_id
        else:
            block_map[id(node)] = next_base + len(chunks)
            chunks.append(_persist_block(node, block_map))
    return b''.join(chunks)
523
523
524
524
525 def _walk_trie(block):
525 def _walk_trie(block):
526 """yield all the block in a trie
526 """yield all the block in a trie
527
527
528 Children blocks are always yield before their parent block.
528 Children blocks are always yield before their parent block.
529 """
529 """
530 for (__, item) in sorted(block.items()):
530 for (__, item) in sorted(block.items()):
531 if isinstance(item, dict):
531 if isinstance(item, dict):
532 for sub_block in _walk_trie(item):
532 for sub_block in _walk_trie(item):
533 yield sub_block
533 yield sub_block
534 yield block
534 yield block
535
535
536
536
def _persist_block(block_node, block_map):
    """produce persistent binary data for a single block

    Children block are assumed to be already persisted and present in
    block_map.
    """
    packed_values = [_to_value(entry, block_map) for entry in block_node]
    return S_BLOCK.pack(*packed_values)
545
545
546
546
def _to_value(item, block_map):
    """persist any value as an integer"""
    # inner blocks are encoded as their (non-negative) on-disk index,
    # revisions as a transformed (negative) number, empty slots as NO_ENTRY
    if isinstance(item, dict):
        return block_map[id(item)]
    if item is None:
        return NO_ENTRY
    return _transform_rev(item)
555
555
556
556
def parse_data(data):
    """parse nodemap data into a nodemap Trie

    Returns a ``(root, last_block_index)`` tuple, where ``root`` is the
    root block of the rebuilt trie and ``last_block_index`` the on-disk
    index of the last block read (``None`` for empty data).
    """
    if (len(data) % S_BLOCK.size) != 0:
        msg = "nodemap data size is not a multiple of block size (%d): %d"
        raise error.Abort(msg % (S_BLOCK.size, len(data)))
    if not data:
        return Block(), None
    # first pass: materialize one Block per on-disk record
    block_map = {}
    raw_blocks = []
    for offset in range(0, len(data), S_BLOCK.size):
        node = Block()
        node.ondisk_id = len(block_map)
        block_map[node.ondisk_id] = node
        values = S_BLOCK.unpack(data[offset : offset + S_BLOCK.size])
        raw_blocks.append((node, values))
    # second pass: resolve inter-block references now that all exist
    for node, values in raw_blocks:
        for slot, value in enumerate(values):
            if value == NO_ENTRY:
                continue
            if value >= 0:
                # non-negative entries point to another block
                node[slot] = block_map[value]
            else:
                # negative entries encode a revision number
                node[slot] = _transform_rev(value)
    # blocks are serialized children first, so the last block is the root
    last_idx = (len(data) // S_BLOCK.size) - 1
    return block_map[last_idx], last_idx
582
582
583
583
584 # debug utility
584 # debug utility
585
585
586
586
def check_data(ui, index, data):
    """verify that the provided nodemap data are valid for the given index

    Walks every revision of ``index`` and checks that the trie decoded
    from ``data`` maps each node back to the right revision, reporting
    missing, mismatching and extra entries on ``ui``.

    Returns 0 when the nodemap is consistent with the index, 1 otherwise.
    """
    ret = 0  # exit status; flipped to 1 on the first inconsistency found
    ui.status((b"revision in index: %d\n") % len(index))
    # rebuild the in-memory trie from the raw persistent data
    root, __ = parse_data(data)
    all_revs = set(_all_revisions(root))
    ui.status((b"revision in nodemap: %d\n") % len(all_revs))
    for r in range(len(index)):
        if r not in all_revs:
            msg = b" revision missing from nodemap: %d\n" % r
            ui.write_err(msg)
            ret = 1
        else:
            all_revs.remove(r)
            # look the revision's node up in the trie and make sure it
            # resolves back to the same revision number
            nm_rev = _find_node(root, hex(index[r][7]))
            if nm_rev is None:
                msg = b" revision node does not match any entries: %d\n" % r
                ui.write_err(msg)
                ret = 1
            elif nm_rev != r:
                msg = (
                    b" revision node does not match the expected revision: "
                    b"%d != %d\n" % (r, nm_rev)
                )
                ui.write_err(msg)
                ret = 1

    # anything left in all_revs exists in the nodemap but not in the index
    if all_revs:
        for r in sorted(all_revs):
            msg = b" extra revision in nodemap: %d\n" % r
            ui.write_err(msg)
            ret = 1
    return ret
620
620
621
621
def _all_revisions(root):
    """return all revisions stored in a Trie"""
    for node in _walk_trie(root):
        for entry in node:
            # skip empty slots and references to inner blocks;
            # everything else is a revision number
            if entry is not None and not isinstance(entry, Block):
                yield entry
629
629
630
630
def _find_node(block, node):
    """find the revision associated with a given node

    Follows one hex digit of ``node`` per trie level; returns the stored
    revision, or None when the prefix hits an empty slot.
    """
    entry = block.get(_to_int(node[0:1]))
    while isinstance(entry, dict):
        node = node[1:]
        entry = entry.get(_to_int(node[0:1]))
    return entry
General Comments 0
You need to be logged in to leave comments. Login now