##// END OF EJS Templates
nodemap: add a optional `nodemap_add_full` method on indexes...
marmoute -
r44795:7f4f7ef3 default
parent child Browse files
Show More
@@ -1,251 +1,258 b''
1 # parsers.py - Python implementation of parsers.c
1 # parsers.py - Python implementation of parsers.c
2 #
2 #
3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import struct
10 import struct
11 import zlib
11 import zlib
12
12
13 from ..node import nullid, nullrev
13 from ..node import nullid, nullrev
14 from .. import (
14 from .. import (
15 pycompat,
15 pycompat,
16 util,
16 util,
17 )
17 )
18
18
19 from ..revlogutils import nodemap as nodemaputil
19 from ..revlogutils import nodemap as nodemaputil
20
20
21 stringio = pycompat.bytesio
21 stringio = pycompat.bytesio
22
22
23
23
24 _pack = struct.pack
24 _pack = struct.pack
25 _unpack = struct.unpack
25 _unpack = struct.unpack
26 _compress = zlib.compress
26 _compress = zlib.compress
27 _decompress = zlib.decompress
27 _decompress = zlib.decompress
28
28
29 # Some code below makes tuples directly because it's more convenient. However,
29 # Some code below makes tuples directly because it's more convenient. However,
30 # code outside this module should always use dirstatetuple.
30 # code outside this module should always use dirstatetuple.
31 def dirstatetuple(*x):
31 def dirstatetuple(*x):
32 # x is a tuple
32 # x is a tuple
33 return x
33 return x
34
34
35
35
36 indexformatng = b">Qiiiiii20s12x"
36 indexformatng = b">Qiiiiii20s12x"
37 indexfirst = struct.calcsize(b'Q')
37 indexfirst = struct.calcsize(b'Q')
38 sizeint = struct.calcsize(b'i')
38 sizeint = struct.calcsize(b'i')
39 indexsize = struct.calcsize(indexformatng)
39 indexsize = struct.calcsize(indexformatng)
40
40
41
41
42 def gettype(q):
42 def gettype(q):
43 return int(q & 0xFFFF)
43 return int(q & 0xFFFF)
44
44
45
45
46 def offset_type(offset, type):
46 def offset_type(offset, type):
47 return int(int(offset) << 16 | type)
47 return int(int(offset) << 16 | type)
48
48
49
49
50 class BaseIndexObject(object):
50 class BaseIndexObject(object):
51 @property
51 @property
52 def nodemap(self):
52 def nodemap(self):
53 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
53 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
54 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
54 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
55 return self._nodemap
55 return self._nodemap
56
56
57 @util.propertycache
57 @util.propertycache
58 def _nodemap(self):
58 def _nodemap(self):
59 nodemap = nodemaputil.NodeMap({nullid: nullrev})
59 nodemap = nodemaputil.NodeMap({nullid: nullrev})
60 for r in range(0, len(self)):
60 for r in range(0, len(self)):
61 n = self[r][7]
61 n = self[r][7]
62 nodemap[n] = r
62 nodemap[n] = r
63 return nodemap
63 return nodemap
64
64
65 def has_node(self, node):
65 def has_node(self, node):
66 """return True if the node exist in the index"""
66 """return True if the node exist in the index"""
67 return node in self._nodemap
67 return node in self._nodemap
68
68
69 def rev(self, node):
69 def rev(self, node):
70 """return a revision for a node
70 """return a revision for a node
71
71
72 If the node is unknown, raise a RevlogError"""
72 If the node is unknown, raise a RevlogError"""
73 return self._nodemap[node]
73 return self._nodemap[node]
74
74
75 def get_rev(self, node):
75 def get_rev(self, node):
76 """return a revision for a node
76 """return a revision for a node
77
77
78 If the node is unknown, return None"""
78 If the node is unknown, return None"""
79 return self._nodemap.get(node)
79 return self._nodemap.get(node)
80
80
81 def _stripnodes(self, start):
81 def _stripnodes(self, start):
82 if '_nodemap' in vars(self):
82 if '_nodemap' in vars(self):
83 for r in range(start, len(self)):
83 for r in range(start, len(self)):
84 n = self[r][7]
84 n = self[r][7]
85 del self._nodemap[n]
85 del self._nodemap[n]
86
86
87 def clearcaches(self):
87 def clearcaches(self):
88 self.__dict__.pop('_nodemap', None)
88 self.__dict__.pop('_nodemap', None)
89
89
90 def __len__(self):
90 def __len__(self):
91 return self._lgt + len(self._extra)
91 return self._lgt + len(self._extra)
92
92
93 def append(self, tup):
93 def append(self, tup):
94 if '_nodemap' in vars(self):
94 if '_nodemap' in vars(self):
95 self._nodemap[tup[7]] = len(self)
95 self._nodemap[tup[7]] = len(self)
96 self._extra.append(tup)
96 self._extra.append(tup)
97
97
98 def _check_index(self, i):
98 def _check_index(self, i):
99 if not isinstance(i, int):
99 if not isinstance(i, int):
100 raise TypeError(b"expecting int indexes")
100 raise TypeError(b"expecting int indexes")
101 if i < 0 or i >= len(self):
101 if i < 0 or i >= len(self):
102 raise IndexError
102 raise IndexError
103
103
104 def __getitem__(self, i):
104 def __getitem__(self, i):
105 if i == -1:
105 if i == -1:
106 return (0, 0, 0, -1, -1, -1, -1, nullid)
106 return (0, 0, 0, -1, -1, -1, -1, nullid)
107 self._check_index(i)
107 self._check_index(i)
108 if i >= self._lgt:
108 if i >= self._lgt:
109 return self._extra[i - self._lgt]
109 return self._extra[i - self._lgt]
110 index = self._calculate_index(i)
110 index = self._calculate_index(i)
111 r = struct.unpack(indexformatng, self._data[index : index + indexsize])
111 r = struct.unpack(indexformatng, self._data[index : index + indexsize])
112 if i == 0:
112 if i == 0:
113 e = list(r)
113 e = list(r)
114 type = gettype(e[0])
114 type = gettype(e[0])
115 e[0] = offset_type(0, type)
115 e[0] = offset_type(0, type)
116 return tuple(e)
116 return tuple(e)
117 return r
117 return r
118
118
119
119
120 class IndexObject(BaseIndexObject):
120 class IndexObject(BaseIndexObject):
121 def __init__(self, data):
121 def __init__(self, data):
122 assert len(data) % indexsize == 0
122 assert len(data) % indexsize == 0
123 self._data = data
123 self._data = data
124 self._lgt = len(data) // indexsize
124 self._lgt = len(data) // indexsize
125 self._extra = []
125 self._extra = []
126
126
127 def _calculate_index(self, i):
127 def _calculate_index(self, i):
128 return i * indexsize
128 return i * indexsize
129
129
130 def __delitem__(self, i):
130 def __delitem__(self, i):
131 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
131 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
132 raise ValueError(b"deleting slices only supports a:-1 with step 1")
132 raise ValueError(b"deleting slices only supports a:-1 with step 1")
133 i = i.start
133 i = i.start
134 self._check_index(i)
134 self._check_index(i)
135 self._stripnodes(i)
135 self._stripnodes(i)
136 if i < self._lgt:
136 if i < self._lgt:
137 self._data = self._data[: i * indexsize]
137 self._data = self._data[: i * indexsize]
138 self._lgt = i
138 self._lgt = i
139 self._extra = []
139 self._extra = []
140 else:
140 else:
141 self._extra = self._extra[: i - self._lgt]
141 self._extra = self._extra[: i - self._lgt]
142
142
143
143
144 class PersistentNodeMapIndexObject(IndexObject):
144 class PersistentNodeMapIndexObject(IndexObject):
145 """a Debug oriented class to test persistent nodemap
145 """a Debug oriented class to test persistent nodemap
146
146
147 We need a simple python object to test API and higher level behavior. See
147 We need a simple python object to test API and higher level behavior. See
148 the Rust implementation for more serious usage. This should be used only
148 the Rust implementation for more serious usage. This should be used only
149 through the dedicated `devel.persistent-nodemap` config.
149 through the dedicated `devel.persistent-nodemap` config.
150 """
150 """
151
151
152 def nodemap_data_all(self):
153 """Return bytes containing a full serialization of a nodemap
154
155 The nodemap should be valid for the full set of revisions in the
156 index."""
157 return nodemaputil.persistent_data(self)
158
152
159
153 class InlinedIndexObject(BaseIndexObject):
160 class InlinedIndexObject(BaseIndexObject):
154 def __init__(self, data, inline=0):
161 def __init__(self, data, inline=0):
155 self._data = data
162 self._data = data
156 self._lgt = self._inline_scan(None)
163 self._lgt = self._inline_scan(None)
157 self._inline_scan(self._lgt)
164 self._inline_scan(self._lgt)
158 self._extra = []
165 self._extra = []
159
166
160 def _inline_scan(self, lgt):
167 def _inline_scan(self, lgt):
161 off = 0
168 off = 0
162 if lgt is not None:
169 if lgt is not None:
163 self._offsets = [0] * lgt
170 self._offsets = [0] * lgt
164 count = 0
171 count = 0
165 while off <= len(self._data) - indexsize:
172 while off <= len(self._data) - indexsize:
166 (s,) = struct.unpack(
173 (s,) = struct.unpack(
167 b'>i', self._data[off + indexfirst : off + sizeint + indexfirst]
174 b'>i', self._data[off + indexfirst : off + sizeint + indexfirst]
168 )
175 )
169 if lgt is not None:
176 if lgt is not None:
170 self._offsets[count] = off
177 self._offsets[count] = off
171 count += 1
178 count += 1
172 off += indexsize + s
179 off += indexsize + s
173 if off != len(self._data):
180 if off != len(self._data):
174 raise ValueError(b"corrupted data")
181 raise ValueError(b"corrupted data")
175 return count
182 return count
176
183
177 def __delitem__(self, i):
184 def __delitem__(self, i):
178 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
185 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
179 raise ValueError(b"deleting slices only supports a:-1 with step 1")
186 raise ValueError(b"deleting slices only supports a:-1 with step 1")
180 i = i.start
187 i = i.start
181 self._check_index(i)
188 self._check_index(i)
182 self._stripnodes(i)
189 self._stripnodes(i)
183 if i < self._lgt:
190 if i < self._lgt:
184 self._offsets = self._offsets[:i]
191 self._offsets = self._offsets[:i]
185 self._lgt = i
192 self._lgt = i
186 self._extra = []
193 self._extra = []
187 else:
194 else:
188 self._extra = self._extra[: i - self._lgt]
195 self._extra = self._extra[: i - self._lgt]
189
196
190 def _calculate_index(self, i):
197 def _calculate_index(self, i):
191 return self._offsets[i]
198 return self._offsets[i]
192
199
193
200
194 def parse_index2(data, inline):
201 def parse_index2(data, inline):
195 if not inline:
202 if not inline:
196 return IndexObject(data), None
203 return IndexObject(data), None
197 return InlinedIndexObject(data, inline), (0, data)
204 return InlinedIndexObject(data, inline), (0, data)
198
205
199
206
200 def parse_index_devel_nodemap(data, inline):
207 def parse_index_devel_nodemap(data, inline):
201 """like parse_index2, but alway return a PersistentNodeMapIndexObject
208 """like parse_index2, but alway return a PersistentNodeMapIndexObject
202 """
209 """
203 return PersistentNodeMapIndexObject(data), None
210 return PersistentNodeMapIndexObject(data), None
204
211
205
212
206 def parse_dirstate(dmap, copymap, st):
213 def parse_dirstate(dmap, copymap, st):
207 parents = [st[:20], st[20:40]]
214 parents = [st[:20], st[20:40]]
208 # dereference fields so they will be local in loop
215 # dereference fields so they will be local in loop
209 format = b">cllll"
216 format = b">cllll"
210 e_size = struct.calcsize(format)
217 e_size = struct.calcsize(format)
211 pos1 = 40
218 pos1 = 40
212 l = len(st)
219 l = len(st)
213
220
214 # the inner loop
221 # the inner loop
215 while pos1 < l:
222 while pos1 < l:
216 pos2 = pos1 + e_size
223 pos2 = pos1 + e_size
217 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
224 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
218 pos1 = pos2 + e[4]
225 pos1 = pos2 + e[4]
219 f = st[pos2:pos1]
226 f = st[pos2:pos1]
220 if b'\0' in f:
227 if b'\0' in f:
221 f, c = f.split(b'\0')
228 f, c = f.split(b'\0')
222 copymap[f] = c
229 copymap[f] = c
223 dmap[f] = e[:4]
230 dmap[f] = e[:4]
224 return parents
231 return parents
225
232
226
233
227 def pack_dirstate(dmap, copymap, pl, now):
234 def pack_dirstate(dmap, copymap, pl, now):
228 now = int(now)
235 now = int(now)
229 cs = stringio()
236 cs = stringio()
230 write = cs.write
237 write = cs.write
231 write(b"".join(pl))
238 write(b"".join(pl))
232 for f, e in pycompat.iteritems(dmap):
239 for f, e in pycompat.iteritems(dmap):
233 if e[0] == b'n' and e[3] == now:
240 if e[0] == b'n' and e[3] == now:
234 # The file was last modified "simultaneously" with the current
241 # The file was last modified "simultaneously" with the current
235 # write to dirstate (i.e. within the same second for file-
242 # write to dirstate (i.e. within the same second for file-
236 # systems with a granularity of 1 sec). This commonly happens
243 # systems with a granularity of 1 sec). This commonly happens
237 # for at least a couple of files on 'update'.
244 # for at least a couple of files on 'update'.
238 # The user could change the file without changing its size
245 # The user could change the file without changing its size
239 # within the same second. Invalidate the file's mtime in
246 # within the same second. Invalidate the file's mtime in
240 # dirstate, forcing future 'status' calls to compare the
247 # dirstate, forcing future 'status' calls to compare the
241 # contents of the file if the size is the same. This prevents
248 # contents of the file if the size is the same. This prevents
242 # mistakenly treating such files as clean.
249 # mistakenly treating such files as clean.
243 e = dirstatetuple(e[0], e[1], e[2], -1)
250 e = dirstatetuple(e[0], e[1], e[2], -1)
244 dmap[f] = e
251 dmap[f] = e
245
252
246 if f in copymap:
253 if f in copymap:
247 f = b"%s\0%s" % (f, copymap[f])
254 f = b"%s\0%s" % (f, copymap[f])
248 e = _pack(b">cllll", e[0], e[1], e[2], e[3], len(f))
255 e = _pack(b">cllll", e[0], e[1], e[2], e[3], len(f))
249 write(e)
256 write(e)
250 write(f)
257 write(f)
251 return cs.getvalue()
258 return cs.getvalue()
@@ -1,300 +1,303 b''
1 # nodemap.py - nodemap related code and utilities
1 # nodemap.py - nodemap related code and utilities
2 #
2 #
3 # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
3 # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
4 # Copyright 2019 George Racinet <georges.racinet@octobus.net>
4 # Copyright 2019 George Racinet <georges.racinet@octobus.net>
5 #
5 #
6 # This software may be used and distributed according to the terms of the
6 # This software may be used and distributed according to the terms of the
7 # GNU General Public License version 2 or any later version.
7 # GNU General Public License version 2 or any later version.
8
8
9 from __future__ import absolute_import
9 from __future__ import absolute_import
10
10
11 import os
11 import os
12 import re
12 import re
13 import struct
13 import struct
14
14
15 from .. import (
15 from .. import (
16 error,
16 error,
17 node as nodemod,
17 node as nodemod,
18 pycompat,
18 util,
19 )
19 )
20
20
21
21
22 class NodeMap(dict):
22 class NodeMap(dict):
23 def __missing__(self, x):
23 def __missing__(self, x):
24 raise error.RevlogError(b'unknown node: %s' % x)
24 raise error.RevlogError(b'unknown node: %s' % x)
25
25
26
26
27 def persisted_data(revlog):
27 def persisted_data(revlog):
28 """read the nodemap for a revlog from disk"""
28 """read the nodemap for a revlog from disk"""
29 if revlog.nodemap_file is None:
29 if revlog.nodemap_file is None:
30 return None
30 return None
31 pdata = revlog.opener.tryread(revlog.nodemap_file)
31 pdata = revlog.opener.tryread(revlog.nodemap_file)
32 if not pdata:
32 if not pdata:
33 return None
33 return None
34 offset = 0
34 offset = 0
35 (version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size])
35 (version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size])
36 if version != ONDISK_VERSION:
36 if version != ONDISK_VERSION:
37 return None
37 return None
38 offset += S_VERSION.size
38 offset += S_VERSION.size
39 (uuid_size,) = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
39 (uuid_size,) = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
40 offset += S_HEADER.size
40 offset += S_HEADER.size
41 uid = pdata[offset : offset + uuid_size]
41 uid = pdata[offset : offset + uuid_size]
42
42
43 filename = _rawdata_filepath(revlog, uid)
43 filename = _rawdata_filepath(revlog, uid)
44 return revlog.opener.tryread(filename)
44 return revlog.opener.tryread(filename)
45
45
46
46
47 def setup_persistent_nodemap(tr, revlog):
47 def setup_persistent_nodemap(tr, revlog):
48 """Install whatever is needed transaction side to persist a nodemap on disk
48 """Install whatever is needed transaction side to persist a nodemap on disk
49
49
50 (only actually persist the nodemap if this is relevant for this revlog)
50 (only actually persist the nodemap if this is relevant for this revlog)
51 """
51 """
52 if revlog._inline:
52 if revlog._inline:
53 return # inlined revlog are too small for this to be relevant
53 return # inlined revlog are too small for this to be relevant
54 if revlog.nodemap_file is None:
54 if revlog.nodemap_file is None:
55 return # we do not use persistent_nodemap on this revlog
55 return # we do not use persistent_nodemap on this revlog
56 callback_id = b"revlog-persistent-nodemap-%s" % revlog.nodemap_file
56 callback_id = b"revlog-persistent-nodemap-%s" % revlog.nodemap_file
57 if tr.hasfinalize(callback_id):
57 if tr.hasfinalize(callback_id):
58 return # no need to register again
58 return # no need to register again
59 tr.addfinalize(callback_id, lambda tr: _persist_nodemap(tr, revlog))
59 tr.addfinalize(callback_id, lambda tr: _persist_nodemap(tr, revlog))
60
60
61
61
62 def _persist_nodemap(tr, revlog):
62 def _persist_nodemap(tr, revlog):
63 """Write nodemap data on disk for a given revlog
63 """Write nodemap data on disk for a given revlog
64 """
64 """
65 if getattr(revlog, 'filteredrevs', ()):
65 if getattr(revlog, 'filteredrevs', ()):
66 raise error.ProgrammingError(
66 raise error.ProgrammingError(
67 "cannot persist nodemap of a filtered changelog"
67 "cannot persist nodemap of a filtered changelog"
68 )
68 )
69 if revlog.nodemap_file is None:
69 if revlog.nodemap_file is None:
70 msg = "calling persist nodemap on a revlog without the feature enableb"
70 msg = "calling persist nodemap on a revlog without the feature enableb"
71 raise error.ProgrammingError(msg)
71 raise error.ProgrammingError(msg)
72 data = persistent_data(revlog.index)
72 if util.safehasattr(revlog.index, "nodemap_data_all"):
73 data = revlog.index.nodemap_data_all()
74 else:
75 data = persistent_data(revlog.index)
73 uid = _make_uid()
76 uid = _make_uid()
74 datafile = _rawdata_filepath(revlog, uid)
77 datafile = _rawdata_filepath(revlog, uid)
75 olds = _other_rawdata_filepath(revlog, uid)
78 olds = _other_rawdata_filepath(revlog, uid)
76 if olds:
79 if olds:
77 realvfs = getattr(revlog, '_realopener', revlog.opener)
80 realvfs = getattr(revlog, '_realopener', revlog.opener)
78
81
79 def cleanup(tr):
82 def cleanup(tr):
80 for oldfile in olds:
83 for oldfile in olds:
81 realvfs.tryunlink(oldfile)
84 realvfs.tryunlink(oldfile)
82
85
83 callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file
86 callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file
84 tr.addpostclose(callback_id, cleanup)
87 tr.addpostclose(callback_id, cleanup)
85 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
88 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
86 # store vfs
89 # store vfs
87 with revlog.opener(datafile, b'w') as fd:
90 with revlog.opener(datafile, b'w') as fd:
88 fd.write(data)
91 fd.write(data)
89 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
92 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
90 # store vfs
93 # store vfs
91 with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp:
94 with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp:
92 fp.write(_serialize_docket(uid))
95 fp.write(_serialize_docket(uid))
93 # EXP-TODO: if the transaction abort, we should remove the new data and
96 # EXP-TODO: if the transaction abort, we should remove the new data and
94 # reinstall the old one.
97 # reinstall the old one.
95
98
96
99
97 ### Nodemap docket file
100 ### Nodemap docket file
98 #
101 #
99 # The nodemap data are stored on disk using 2 files:
102 # The nodemap data are stored on disk using 2 files:
100 #
103 #
101 # * a raw data files containing a persistent nodemap
104 # * a raw data files containing a persistent nodemap
102 # (see `Nodemap Trie` section)
105 # (see `Nodemap Trie` section)
103 #
106 #
104 # * a small "docket" file containing medatadata
107 # * a small "docket" file containing medatadata
105 #
108 #
106 # While the nodemap data can be multiple tens of megabytes, the "docket" is
109 # While the nodemap data can be multiple tens of megabytes, the "docket" is
107 # small, it is easy to update it automatically or to duplicated its content
110 # small, it is easy to update it automatically or to duplicated its content
108 # during a transaction.
111 # during a transaction.
109 #
112 #
110 # Multiple raw data can exist at the same time (The currently valid one and a
113 # Multiple raw data can exist at the same time (The currently valid one and a
111 # new one beind used by an in progress transaction). To accomodate this, the
114 # new one beind used by an in progress transaction). To accomodate this, the
112 # filename hosting the raw data has a variable parts. The exact filename is
115 # filename hosting the raw data has a variable parts. The exact filename is
113 # specified inside the "docket" file.
116 # specified inside the "docket" file.
114 #
117 #
115 # The docket file contains information to find, qualify and validate the raw
118 # The docket file contains information to find, qualify and validate the raw
116 # data. Its content is currently very light, but it will expand as the on disk
119 # data. Its content is currently very light, but it will expand as the on disk
117 # nodemap gains the necessary features to be used in production.
120 # nodemap gains the necessary features to be used in production.
118
121
119 # version 0 is experimental, no BC garantee, do no use outside of tests.
122 # version 0 is experimental, no BC garantee, do no use outside of tests.
120 ONDISK_VERSION = 0
123 ONDISK_VERSION = 0
121
124
122 S_VERSION = struct.Struct(">B")
125 S_VERSION = struct.Struct(">B")
123 S_HEADER = struct.Struct(">B")
126 S_HEADER = struct.Struct(">B")
124
127
125 ID_SIZE = 8
128 ID_SIZE = 8
126
129
127
130
128 def _make_uid():
131 def _make_uid():
129 """return a new unique identifier.
132 """return a new unique identifier.
130
133
131 The identifier is random and composed of ascii characters."""
134 The identifier is random and composed of ascii characters."""
132 return nodemod.hex(os.urandom(ID_SIZE))
135 return nodemod.hex(os.urandom(ID_SIZE))
133
136
134
137
135 def _serialize_docket(uid):
138 def _serialize_docket(uid):
136 """return serialized bytes for a docket using the passed uid"""
139 """return serialized bytes for a docket using the passed uid"""
137 data = []
140 data = []
138 data.append(S_VERSION.pack(ONDISK_VERSION))
141 data.append(S_VERSION.pack(ONDISK_VERSION))
139 data.append(S_HEADER.pack(len(uid)))
142 data.append(S_HEADER.pack(len(uid)))
140 data.append(uid)
143 data.append(uid)
141 return b''.join(data)
144 return b''.join(data)
142
145
143
146
144 def _rawdata_filepath(revlog, uid):
147 def _rawdata_filepath(revlog, uid):
145 """The (vfs relative) nodemap's rawdata file for a given uid"""
148 """The (vfs relative) nodemap's rawdata file for a given uid"""
146 prefix = revlog.nodemap_file[:-2]
149 prefix = revlog.nodemap_file[:-2]
147 return b"%s-%s.nd" % (prefix, uid)
150 return b"%s-%s.nd" % (prefix, uid)
148
151
149
152
150 def _other_rawdata_filepath(revlog, uid):
153 def _other_rawdata_filepath(revlog, uid):
151 prefix = revlog.nodemap_file[:-2]
154 prefix = revlog.nodemap_file[:-2]
152 pattern = re.compile(b"(^|/)%s-[0-9a-f]+\.nd$" % prefix)
155 pattern = re.compile(b"(^|/)%s-[0-9a-f]+\.nd$" % prefix)
153 new_file_path = _rawdata_filepath(revlog, uid)
156 new_file_path = _rawdata_filepath(revlog, uid)
154 new_file_name = revlog.opener.basename(new_file_path)
157 new_file_name = revlog.opener.basename(new_file_path)
155 dirpath = revlog.opener.dirname(new_file_path)
158 dirpath = revlog.opener.dirname(new_file_path)
156 others = []
159 others = []
157 for f in revlog.opener.listdir(dirpath):
160 for f in revlog.opener.listdir(dirpath):
158 if pattern.match(f) and f != new_file_name:
161 if pattern.match(f) and f != new_file_name:
159 others.append(f)
162 others.append(f)
160 return others
163 return others
161
164
162
165
163 ### Nodemap Trie
166 ### Nodemap Trie
164 #
167 #
165 # This is a simple reference implementation to compute and persist a nodemap
168 # This is a simple reference implementation to compute and persist a nodemap
166 # trie. This reference implementation is write only. The python version of this
169 # trie. This reference implementation is write only. The python version of this
167 # is not expected to be actually used, since it wont provide performance
170 # is not expected to be actually used, since it wont provide performance
168 # improvement over existing non-persistent C implementation.
171 # improvement over existing non-persistent C implementation.
169 #
172 #
170 # The nodemap is persisted as Trie using 4bits-address/16-entries block. each
173 # The nodemap is persisted as Trie using 4bits-address/16-entries block. each
171 # revision can be adressed using its node shortest prefix.
174 # revision can be adressed using its node shortest prefix.
172 #
175 #
173 # The trie is stored as a sequence of block. Each block contains 16 entries
176 # The trie is stored as a sequence of block. Each block contains 16 entries
174 # (signed 64bit integer, big endian). Each entry can be one of the following:
177 # (signed 64bit integer, big endian). Each entry can be one of the following:
175 #
178 #
176 # * value >= 0 -> index of sub-block
179 # * value >= 0 -> index of sub-block
177 # * value == -1 -> no value
180 # * value == -1 -> no value
178 # * value < -1 -> a revision value: rev = -(value+10)
181 # * value < -1 -> a revision value: rev = -(value+10)
179 #
182 #
180 # The implementation focus on simplicity, not on performance. A Rust
183 # The implementation focus on simplicity, not on performance. A Rust
181 # implementation should provide a efficient version of the same binary
184 # implementation should provide a efficient version of the same binary
182 # persistence. This reference python implementation is never meant to be
185 # persistence. This reference python implementation is never meant to be
183 # extensively use in production.
186 # extensively use in production.
184
187
185
188
186 def persistent_data(index):
189 def persistent_data(index):
187 """return the persistent binary form for a nodemap for a given index
190 """return the persistent binary form for a nodemap for a given index
188 """
191 """
189 trie = _build_trie(index)
192 trie = _build_trie(index)
190 return _persist_trie(trie)
193 return _persist_trie(trie)
191
194
192
195
193 S_BLOCK = struct.Struct(">" + ("l" * 16))
196 S_BLOCK = struct.Struct(">" + ("l" * 16))
194
197
195 NO_ENTRY = -1
198 NO_ENTRY = -1
196 # rev 0 need to be -2 because 0 is used by block, -1 is a special value.
199 # rev 0 need to be -2 because 0 is used by block, -1 is a special value.
197 REV_OFFSET = 2
200 REV_OFFSET = 2
198
201
199
202
200 def _transform_rev(rev):
203 def _transform_rev(rev):
201 """Return the number used to represent the rev in the tree.
204 """Return the number used to represent the rev in the tree.
202
205
203 (or retrieve a rev number from such representation)
206 (or retrieve a rev number from such representation)
204
207
205 Note that this is an involution, a function equal to its inverse (i.e.
208 Note that this is an involution, a function equal to its inverse (i.e.
206 which gives the identity when applied to itself).
209 which gives the identity when applied to itself).
207 """
210 """
208 return -(rev + REV_OFFSET)
211 return -(rev + REV_OFFSET)
209
212
210
213
211 def _to_int(hex_digit):
214 def _to_int(hex_digit):
212 """turn an hexadecimal digit into a proper integer"""
215 """turn an hexadecimal digit into a proper integer"""
213 return int(hex_digit, 16)
216 return int(hex_digit, 16)
214
217
215
218
216 def _build_trie(index):
219 def _build_trie(index):
217 """build a nodemap trie
220 """build a nodemap trie
218
221
219 The nodemap stores revision number for each unique prefix.
222 The nodemap stores revision number for each unique prefix.
220
223
221 Each block is a dictionary with keys in `[0, 15]`. Values are either
224 Each block is a dictionary with keys in `[0, 15]`. Values are either
222 another block or a revision number.
225 another block or a revision number.
223 """
226 """
224 root = {}
227 root = {}
225 for rev in range(len(index)):
228 for rev in range(len(index)):
226 hex = nodemod.hex(index[rev][7])
229 hex = nodemod.hex(index[rev][7])
227 _insert_into_block(index, 0, root, rev, hex)
230 _insert_into_block(index, 0, root, rev, hex)
228 return root
231 return root
229
232
230
233
231 def _insert_into_block(index, level, block, current_rev, current_hex):
234 def _insert_into_block(index, level, block, current_rev, current_hex):
232 """insert a new revision in a block
235 """insert a new revision in a block
233
236
234 index: the index we are adding revision for
237 index: the index we are adding revision for
235 level: the depth of the current block in the trie
238 level: the depth of the current block in the trie
236 block: the block currently being considered
239 block: the block currently being considered
237 current_rev: the revision number we are adding
240 current_rev: the revision number we are adding
238 current_hex: the hexadecimal representation of the of that revision
241 current_hex: the hexadecimal representation of the of that revision
239 """
242 """
240 hex_digit = _to_int(current_hex[level : level + 1])
243 hex_digit = _to_int(current_hex[level : level + 1])
241 entry = block.get(hex_digit)
244 entry = block.get(hex_digit)
242 if entry is None:
245 if entry is None:
243 # no entry, simply store the revision number
246 # no entry, simply store the revision number
244 block[hex_digit] = current_rev
247 block[hex_digit] = current_rev
245 elif isinstance(entry, dict):
248 elif isinstance(entry, dict):
246 # need to recurse to an underlying block
249 # need to recurse to an underlying block
247 _insert_into_block(index, level + 1, entry, current_rev, current_hex)
250 _insert_into_block(index, level + 1, entry, current_rev, current_hex)
248 else:
251 else:
249 # collision with a previously unique prefix, inserting new
252 # collision with a previously unique prefix, inserting new
250 # vertices to fit both entry.
253 # vertices to fit both entry.
251 other_hex = nodemod.hex(index[entry][7])
254 other_hex = nodemod.hex(index[entry][7])
252 other_rev = entry
255 other_rev = entry
253 new = {}
256 new = {}
254 block[hex_digit] = new
257 block[hex_digit] = new
255 _insert_into_block(index, level + 1, new, other_rev, other_hex)
258 _insert_into_block(index, level + 1, new, other_rev, other_hex)
256 _insert_into_block(index, level + 1, new, current_rev, current_hex)
259 _insert_into_block(index, level + 1, new, current_rev, current_hex)
257
260
258
261
259 def _persist_trie(root):
262 def _persist_trie(root):
260 """turn a nodemap trie into persistent binary data
263 """turn a nodemap trie into persistent binary data
261
264
262 See `_build_trie` for nodemap trie structure"""
265 See `_build_trie` for nodemap trie structure"""
263 block_map = {}
266 block_map = {}
264 chunks = []
267 chunks = []
265 for tn in _walk_trie(root):
268 for tn in _walk_trie(root):
266 block_map[id(tn)] = len(chunks)
269 block_map[id(tn)] = len(chunks)
267 chunks.append(_persist_block(tn, block_map))
270 chunks.append(_persist_block(tn, block_map))
268 return b''.join(chunks)
271 return b''.join(chunks)
269
272
270
273
271 def _walk_trie(block):
274 def _walk_trie(block):
272 """yield all the block in a trie
275 """yield all the block in a trie
273
276
274 Children blocks are always yield before their parent block.
277 Children blocks are always yield before their parent block.
275 """
278 """
276 for (_, item) in sorted(block.items()):
279 for (_, item) in sorted(block.items()):
277 if isinstance(item, dict):
280 if isinstance(item, dict):
278 for sub_block in _walk_trie(item):
281 for sub_block in _walk_trie(item):
279 yield sub_block
282 yield sub_block
280 yield block
283 yield block
281
284
282
285
283 def _persist_block(block_node, block_map):
286 def _persist_block(block_node, block_map):
284 """produce persistent binary data for a single block
287 """produce persistent binary data for a single block
285
288
286 Children block are assumed to be already persisted and present in
289 Children block are assumed to be already persisted and present in
287 block_map.
290 block_map.
288 """
291 """
289 data = tuple(_to_value(block_node.get(i), block_map) for i in range(16))
292 data = tuple(_to_value(block_node.get(i), block_map) for i in range(16))
290 return S_BLOCK.pack(*data)
293 return S_BLOCK.pack(*data)
291
294
292
295
293 def _to_value(item, block_map):
296 def _to_value(item, block_map):
294 """persist any value as an integer"""
297 """persist any value as an integer"""
295 if item is None:
298 if item is None:
296 return NO_ENTRY
299 return NO_ENTRY
297 elif isinstance(item, dict):
300 elif isinstance(item, dict):
298 return block_map[id(item)]
301 return block_map[id(item)]
299 else:
302 else:
300 return _transform_rev(item)
303 return _transform_rev(item)
General Comments 0
You need to be logged in to leave comments. Login now