nodemap: add an optional `nodemap_add_full` method on indexes...
marmoute
r44795:7f4f7ef3 default
@@ -1,251 +1,258 b''
1 1 # parsers.py - Python implementation of parsers.c
2 2 #
3 3 # Copyright 2009 Matt Mackall <mpm@selenic.com> and others
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import struct
11 11 import zlib
12 12
13 13 from ..node import nullid, nullrev
14 14 from .. import (
15 15 pycompat,
16 16 util,
17 17 )
18 18
19 19 from ..revlogutils import nodemap as nodemaputil
20 20
21 21 stringio = pycompat.bytesio
22 22
23 23
24 24 _pack = struct.pack
25 25 _unpack = struct.unpack
26 26 _compress = zlib.compress
27 27 _decompress = zlib.decompress
28 28
29 29 # Some code below makes tuples directly because it's more convenient. However,
30 30 # code outside this module should always use dirstatetuple.
31 31 def dirstatetuple(*x):
32 32 # x is a tuple
33 33 return x
34 34
35 35
36 36 indexformatng = b">Qiiiiii20s12x"
37 37 indexfirst = struct.calcsize(b'Q')
38 38 sizeint = struct.calcsize(b'i')
39 39 indexsize = struct.calcsize(indexformatng)
40 40
41 41
42 42 def gettype(q):
43 43 return int(q & 0xFFFF)
44 44
45 45
46 46 def offset_type(offset, type):
47 47 return int(int(offset) << 16 | type)
48 48
49 49
50 50 class BaseIndexObject(object):
51 51 @property
52 52 def nodemap(self):
53 53 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
54 54 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
55 55 return self._nodemap
56 56
57 57 @util.propertycache
58 58 def _nodemap(self):
59 59 nodemap = nodemaputil.NodeMap({nullid: nullrev})
60 60 for r in range(0, len(self)):
61 61 n = self[r][7]
62 62 nodemap[n] = r
63 63 return nodemap
64 64
65 65 def has_node(self, node):
66 66 """return True if the node exist in the index"""
67 67 return node in self._nodemap
68 68
69 69 def rev(self, node):
70 70 """return a revision for a node
71 71
72 72 If the node is unknown, raise a RevlogError"""
73 73 return self._nodemap[node]
74 74
75 75 def get_rev(self, node):
76 76 """return a revision for a node
77 77
78 78 If the node is unknown, return None"""
79 79 return self._nodemap.get(node)
80 80
81 81 def _stripnodes(self, start):
82 82 if '_nodemap' in vars(self):
83 83 for r in range(start, len(self)):
84 84 n = self[r][7]
85 85 del self._nodemap[n]
86 86
87 87 def clearcaches(self):
88 88 self.__dict__.pop('_nodemap', None)
89 89
90 90 def __len__(self):
91 91 return self._lgt + len(self._extra)
92 92
93 93 def append(self, tup):
94 94 if '_nodemap' in vars(self):
95 95 self._nodemap[tup[7]] = len(self)
96 96 self._extra.append(tup)
97 97
98 98 def _check_index(self, i):
99 99 if not isinstance(i, int):
100 100 raise TypeError(b"expecting int indexes")
101 101 if i < 0 or i >= len(self):
102 102 raise IndexError
103 103
104 104 def __getitem__(self, i):
105 105 if i == -1:
106 106 return (0, 0, 0, -1, -1, -1, -1, nullid)
107 107 self._check_index(i)
108 108 if i >= self._lgt:
109 109 return self._extra[i - self._lgt]
110 110 index = self._calculate_index(i)
111 111 r = struct.unpack(indexformatng, self._data[index : index + indexsize])
112 112 if i == 0:
113 113 e = list(r)
114 114 type = gettype(e[0])
115 115 e[0] = offset_type(0, type)
116 116 return tuple(e)
117 117 return r
118 118
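The has_node/rev/get_rev trio above forms the node-resolution API shared by every index class in this file. A minimal usage sketch (illustration only; `index` stands for any index instance and `some_node` for a 20-byte binary node id, neither defined here):

# Hedged sketch of the node lookup API; not part of the diff itself.
if index.has_node(some_node):
    r = index.rev(some_node)      # raises RevlogError for unknown nodes
else:
    r = index.get_rev(some_node)  # returns None instead of raising
    assert r is None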
119 119
120 120 class IndexObject(BaseIndexObject):
121 121 def __init__(self, data):
122 122 assert len(data) % indexsize == 0
123 123 self._data = data
124 124 self._lgt = len(data) // indexsize
125 125 self._extra = []
126 126
127 127 def _calculate_index(self, i):
128 128 return i * indexsize
129 129
130 130 def __delitem__(self, i):
131 131 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
132 132 raise ValueError(b"deleting slices only supports a:-1 with step 1")
133 133 i = i.start
134 134 self._check_index(i)
135 135 self._stripnodes(i)
136 136 if i < self._lgt:
137 137 self._data = self._data[: i * indexsize]
138 138 self._lgt = i
139 139 self._extra = []
140 140 else:
141 141 self._extra = self._extra[: i - self._lgt]
142 142
143 143
144 144 class PersistentNodeMapIndexObject(IndexObject):
145 145 """a Debug oriented class to test persistent nodemap
146 146
147 147 We need a simple python object to test API and higher level behavior. See
148 148 the Rust implementation for more serious usage. This should be used only
149 149 through the dedicated `devel.persistent-nodemap` config.
150 150 """
151 151
152 def nodemap_data_all(self):
153 """Return bytes containing a full serialization of a nodemap
154
155 The nodemap should be valid for the full set of revisions in the
156 index."""
157 return nodemaputil.persistent_data(self)
158
152 159
153 160 class InlinedIndexObject(BaseIndexObject):
154 161 def __init__(self, data, inline=0):
155 162 self._data = data
156 163 self._lgt = self._inline_scan(None)
157 164 self._inline_scan(self._lgt)
158 165 self._extra = []
159 166
160 167 def _inline_scan(self, lgt):
161 168 off = 0
162 169 if lgt is not None:
163 170 self._offsets = [0] * lgt
164 171 count = 0
165 172 while off <= len(self._data) - indexsize:
166 173 (s,) = struct.unpack(
167 174 b'>i', self._data[off + indexfirst : off + sizeint + indexfirst]
168 175 )
169 176 if lgt is not None:
170 177 self._offsets[count] = off
171 178 count += 1
172 179 off += indexsize + s
173 180 if off != len(self._data):
174 181 raise ValueError(b"corrupted data")
175 182 return count
176 183
177 184 def __delitem__(self, i):
178 185 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
179 186 raise ValueError(b"deleting slices only supports a:-1 with step 1")
180 187 i = i.start
181 188 self._check_index(i)
182 189 self._stripnodes(i)
183 190 if i < self._lgt:
184 191 self._offsets = self._offsets[:i]
185 192 self._lgt = i
186 193 self._extra = []
187 194 else:
188 195 self._extra = self._extra[: i - self._lgt]
189 196
190 197 def _calculate_index(self, i):
191 198 return self._offsets[i]
192 199
193 200
194 201 def parse_index2(data, inline):
195 202 if not inline:
196 203 return IndexObject(data), None
197 204 return InlinedIndexObject(data, inline), (0, data)
198 205
199 206
200 207 def parse_index_devel_nodemap(data, inline):
201 208 """like parse_index2, but alway return a PersistentNodeMapIndexObject
202 209 """
203 210 return PersistentNodeMapIndexObject(data), None
204 211
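A hedged sketch of how this debug entry point combines with the new nodemap_data_all method (illustration only; `data` is assumed to hold the raw bytes of a non-inline revlog index and is not defined in this module):

# Parse the index with the debug class, then dump its full nodemap.
index, cache = parse_index_devel_nodemap(data, False)
nm_data = index.nodemap_data_all()  # full binary serialization
assert len(nm_data) % 64 == 0       # one block = 16 entries of 4 bytes each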
205 212
206 213 def parse_dirstate(dmap, copymap, st):
207 214 parents = [st[:20], st[20:40]]
208 215 # dereference fields so they will be local in loop
209 216 format = b">cllll"
210 217 e_size = struct.calcsize(format)
211 218 pos1 = 40
212 219 l = len(st)
213 220
214 221 # the inner loop
215 222 while pos1 < l:
216 223 pos2 = pos1 + e_size
217 224 e = _unpack(b">cllll", st[pos1:pos2]) # a literal here is faster
218 225 pos1 = pos2 + e[4]
219 226 f = st[pos2:pos1]
220 227 if b'\0' in f:
221 228 f, c = f.split(b'\0')
222 229 copymap[f] = c
223 230 dmap[f] = e[:4]
224 231 return parents
225 232
226 233
227 234 def pack_dirstate(dmap, copymap, pl, now):
228 235 now = int(now)
229 236 cs = stringio()
230 237 write = cs.write
231 238 write(b"".join(pl))
232 239 for f, e in pycompat.iteritems(dmap):
233 240 if e[0] == b'n' and e[3] == now:
234 241 # The file was last modified "simultaneously" with the current
235 242 # write to dirstate (i.e. within the same second for file-
236 243 # systems with a granularity of 1 sec). This commonly happens
237 244 # for at least a couple of files on 'update'.
238 245 # The user could change the file without changing its size
239 246 # within the same second. Invalidate the file's mtime in
240 247 # dirstate, forcing future 'status' calls to compare the
241 248 # contents of the file if the size is the same. This prevents
242 249 # mistakenly treating such files as clean.
243 250 e = dirstatetuple(e[0], e[1], e[2], -1)
244 251 dmap[f] = e
245 252
246 253 if f in copymap:
247 254 f = b"%s\0%s" % (f, copymap[f])
248 255 e = _pack(b">cllll", e[0], e[1], e[2], e[3], len(f))
249 256 write(e)
250 257 write(f)
251 258 return cs.getvalue()
@@ -1,300 +1,303 b''
1 1 # nodemap.py - nodemap related code and utilities
2 2 #
3 3 # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
4 4 # Copyright 2019 George Racinet <georges.racinet@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import os
12 12 import re
13 13 import struct
14 14
15 15 from .. import (
16 16 error,
17 17 node as nodemod,
18 pycompat,
18 util,
19 19 )
20 20
21 21
22 22 class NodeMap(dict):
23 23 def __missing__(self, x):
24 24 raise error.RevlogError(b'unknown node: %s' % x)
25 25
26 26
27 27 def persisted_data(revlog):
28 28 """read the nodemap for a revlog from disk"""
29 29 if revlog.nodemap_file is None:
30 30 return None
31 31 pdata = revlog.opener.tryread(revlog.nodemap_file)
32 32 if not pdata:
33 33 return None
34 34 offset = 0
35 35 (version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size])
36 36 if version != ONDISK_VERSION:
37 37 return None
38 38 offset += S_VERSION.size
39 39 (uuid_size,) = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
40 40 offset += S_HEADER.size
41 41 uid = pdata[offset : offset + uuid_size]
42 42
43 43 filename = _rawdata_filepath(revlog, uid)
44 44 return revlog.opener.tryread(filename)
45 45
46 46
47 47 def setup_persistent_nodemap(tr, revlog):
48 48 """Install whatever is needed transaction side to persist a nodemap on disk
49 49
50 50 (only actually persist the nodemap if this is relevant for this revlog)
51 51 """
52 52 if revlog._inline:
53 53 return # inlined revlogs are too small for this to be relevant
54 54 if revlog.nodemap_file is None:
55 55 return # we do not use persistent_nodemap on this revlog
56 56 callback_id = b"revlog-persistent-nodemap-%s" % revlog.nodemap_file
57 57 if tr.hasfinalize(callback_id):
58 58 return # no need to register again
59 59 tr.addfinalize(callback_id, lambda tr: _persist_nodemap(tr, revlog))
60 60
61 61
62 62 def _persist_nodemap(tr, revlog):
63 63 """Write nodemap data on disk for a given revlog
64 64 """
65 65 if getattr(revlog, 'filteredrevs', ()):
66 66 raise error.ProgrammingError(
67 67 "cannot persist nodemap of a filtered changelog"
68 68 )
69 69 if revlog.nodemap_file is None:
70 70 msg = "calling persist nodemap on a revlog without the feature enableb"
71 71 raise error.ProgrammingError(msg)
72 data = persistent_data(revlog.index)
72 if util.safehasattr(revlog.index, "nodemap_data_all"):
73 data = revlog.index.nodemap_data_all()
74 else:
75 data = persistent_data(revlog.index)
73 76 uid = _make_uid()
74 77 datafile = _rawdata_filepath(revlog, uid)
75 78 olds = _other_rawdata_filepath(revlog, uid)
76 79 if olds:
77 80 realvfs = getattr(revlog, '_realopener', revlog.opener)
78 81
79 82 def cleanup(tr):
80 83 for oldfile in olds:
81 84 realvfs.tryunlink(oldfile)
82 85
83 86 callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file
84 87 tr.addpostclose(callback_id, cleanup)
85 88 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
86 89 # store vfs
87 90 with revlog.opener(datafile, b'w') as fd:
88 91 fd.write(data)
89 92 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
90 93 # store vfs
91 94 with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp:
92 95 fp.write(_serialize_docket(uid))
93 96 # EXP-TODO: if the transaction aborts, we should remove the new data and
94 97 # reinstall the old one.
95 98
96 99
97 100 ### Nodemap docket file
98 101 #
99 102 # The nodemap data are stored on disk using 2 files:
100 103 #
101 104 # * a raw data file containing a persistent nodemap
102 105 # (see `Nodemap Trie` section)
103 106 #
104 107 # * a small "docket" file containing metadata
105 108 #
106 109 # While the nodemap data can be multiple tens of megabytes, the "docket" is
107 110 # small, so it is easy to update it automatically or to duplicate its content
108 111 # during a transaction.
109 112 #
110 113 # Multiple raw data files can exist at the same time (the currently valid one
111 114 # and a new one being used by an in-progress transaction). To accommodate this,
112 115 # the filename hosting the raw data has a variable part. The exact filename is
113 116 # specified inside the "docket" file.
114 117 #
115 118 # The docket file contains information to find, qualify and validate the raw
116 119 # data. Its content is currently very light, but it will expand as the on disk
117 120 # nodemap gains the necessary features to be used in production.
118 121
119 122 # version 0 is experimental, no BC guarantee, do not use outside of tests.
120 123 ONDISK_VERSION = 0
121 124
122 125 S_VERSION = struct.Struct(">B")
123 126 S_HEADER = struct.Struct(">B")
124 127
125 128 ID_SIZE = 8
126 129
127 130
128 131 def _make_uid():
129 132 """return a new unique identifier.
130 133
131 134 The identifier is random and composed of ascii characters."""
132 135 return nodemod.hex(os.urandom(ID_SIZE))
133 136
134 137
135 138 def _serialize_docket(uid):
136 139 """return serialized bytes for a docket using the passed uid"""
137 140 data = []
138 141 data.append(S_VERSION.pack(ONDISK_VERSION))
139 142 data.append(S_HEADER.pack(len(uid)))
140 143 data.append(uid)
141 144 return b''.join(data)
142 145
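To make the docket layout concrete, here is a minimal round-trip sketch; _example_parse_docket is a hypothetical helper (the real read side is `persisted_data` above):

def _example_parse_docket(data):
    # layout: 1-byte version, 1-byte uid length, then the ascii uid
    (version,) = S_VERSION.unpack(data[: S_VERSION.size])
    assert version == ONDISK_VERSION
    offset = S_VERSION.size
    (uid_size,) = S_HEADER.unpack(data[offset : offset + S_HEADER.size])
    offset += S_HEADER.size
    return data[offset : offset + uid_size]

# round trip: _example_parse_docket(_serialize_docket(uid)) == uid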
143 146
144 147 def _rawdata_filepath(revlog, uid):
145 148 """The (vfs relative) nodemap's rawdata file for a given uid"""
146 149 prefix = revlog.nodemap_file[:-2]
147 150 return b"%s-%s.nd" % (prefix, uid)
148 151
149 152
150 153 def _other_rawdata_filepath(revlog, uid):
151 154 prefix = revlog.nodemap_file[:-2]
152 155 pattern = re.compile(br"(^|/)%s-[0-9a-f]+\.nd$" % prefix)
153 156 new_file_path = _rawdata_filepath(revlog, uid)
154 157 new_file_name = revlog.opener.basename(new_file_path)
155 158 dirpath = revlog.opener.dirname(new_file_path)
156 159 others = []
157 160 for f in revlog.opener.listdir(dirpath):
158 161 if pattern.match(f) and f != new_file_name:
159 162 others.append(f)
160 163 return others
161 164
162 165
163 166 ### Nodemap Trie
164 167 #
165 168 # This is a simple reference implementation to compute and persist a nodemap
166 169 # trie. This reference implementation is write-only. The python version of
167 170 # this is not expected to be actually used, since it won't provide any
168 171 # performance improvement over the existing non-persistent C implementation.
169 172 #
170 173 # The nodemap is persisted as a trie using 4-bit addresses and 16-entry
171 174 # blocks. Each revision can be addressed using its node's shortest prefix.
172 175 #
173 176 # The trie is stored as a sequence of blocks. Each block contains 16 entries
174 177 # (signed 32-bit integers, big endian). Each entry can be one of the following:
175 178 #
176 179 # * value >= 0 -> index of sub-block
177 180 # * value == -1 -> no value
178 181 # * value < -1 -> a revision value: rev = -(value+2)
179 182 #
180 183 # The implementation focuses on simplicity, not on performance. A Rust
181 184 # implementation should provide an efficient version of the same binary
182 185 # persistence. This reference python implementation is never meant to be
183 186 # extensively used in production.
184 187
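Because the reference implementation is write-only, a hedged sketch of a possible read side may help; _example_lookup is hypothetical, uses the helpers defined below (S_BLOCK, NO_ENTRY, _to_int, _transform_rev), and relies on _persist_trie writing children before parents, which makes the root the last block:

def _example_lookup(data, hex_prefix):
    # the root block is persisted last (children come before parents)
    block_index = (len(data) // S_BLOCK.size) - 1
    for i in range(len(hex_prefix)):
        entries = S_BLOCK.unpack_from(data, block_index * S_BLOCK.size)
        value = entries[_to_int(hex_prefix[i : i + 1])]
        if value == NO_ENTRY:
            return None  # no node starts with this prefix
        if value < 0:
            return _transform_rev(value)  # leaf entry: decode the revision
        block_index = value  # descend into the sub-block
    return None  # prefix too short to reach a leaf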
185 188
186 189 def persistent_data(index):
187 190 """return the persistent binary form for a nodemap for a given index
188 191 """
189 192 trie = _build_trie(index)
190 193 return _persist_trie(trie)
191 194
192 195
193 196 S_BLOCK = struct.Struct(">" + ("l" * 16))
194 197
195 198 NO_ENTRY = -1
196 199 # rev 0 needs to be -2 because 0 is used for blocks and -1 is a special value.
197 200 REV_OFFSET = 2
198 201
199 202
200 203 def _transform_rev(rev):
201 204 """Return the number used to represent the rev in the tree.
202 205
203 206 (or retrieve a rev number from such representation)
204 207
205 208 Note that this is an involution, a function equal to its inverse (i.e.
206 209 which gives the identity when applied to itself).
207 210 """
208 211 return -(rev + REV_OFFSET)
209 212
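A quick check of the involution with REV_OFFSET = 2 (illustration only):

assert _transform_rev(0) == -2                 # rev 0 encodes as -2
assert _transform_rev(-2) == 0                 # and decodes back to 0
assert _transform_rev(_transform_rev(5)) == 5  # the function is its own inverse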
210 213
211 214 def _to_int(hex_digit):
212 215 """turn an hexadecimal digit into a proper integer"""
213 216 return int(hex_digit, 16)
214 217
215 218
216 219 def _build_trie(index):
217 220 """build a nodemap trie
218 221
219 222 The nodemap stores the revision number for each unique prefix.
220 223
221 224 Each block is a dictionary with keys in `[0, 15]`. Values are either
222 225 another block or a revision number.
223 226 """
224 227 root = {}
225 228 for rev in range(len(index)):
226 229 hex = nodemod.hex(index[rev][7])
227 230 _insert_into_block(index, 0, root, rev, hex)
228 231 return root
229 232
230 233
231 234 def _insert_into_block(index, level, block, current_rev, current_hex):
232 235 """insert a new revision in a block
233 236
234 237 index: the index we are adding a revision for
235 238 level: the depth of the current block in the trie
236 239 block: the block currently being considered
237 240 current_rev: the revision number we are adding
238 241 current_hex: the hexadecimal representation of the node of that revision
239 242 """
240 243 hex_digit = _to_int(current_hex[level : level + 1])
241 244 entry = block.get(hex_digit)
242 245 if entry is None:
243 246 # no entry, simply store the revision number
244 247 block[hex_digit] = current_rev
245 248 elif isinstance(entry, dict):
246 249 # need to recurse to an underlying block
247 250 _insert_into_block(index, level + 1, entry, current_rev, current_hex)
248 251 else:
249 252 # collision with a previously unique prefix, inserting new
250 253 # vertices to fit both entries.
251 254 other_hex = nodemod.hex(index[entry][7])
252 255 other_rev = entry
253 256 new = {}
254 257 block[hex_digit] = new
255 258 _insert_into_block(index, level + 1, new, other_rev, other_hex)
256 259 _insert_into_block(index, level + 1, new, current_rev, current_hex)
257 260
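A worked illustration of the collision branch (hypothetical node prefixes): inserting rev 0 with node ab34... and then rev 1 with node ab12... collides on the shared prefix "ab", producing two nested blocks before the entries diverge at the third nibble:

# Hedged illustration of the resulting trie shape after both inserts:
root = {0xA: {0xB: {0x3: 0, 0x1: 1}}}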
258 261
259 262 def _persist_trie(root):
260 263 """turn a nodemap trie into persistent binary data
261 264
262 265 See `_build_trie` for nodemap trie structure"""
263 266 block_map = {}
264 267 chunks = []
265 268 for tn in _walk_trie(root):
266 269 block_map[id(tn)] = len(chunks)
267 270 chunks.append(_persist_block(tn, block_map))
268 271 return b''.join(chunks)
269 272
270 273
271 274 def _walk_trie(block):
272 275 """yield all the block in a trie
273 276
274 277 Children blocks are always yield before their parent block.
275 278 """
276 279 for (_, item) in sorted(block.items()):
277 280 if isinstance(item, dict):
278 281 for sub_block in _walk_trie(item):
279 282 yield sub_block
280 283 yield block
281 284
282 285
283 286 def _persist_block(block_node, block_map):
284 287 """produce persistent binary data for a single block
285 288
286 289 Children blocks are assumed to be already persisted and present in
287 290 block_map.
288 291 """
289 292 data = tuple(_to_value(block_node.get(i), block_map) for i in range(16))
290 293 return S_BLOCK.pack(*data)
291 294
292 295
293 296 def _to_value(item, block_map):
294 297 """persist any value as an integer"""
295 298 if item is None:
296 299 return NO_ENTRY
297 300 elif isinstance(item, dict):
298 301 return block_map[id(item)]
299 302 else:
300 303 return _transform_rev(item)
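Finally, an end-to-end sketch tying the pieces together (illustration only; FakeIndex is a hypothetical stand-in exposing just the `index[rev][7]` node access that _build_trie relies on):

class FakeIndex(object):
    """minimal index double: only len() and tuple slot 7 (the node) matter"""

    def __init__(self, nodes):
        self._nodes = nodes

    def __len__(self):
        return len(self._nodes)

    def __getitem__(self, rev):
        return (None,) * 7 + (self._nodes[rev],)

index = FakeIndex([b'\x12' * 20, b'\x34' * 20])
data = persistent_data(index)
assert len(data) % S_BLOCK.size == 0  # a whole number of 64-byte blocks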