##// END OF EJS Templates
nodemap: move the iteratio inside the Block object...
marmoute -
r44797:55b12f25 default
parent child Browse files
Show More
@@ -1,309 +1,312 b''
1 1 # nodemap.py - nodemap related code and utilities
2 2 #
3 3 # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
4 4 # Copyright 2019 George Racinet <georges.racinet@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import os
12 12 import re
13 13 import struct
14 14
15 15 from .. import (
16 16 error,
17 17 node as nodemod,
18 18 util,
19 19 )
20 20
21 21
22 22 class NodeMap(dict):
23 23 def __missing__(self, x):
24 24 raise error.RevlogError(b'unknown node: %s' % x)
25 25
26 26
27 27 def persisted_data(revlog):
28 28 """read the nodemap for a revlog from disk"""
29 29 if revlog.nodemap_file is None:
30 30 return None
31 31 pdata = revlog.opener.tryread(revlog.nodemap_file)
32 32 if not pdata:
33 33 return None
34 34 offset = 0
35 35 (version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size])
36 36 if version != ONDISK_VERSION:
37 37 return None
38 38 offset += S_VERSION.size
39 39 (uuid_size,) = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
40 40 offset += S_HEADER.size
41 41 uid = pdata[offset : offset + uuid_size]
42 42
43 43 filename = _rawdata_filepath(revlog, uid)
44 44 return revlog.opener.tryread(filename)
45 45
46 46
47 47 def setup_persistent_nodemap(tr, revlog):
48 48 """Install whatever is needed transaction side to persist a nodemap on disk
49 49
50 50 (only actually persist the nodemap if this is relevant for this revlog)
51 51 """
52 52 if revlog._inline:
53 53 return # inlined revlog are too small for this to be relevant
54 54 if revlog.nodemap_file is None:
55 55 return # we do not use persistent_nodemap on this revlog
56 56 callback_id = b"revlog-persistent-nodemap-%s" % revlog.nodemap_file
57 57 if tr.hasfinalize(callback_id):
58 58 return # no need to register again
59 59 tr.addfinalize(callback_id, lambda tr: _persist_nodemap(tr, revlog))
60 60
61 61
62 62 def _persist_nodemap(tr, revlog):
63 63 """Write nodemap data on disk for a given revlog
64 64 """
65 65 if getattr(revlog, 'filteredrevs', ()):
66 66 raise error.ProgrammingError(
67 67 "cannot persist nodemap of a filtered changelog"
68 68 )
69 69 if revlog.nodemap_file is None:
70 70 msg = "calling persist nodemap on a revlog without the feature enableb"
71 71 raise error.ProgrammingError(msg)
72 72 if util.safehasattr(revlog.index, "nodemap_data_all"):
73 73 data = revlog.index.nodemap_data_all()
74 74 else:
75 75 data = persistent_data(revlog.index)
76 76 uid = _make_uid()
77 77 datafile = _rawdata_filepath(revlog, uid)
78 78 olds = _other_rawdata_filepath(revlog, uid)
79 79 if olds:
80 80 realvfs = getattr(revlog, '_realopener', revlog.opener)
81 81
82 82 def cleanup(tr):
83 83 for oldfile in olds:
84 84 realvfs.tryunlink(oldfile)
85 85
86 86 callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file
87 87 tr.addpostclose(callback_id, cleanup)
88 88 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
89 89 # store vfs
90 90 with revlog.opener(datafile, b'w') as fd:
91 91 fd.write(data)
92 92 # EXP-TODO: if this is a cache, this should use a cache vfs, not a
93 93 # store vfs
94 94 with revlog.opener(revlog.nodemap_file, b'w', atomictemp=True) as fp:
95 95 fp.write(_serialize_docket(uid))
96 96 # EXP-TODO: if the transaction abort, we should remove the new data and
97 97 # reinstall the old one.
98 98
99 99
100 100 ### Nodemap docket file
101 101 #
102 102 # The nodemap data are stored on disk using 2 files:
103 103 #
104 104 # * a raw data files containing a persistent nodemap
105 105 # (see `Nodemap Trie` section)
106 106 #
107 107 # * a small "docket" file containing medatadata
108 108 #
109 109 # While the nodemap data can be multiple tens of megabytes, the "docket" is
110 110 # small, it is easy to update it automatically or to duplicated its content
111 111 # during a transaction.
112 112 #
113 113 # Multiple raw data can exist at the same time (The currently valid one and a
114 114 # new one beind used by an in progress transaction). To accomodate this, the
115 115 # filename hosting the raw data has a variable parts. The exact filename is
116 116 # specified inside the "docket" file.
117 117 #
118 118 # The docket file contains information to find, qualify and validate the raw
119 119 # data. Its content is currently very light, but it will expand as the on disk
120 120 # nodemap gains the necessary features to be used in production.
121 121
122 122 # version 0 is experimental, no BC garantee, do no use outside of tests.
123 123 ONDISK_VERSION = 0
124 124
125 125 S_VERSION = struct.Struct(">B")
126 126 S_HEADER = struct.Struct(">B")
127 127
128 128 ID_SIZE = 8
129 129
130 130
131 131 def _make_uid():
132 132 """return a new unique identifier.
133 133
134 134 The identifier is random and composed of ascii characters."""
135 135 return nodemod.hex(os.urandom(ID_SIZE))
136 136
137 137
138 138 def _serialize_docket(uid):
139 139 """return serialized bytes for a docket using the passed uid"""
140 140 data = []
141 141 data.append(S_VERSION.pack(ONDISK_VERSION))
142 142 data.append(S_HEADER.pack(len(uid)))
143 143 data.append(uid)
144 144 return b''.join(data)
145 145
146 146
147 147 def _rawdata_filepath(revlog, uid):
148 148 """The (vfs relative) nodemap's rawdata file for a given uid"""
149 149 prefix = revlog.nodemap_file[:-2]
150 150 return b"%s-%s.nd" % (prefix, uid)
151 151
152 152
153 153 def _other_rawdata_filepath(revlog, uid):
154 154 prefix = revlog.nodemap_file[:-2]
155 155 pattern = re.compile(b"(^|/)%s-[0-9a-f]+\.nd$" % prefix)
156 156 new_file_path = _rawdata_filepath(revlog, uid)
157 157 new_file_name = revlog.opener.basename(new_file_path)
158 158 dirpath = revlog.opener.dirname(new_file_path)
159 159 others = []
160 160 for f in revlog.opener.listdir(dirpath):
161 161 if pattern.match(f) and f != new_file_name:
162 162 others.append(f)
163 163 return others
164 164
165 165
166 166 ### Nodemap Trie
167 167 #
168 168 # This is a simple reference implementation to compute and persist a nodemap
169 169 # trie. This reference implementation is write only. The python version of this
170 170 # is not expected to be actually used, since it wont provide performance
171 171 # improvement over existing non-persistent C implementation.
172 172 #
173 173 # The nodemap is persisted as Trie using 4bits-address/16-entries block. each
174 174 # revision can be adressed using its node shortest prefix.
175 175 #
176 176 # The trie is stored as a sequence of block. Each block contains 16 entries
177 177 # (signed 64bit integer, big endian). Each entry can be one of the following:
178 178 #
179 179 # * value >= 0 -> index of sub-block
180 180 # * value == -1 -> no value
181 181 # * value < -1 -> a revision value: rev = -(value+10)
182 182 #
183 183 # The implementation focus on simplicity, not on performance. A Rust
184 184 # implementation should provide a efficient version of the same binary
185 185 # persistence. This reference python implementation is never meant to be
186 186 # extensively use in production.
187 187
188 188
189 189 def persistent_data(index):
190 190 """return the persistent binary form for a nodemap for a given index
191 191 """
192 192 trie = _build_trie(index)
193 193 return _persist_trie(trie)
194 194
195 195
196 196 S_BLOCK = struct.Struct(">" + ("l" * 16))
197 197
198 198 NO_ENTRY = -1
199 199 # rev 0 need to be -2 because 0 is used by block, -1 is a special value.
200 200 REV_OFFSET = 2
201 201
202 202
203 203 def _transform_rev(rev):
204 204 """Return the number used to represent the rev in the tree.
205 205
206 206 (or retrieve a rev number from such representation)
207 207
208 208 Note that this is an involution, a function equal to its inverse (i.e.
209 209 which gives the identity when applied to itself).
210 210 """
211 211 return -(rev + REV_OFFSET)
212 212
213 213
214 214 def _to_int(hex_digit):
215 215 """turn an hexadecimal digit into a proper integer"""
216 216 return int(hex_digit, 16)
217 217
218 218
219 219 class Block(dict):
220 220 """represent a block of the Trie
221 221
222 222 contains up to 16 entry indexed from 0 to 15"""
223 223
224 def __iter__(self):
225 return iter(self.get(i) for i in range(16))
226
224 227
225 228 def _build_trie(index):
226 229 """build a nodemap trie
227 230
228 231 The nodemap stores revision number for each unique prefix.
229 232
230 233 Each block is a dictionary with keys in `[0, 15]`. Values are either
231 234 another block or a revision number.
232 235 """
233 236 root = Block()
234 237 for rev in range(len(index)):
235 238 hex = nodemod.hex(index[rev][7])
236 239 _insert_into_block(index, 0, root, rev, hex)
237 240 return root
238 241
239 242
240 243 def _insert_into_block(index, level, block, current_rev, current_hex):
241 244 """insert a new revision in a block
242 245
243 246 index: the index we are adding revision for
244 247 level: the depth of the current block in the trie
245 248 block: the block currently being considered
246 249 current_rev: the revision number we are adding
247 250 current_hex: the hexadecimal representation of the of that revision
248 251 """
249 252 hex_digit = _to_int(current_hex[level : level + 1])
250 253 entry = block.get(hex_digit)
251 254 if entry is None:
252 255 # no entry, simply store the revision number
253 256 block[hex_digit] = current_rev
254 257 elif isinstance(entry, dict):
255 258 # need to recurse to an underlying block
256 259 _insert_into_block(index, level + 1, entry, current_rev, current_hex)
257 260 else:
258 261 # collision with a previously unique prefix, inserting new
259 262 # vertices to fit both entry.
260 263 other_hex = nodemod.hex(index[entry][7])
261 264 other_rev = entry
262 265 new = Block()
263 266 block[hex_digit] = new
264 267 _insert_into_block(index, level + 1, new, other_rev, other_hex)
265 268 _insert_into_block(index, level + 1, new, current_rev, current_hex)
266 269
267 270
268 271 def _persist_trie(root):
269 272 """turn a nodemap trie into persistent binary data
270 273
271 274 See `_build_trie` for nodemap trie structure"""
272 275 block_map = {}
273 276 chunks = []
274 277 for tn in _walk_trie(root):
275 278 block_map[id(tn)] = len(chunks)
276 279 chunks.append(_persist_block(tn, block_map))
277 280 return b''.join(chunks)
278 281
279 282
280 283 def _walk_trie(block):
281 284 """yield all the block in a trie
282 285
283 286 Children blocks are always yield before their parent block.
284 287 """
285 288 for (_, item) in sorted(block.items()):
286 289 if isinstance(item, dict):
287 290 for sub_block in _walk_trie(item):
288 291 yield sub_block
289 292 yield block
290 293
291 294
292 295 def _persist_block(block_node, block_map):
293 296 """produce persistent binary data for a single block
294 297
295 298 Children block are assumed to be already persisted and present in
296 299 block_map.
297 300 """
298 data = tuple(_to_value(block_node.get(i), block_map) for i in range(16))
301 data = tuple(_to_value(v, block_map) for v in block_node)
299 302 return S_BLOCK.pack(*data)
300 303
301 304
302 305 def _to_value(item, block_map):
303 306 """persist any value as an integer"""
304 307 if item is None:
305 308 return NO_ENTRY
306 309 elif isinstance(item, dict):
307 310 return block_map[id(item)]
308 311 else:
309 312 return _transform_rev(item)
General Comments 0
You need to be logged in to leave comments. Login now