##// END OF EJS Templates
nodemap: fix a typo in error message...
Pulkit Goyal -
r47197:dadb4db5 default
parent child Browse files
Show More
@@ -1,647 +1,647 b''
1 1 # nodemap.py - nodemap related code and utilities
2 2 #
3 3 # Copyright 2019 Pierre-Yves David <pierre-yves.david@octobus.net>
4 4 # Copyright 2019 George Racinet <georges.racinet@octobus.net>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import absolute_import
10 10
11 11 import errno
12 12 import os
13 13 import re
14 14 import struct
15 15
16 16 from ..node import hex
17 17
18 18 from .. import (
19 19 error,
20 20 util,
21 21 )
22 22
23 23
class NodeMap(dict):
    """mapping of node -> revision that errors out on unknown nodes"""

    def __missing__(self, key):
        raise error.RevlogError(b'unknown node: %s' % key)
27 27
28 28
def persisted_data(revlog):
    """read the nodemap for a revlog from disk

    Returns a `(docket, data)` pair on success, or None when no usable
    on-disk nodemap exists (missing file, unknown version, missing raw-data
    file, or truncated data).
    """
    if revlog.nodemap_file is None:
        return None
    pdata = revlog.opener.tryread(revlog.nodemap_file)
    if not pdata:
        return None
    offset = 0
    # on-disk docket layout: version byte, fixed header, uid, tip node
    (version,) = S_VERSION.unpack(pdata[offset : offset + S_VERSION.size])
    if version != ONDISK_VERSION:
        # unknown on-disk format: ignore the persisted nodemap
        return None
    offset += S_VERSION.size
    headers = S_HEADER.unpack(pdata[offset : offset + S_HEADER.size])
    uid_size, tip_rev, data_length, data_unused, tip_node_size = headers
    offset += S_HEADER.size
    docket = NodeMapDocket(pdata[offset : offset + uid_size])
    offset += uid_size
    docket.tip_rev = tip_rev
    docket.tip_node = pdata[offset : offset + tip_node_size]
    docket.data_length = data_length
    docket.data_unused = data_unused

    filename = _rawdata_filepath(revlog, docket)
    use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap")
    try:
        with revlog.opener(filename) as fd:
            if use_mmap:
                data = util.buffer(util.mmapread(fd, data_length))
            else:
                data = fd.read(data_length)
    except (IOError, OSError) as e:
        if e.errno == errno.ENOENT:
            # the raw-data file referenced by the docket is gone
            return None
        else:
            raise
    if len(data) < data_length:
        # data file is shorter than the docket announces: corrupted/stale
        return None
    return docket, data
67 67
68 68
def setup_persistent_nodemap(tr, revlog):
    """Install whatever is needed transaction side to persist a nodemap on disk

    (only actually persist the nodemap if this is relevant for this revlog)
    """
    # inlined revlogs are too small for this to be relevant, and some
    # revlogs simply do not use a persistent nodemap at all
    if revlog._inline or revlog.nodemap_file is None:
        return

    # must run after the changelog finalization (which uses "cl-" ids)
    callback_id = b"nm-revlog-persistent-nodemap-%s" % revlog.nodemap_file
    if tr.hasfinalize(callback_id):
        return  # already registered on this transaction
    tr.addpending(
        callback_id, lambda ptr: persist_nodemap(ptr, revlog, pending=True)
    )
    tr.addfinalize(callback_id, lambda ftr: persist_nodemap(ftr, revlog))
87 87
88 88
89 89 class _NoTransaction(object):
90 90 """transaction like object to update the nodemap outside a transaction"""
91 91
92 92 def __init__(self):
93 93 self._postclose = {}
94 94
95 95 def addpostclose(self, callback_id, callback_func):
96 96 self._postclose[callback_id] = callback_func
97 97
98 98 def registertmp(self, *args, **kwargs):
99 99 pass
100 100
101 101 def addbackup(self, *args, **kwargs):
102 102 pass
103 103
104 104 def add(self, *args, **kwargs):
105 105 pass
106 106
107 107 def addabort(self, *args, **kwargs):
108 108 pass
109 109
110 110 def _report(self, *args):
111 111 pass
112 112
113 113
def update_persistent_nodemap(revlog):
    """update the persistent nodemap right now

    To be used for updating the nodemap on disk outside of a normal transaction
    setup (eg, `debugupdatecache`).
    """
    # same relevance checks as setup_persistent_nodemap
    if revlog._inline or revlog.nodemap_file is None:
        return

    notr = _NoTransaction()
    persist_nodemap(notr, revlog)
    # run the recorded post-close callbacks in a deterministic order
    for callback_id in sorted(notr._postclose):
        notr._postclose[callback_id](None)
129 129
130 130
def persist_nodemap(tr, revlog, pending=False):
    """Write nodemap data on disk for a given revlog

    An incremental append to the existing raw-data file is attempted first;
    when that is not possible (no docket, diverging docket, or too much dead
    data) a brand new raw-data file is written.  In both cases a new docket
    is published atomically and the in-memory index may be fed the freshly
    written bytes.

    tr: the active transaction (or a `_NoTransaction` stand-in)
    revlog: the revlog whose nodemap should be persisted
    pending: when True, write the docket to the transaction-pending `.a`
             file instead of the final one
    """
    if getattr(revlog, 'filteredrevs', ()):
        raise error.ProgrammingError(
            "cannot persist nodemap of a filtered changelog"
        )
    if revlog.nodemap_file is None:
        msg = "calling persist nodemap on a revlog without the feature enabled"
        raise error.ProgrammingError(msg)

    # capability probing: a C/Rust index may provide incremental data and
    # accept being fed the written bytes back
    can_incremental = util.safehasattr(revlog.index, "nodemap_data_incremental")
    ondisk_docket = revlog._nodemap_docket
    feed_data = util.safehasattr(revlog.index, "update_nodemap_data")
    use_mmap = revlog.opener.options.get(b"persistent-nodemap.mmap")

    data = None
    # first attempt an incremental update of the data
    if can_incremental and ondisk_docket is not None:
        target_docket = revlog._nodemap_docket.copy()
        (
            src_docket,
            data_changed_count,
            data,
        ) = revlog.index.nodemap_data_incremental()
        new_length = target_docket.data_length + len(data)
        new_unused = target_docket.data_unused + data_changed_count
        if src_docket != target_docket:
            # the index was loaded from different on-disk data; appending to
            # the current file would corrupt it
            data = None
        elif new_length <= (new_unused * 10):  # under 10% of unused data
            # too much dead data: force a full rewrite instead
            data = None
        else:
            datafile = _rawdata_filepath(revlog, target_docket)
            # EXP-TODO: if this is a cache, this should use a cache vfs, not a
            # store vfs
            tr.add(datafile, target_docket.data_length)
            with revlog.opener(datafile, b'r+') as fd:
                fd.seek(target_docket.data_length)
                fd.write(data)
                if feed_data:
                    # NOTE(review): these branches look inverted (plain
                    # read() when mmap is requested, mmapread() otherwise);
                    # compare with persisted_data() which mmaps when
                    # use_mmap is set -- confirm intent.
                    if use_mmap:
                        fd.seek(0)
                        new_data = fd.read(new_length)
                    else:
                        fd.flush()
                        new_data = util.buffer(util.mmapread(fd, new_length))
            target_docket.data_length = new_length
            target_docket.data_unused = new_unused

    if data is None:
        # otherwise fallback to a full new export
        target_docket = NodeMapDocket()
        datafile = _rawdata_filepath(revlog, target_docket)
        if util.safehasattr(revlog.index, "nodemap_data_all"):
            data = revlog.index.nodemap_data_all()
        else:
            # slow pure-python fallback (see the `Nodemap Trie` section)
            data = persistent_data(revlog.index)
        # EXP-TODO: if this is a cache, this should use a cache vfs, not a
        # store vfs

        tryunlink = revlog.opener.tryunlink

        def abortck(tr):
            # remove the half-written data file if the transaction aborts
            tryunlink(datafile)

        callback_id = b"delete-%s" % datafile

        # some flavor of the transaction abort does not cleanup new file, it
        # simply empties them.
        tr.addabort(callback_id, abortck)
        with revlog.opener(datafile, b'w+') as fd:
            fd.write(data)
            if feed_data:
                # NOTE(review): same possibly-inverted use_mmap branches as
                # in the incremental path above -- confirm intent.
                if use_mmap:
                    new_data = data
                else:
                    fd.flush()
                    new_data = util.buffer(util.mmapread(fd, len(data)))
        target_docket.data_length = len(data)
    target_docket.tip_rev = revlog.tiprev()
    target_docket.tip_node = revlog.node(target_docket.tip_rev)
    # EXP-TODO: if this is a cache, this should use a cache vfs, not a
    # store vfs
    file_path = revlog.nodemap_file
    if pending:
        file_path += b'.a'
        tr.registertmp(file_path)
    else:
        tr.addbackup(file_path)

    # atomically publish the new docket pointing at the new/updated data
    with revlog.opener(file_path, b'w', atomictemp=True) as fp:
        fp.write(target_docket.serialize())
    revlog._nodemap_docket = target_docket
    if feed_data:
        revlog.index.update_nodemap_data(target_docket, new_data)

    # search for old index file in all cases, some older process might have
    # left one behind.
    olds = _other_rawdata_filepath(revlog, target_docket)
    if olds:
        realvfs = getattr(revlog, '_realopener', revlog.opener)

        def cleanup(tr):
            for oldfile in olds:
                realvfs.tryunlink(oldfile)

        callback_id = b"revlog-cleanup-nodemap-%s" % revlog.nodemap_file
        tr.addpostclose(callback_id, cleanup)
238 238
239 239
240 240 ### Nodemap docket file
241 241 #
242 242 # The nodemap data are stored on disk using 2 files:
243 243 #
244 244 # * a raw data files containing a persistent nodemap
245 245 # (see `Nodemap Trie` section)
246 246 #
247 247 # * a small "docket" file containing metadata
248 248 #
249 249 # While the nodemap data can be multiple tens of megabytes, the "docket" is
250 250 # small, it is easy to update it automatically or to duplicate its content
251 251 # during a transaction.
252 252 #
253 253 # Multiple raw data can exist at the same time (The currently valid one and a
254 254 # new one being used by an in progress transaction). To accommodate this, the
255 255 # filename hosting the raw data has a variable part. The exact filename is
256 256 # specified inside the "docket" file.
257 257 #
258 258 # The docket file contains information to find, qualify and validate the raw
259 259 # data. Its content is currently very light, but it will expand as the on disk
260 260 # nodemap gains the necessary features to be used in production.
261 261
# version of the on-disk docket format, bumped on incompatible changes
ONDISK_VERSION = 1
# single byte holding the format version
S_VERSION = struct.Struct(">B")
# header fields: uid size, tip rev, data length, unused data, tip node size
S_HEADER = struct.Struct(">BQQQQ")

# number of random bytes used to build a docket uid
ID_SIZE = 8
267 267
268 268
def _make_uid():
    """return a new unique identifier.

    The identifier is random and composed of ascii characters."""
    # ID_SIZE random bytes, hex-encoded into ascii
    random_id = os.urandom(ID_SIZE)
    return hex(random_id)
274 274
275 275
class NodeMapDocket(object):
    """metadata associated with persistent nodemap data

    The persistent data may come from disk or be on their way to disk.
    """

    def __init__(self, uid=None):
        if uid is None:
            uid = _make_uid()
        # unique identifier of the associated raw-data file; preserved when
        # data is appended, regenerated when a brand new file is created.
        self.uid = uid
        # tipmost revision covered by the data file; that revision and all
        # revisions below it are expected to be encoded there.
        self.tip_rev = None
        # node of that tipmost revision; a mismatch with the current index
        # invalidates the docket.  Not airtight against destructive
        # operations that preserve tip_rev+tip_node, but other caches (eg:
        # branchmap) share that weakness.
        self.tip_node = None
        # number of valid bytes in the data file; a shorter file is
        # corrupted, any trailing extra bytes are ignored.
        self.data_length = None
        # bytes still present in the data file but no longer used.
        self.data_unused = 0

    def copy(self):
        clone = NodeMapDocket(uid=self.uid)
        clone.tip_rev = self.tip_rev
        clone.tip_node = self.tip_node
        clone.data_length = self.data_length
        clone.data_unused = self.data_unused
        return clone

    def __cmp__(self, other):
        # python2-style three-way comparison on (uid, data_length)
        if self.uid < other.uid:
            return -1
        if self.uid > other.uid:
            return 1
        if self.data_length < other.data_length:
            return -1
        if self.data_length > other.data_length:
            return 1
        return 0

    def __eq__(self, other):
        return self.uid == other.uid and self.data_length == other.data_length

    def serialize(self):
        """return serialized bytes for a docket using the passed uid"""
        header = S_HEADER.pack(
            len(self.uid),
            self.tip_rev,
            self.data_length,
            self.data_unused,
            len(self.tip_node),
        )
        return b''.join(
            [S_VERSION.pack(ONDISK_VERSION), header, self.uid, self.tip_node]
        )
347 347
348 348
349 349 def _rawdata_filepath(revlog, docket):
350 350 """The (vfs relative) nodemap's rawdata file for a given uid"""
351 351 if revlog.nodemap_file.endswith(b'.n.a'):
352 352 prefix = revlog.nodemap_file[:-4]
353 353 else:
354 354 prefix = revlog.nodemap_file[:-2]
355 355 return b"%s-%s.nd" % (prefix, docket.uid)
356 356
357 357
def _other_rawdata_filepath(revlog, docket):
    """list rawdata files of this revlog with a different uid than `docket`

    Such files are leftovers from older processes and can be deleted."""
    prefix = revlog.nodemap_file[:-2]
    pattern = re.compile(br"(^|/)%s-[0-9a-f]+\.nd$" % prefix)
    new_file_path = _rawdata_filepath(revlog, docket)
    new_file_name = revlog.opener.basename(new_file_path)
    dirpath = revlog.opener.dirname(new_file_path)
    return [
        f
        for f in revlog.opener.listdir(dirpath)
        if pattern.match(f) and f != new_file_name
    ]
369 369
370 370
371 371 ### Nodemap Trie
372 372 #
373 373 # This is a simple reference implementation to compute and persist a nodemap
374 374 # trie. This reference implementation is write only. The python version of this
375 375 # is not expected to be actually used, since it won't provide performance
376 376 # improvement over existing non-persistent C implementation.
377 377 #
378 378 # The nodemap is persisted as Trie using 4bits-address/16-entries block. each
379 379 # revision can be addressed using its node's shortest prefix.
380 380 #
381 381 # The trie is stored as a sequence of block. Each block contains 16 entries
382 382 # (signed 32bit integer, big endian -- matching S_BLOCK's `>l` format). Each entry can be one of the following:
383 383 #
384 384 # * value >= 0 -> index of sub-block
385 385 # * value == -1 -> no value
386 386 # * value < -1 -> encoded revision: rev = -(value+2)
387 387 #
388 388 # See REV_OFFSET and _transform_rev below.
389 389 #
390 390 # The implementation focuses on simplicity, not on performance. A Rust
391 391 # implementation should provide an efficient version of the same binary
392 392 # persistence. This reference python implementation is never meant to be
393 393 # extensively used in production.
394 394
395 395
396 396 def persistent_data(index):
397 397 """return the persistent binary form for a nodemap for a given index"""
398 398 trie = _build_trie(index)
399 399 return _persist_trie(trie)
400 400
401 401
402 402 def update_persistent_data(index, root, max_idx, last_rev):
403 403 """return the incremental update for persistent nodemap from a given index"""
404 404 changed_block, trie = _update_trie(index, root, last_rev)
405 405 return (
406 406 changed_block * S_BLOCK.size,
407 407 _persist_trie(trie, existing_idx=max_idx),
408 408 )
409 409
410 410
411 411 S_BLOCK = struct.Struct(">" + ("l" * 16))
412 412
413 413 NO_ENTRY = -1
414 414 # rev 0 need to be -2 because 0 is used by block, -1 is a special value.
415 415 REV_OFFSET = 2
416 416
417 417
418 418 def _transform_rev(rev):
419 419 """Return the number used to represent the rev in the tree.
420 420
421 421 (or retrieve a rev number from such representation)
422 422
423 423 Note that this is an involution, a function equal to its inverse (i.e.
424 424 which gives the identity when applied to itself).
425 425 """
426 426 return -(rev + REV_OFFSET)
427 427
428 428
429 429 def _to_int(hex_digit):
430 430 """turn an hexadecimal digit into a proper integer"""
431 431 return int(hex_digit, 16)
432 432
433 433
class Block(dict):
    """represent a block of the Trie

    contains up to 16 entry indexed from 0 to 15"""

    def __init__(self):
        super(Block, self).__init__()
        # ID of this block on disk, if it has already been persisted
        self.ondisk_id = None

    def __iter__(self):
        # always produce the 16 slots in order; missing entries yield None
        return iter(self.get(slot) for slot in range(16))
446 446
447 447
def _build_trie(index):
    """build a nodemap trie

    The nodemap stores revision number for each unique prefix.

    Each block is a dictionary with keys in `[0, 15]`. Values are either
    another block or a revision number.
    """
    root = Block()
    for rev in range(len(index)):
        _insert_into_block(index, 0, root, rev, hex(index[rev][7]))
    return root
461 461
462 462
def _update_trie(index, root, last_rev):
    """insert all revisions newer than `last_rev` into an existing trie

    Returns `(changed, root)` where `changed` is the number of block
    updates performed by the insertions."""
    changed = 0
    for rev in range(last_rev + 1, len(index)):
        changed += _insert_into_block(index, 0, root, rev, hex(index[rev][7]))
    return changed, root
470 470
471 471
def _insert_into_block(index, level, block, current_rev, current_hex):
    """insert a new revision in a block

    index: the index we are adding revision for
    level: the depth of the current block in the trie
    block: the block currently being considered
    current_rev: the revision number we are adding
    current_hex: the hexadecimal representation of the node of that revision

    Returns the number of blocks modified by this insertion.
    """
    changed = 1
    # any change invalidates the persisted copy of this block
    if block.ondisk_id is not None:
        block.ondisk_id = None
    hex_digit = _to_int(current_hex[level : level + 1])
    entry = block.get(hex_digit)
    if entry is None:
        # free slot: store the revision number directly
        block[hex_digit] = current_rev
    elif isinstance(entry, dict):
        # an intermediate block already exists, recurse into it
        changed += _insert_into_block(
            index, level + 1, entry, current_rev, current_hex
        )
    else:
        # collision with a previously unique prefix: create a deeper block
        # holding both the old and the new revision
        deeper = Block()
        block[hex_digit] = deeper
        _insert_into_block(
            index, level + 1, deeper, entry, hex(index[entry][7])
        )
        _insert_into_block(index, level + 1, deeper, current_rev, current_hex)
    return changed
504 504
505 505
def _persist_trie(root, existing_idx=None):
    """turn a nodemap trie into persistent binary data

    See `_build_trie` for nodemap trie structure"""
    block_map = {}
    # when appending after already-persisted blocks, new indexes start there
    base_idx = 0 if existing_idx is None else existing_idx + 1
    chunks = []
    for node in _walk_trie(root):
        if node.ondisk_id is not None:
            # already persisted: keep its on-disk index, write nothing
            block_map[id(node)] = node.ondisk_id
        else:
            block_map[id(node)] = len(chunks) + base_idx
            chunks.append(_persist_block(node, block_map))
    return b''.join(chunks)
523 523
524 524
525 525 def _walk_trie(block):
526 526 """yield all the block in a trie
527 527
528 528 Children blocks are always yield before their parent block.
529 529 """
530 530 for (__, item) in sorted(block.items()):
531 531 if isinstance(item, dict):
532 532 for sub_block in _walk_trie(item):
533 533 yield sub_block
534 534 yield block
535 535
536 536
def _persist_block(block_node, block_map):
    """produce persistent binary data for a single block

    Children block are assumed to be already persisted and present in
    block_map.
    """
    values = [_to_value(entry, block_map) for entry in block_node]
    return S_BLOCK.pack(*values)
545 545
546 546
def _to_value(item, block_map):
    """persist any value as an integer"""
    if item is None:
        return NO_ENTRY
    if isinstance(item, dict):
        # sub-block: store its on-disk index
        return block_map[id(item)]
    # plain revision number, stored in its encoded form
    return _transform_rev(item)
555 555
556 556
def parse_data(data):
    """parse nodemap data into a nodemap Trie

    Returns a `(root, max_idx)` pair: `root` is the root Block of the trie
    and `max_idx` the index of the last on-disk block (used as the base for
    later incremental appends), or `(Block(), None)` for empty data.
    """
    if (len(data) % S_BLOCK.size) != 0:
        msg = "nodemap data size is not a multiple of block size (%d): %d"
        raise error.Abort(msg % (S_BLOCK.size, len(data)))
    if not data:
        return Block(), None
    block_map = {}
    new_blocks = []
    # first pass: materialize one Block per on-disk chunk
    for i in range(0, len(data), S_BLOCK.size):
        block = Block()
        block.ondisk_id = len(block_map)
        block_map[block.ondisk_id] = block
        block_data = data[i : i + S_BLOCK.size]
        values = S_BLOCK.unpack(block_data)
        new_blocks.append((block, values))
    # second pass: resolve raw values into sub-blocks or revision numbers
    for b, values in new_blocks:
        for idx, v in enumerate(values):
            if v == NO_ENTRY:
                continue
            elif v >= 0:
                # non-negative values index another block
                b[idx] = block_map[v]
            else:
                # negative values encode a revision number
                b[idx] = _transform_rev(v)
    # blocks are persisted children-first (see _persist_trie), so after the
    # loop `block` is the root and `i` the offset of the last block
    return block, i // S_BLOCK.size
582 582
583 583
584 584 # debug utility
585 585
586 586
def check_data(ui, index, data):
    """verify that the provided nodemap data are valid for the given index

    Returns 0 when index and nodemap agree, 1 otherwise; discrepancies are
    reported on `ui`."""
    ret = 0
    ui.status((b"revision in index:   %d\n" % len(index)) if False else (b"revision in index: %d\n") % len(index))
    root, __ = parse_data(data)
    all_revs = set(_all_revisions(root))
    ui.status((b"revision in nodemap: %d\n") % len(all_revs))
    for r in range(len(index)):
        if r not in all_revs:
            ui.write_err(b" revision missing from nodemap: %d\n" % r)
            ret = 1
            continue
        all_revs.remove(r)
        # the revision must also be reachable through its own node
        nm_rev = _find_node(root, hex(index[r][7]))
        if nm_rev is None:
            ui.write_err(b" revision node does not match any entries: %d\n" % r)
            ret = 1
        elif nm_rev != r:
            ui.write_err(
                b" revision node does not match the expected revision: "
                b"%d != %d\n" % (r, nm_rev)
            )
            ret = 1

    # anything left in all_revs exists in the nodemap but not in the index
    for r in sorted(all_revs):
        ui.write_err(b" extra revision in nodemap: %d\n" % r)
        ret = 1
    return ret
620 620
621 621
def _all_revisions(root):
    """return all revisions stored in a Trie"""
    for block in _walk_trie(root):
        for entry in block:
            # skip empty slots and sub-block references
            if entry is not None and not isinstance(entry, Block):
                yield entry
629 629
630 630
def _find_node(block, node):
    """find the revision associated with a given node

    Returns None when no entry matches the node's prefix."""
    current = block
    remaining = node
    while True:
        entry = current.get(_to_int(remaining[0:1]))
        if not isinstance(entry, dict):
            return entry
        # descend one level, consuming one hex digit of the node
        current = entry
        remaining = remaining[1:]
637 637
638 638
def get_nodemap_file(opener, indexfile):
    """return the path of the nodemap docket matching `indexfile`

    For a pending index (`.i.a`), prefer the pending docket (`.n.a`) when
    one exists on disk."""
    if not indexfile.endswith(b'.a'):
        return indexfile[:-2] + b".n"
    pending_path = indexfile[:-4] + b".n.a"
    if opener.exists(pending_path):
        return pending_path
    return indexfile[:-4] + b".n"
General Comments 0
You need to be logged in to leave comments. Login now