##// END OF EJS Templates
simplestorerepo: migrate to in-hg CBOR code...
Augie Fackler -
r41193:ad51e611 default
parent child Browse files
Show More
@@ -1,704 +1,705 b''
1 1 # simplestorerepo.py - Extension that swaps in alternate repository storage.
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 # To use this with the test suite:
9 9 #
10 10 # $ HGREPOFEATURES="simplestore" ./run-tests.py \
11 11 # --extra-config-opt extensions.simplestore=`pwd`/simplestorerepo.py
12 12
13 13 from __future__ import absolute_import
14 14
15 15 import stat
16 16
17 17 from mercurial.i18n import _
18 18 from mercurial.node import (
19 19 bin,
20 20 hex,
21 21 nullid,
22 22 nullrev,
23 23 )
24 24 from mercurial.thirdparty import (
25 25 attr,
26 cbor,
27 26 )
28 27 from mercurial import (
29 28 ancestor,
30 29 bundlerepo,
31 30 error,
32 31 extensions,
33 32 localrepo,
34 33 mdiff,
35 34 pycompat,
36 35 repository,
37 36 revlog,
38 37 store,
39 38 verify,
40 39 )
41 40 from mercurial.utils import (
41 cborutil,
42 42 interfaceutil,
43 43 storageutil,
44 44 )
45 45
# Extension authors, take note: 'ships-with-hg-core' is reserved for
# extensions that SHIP WITH MERCURIAL. Extensions maintained outside the
# main tree should list the Mercurial version(s) they were tested against,
# or omit the attribute altogether.
testedwith = 'ships-with-hg-core'

# Repository requirement string that this extension registers as supported
# and adds to the requirements of newly created repositories.
REQUIREMENT = 'testonly-simplestore'
53 53
def validatenode(node):
    """Raise ``ValueError`` unless ``node`` looks like a binary node.

    Integers are rejected outright (they are revision numbers, not nodes);
    anything else must have a length of exactly 20.
    """
    problem = None

    if isinstance(node, int):
        problem = 'expected node; got int'
    elif len(node) != 20:
        problem = 'expected 20 byte node'

    if problem is not None:
        raise ValueError(problem)
60 60
def validaterev(rev):
    """Raise ``ValueError`` unless ``rev`` is an ``int``."""
    if isinstance(rev, int):
        return

    raise ValueError('expected int')
64 64
class simplestoreerror(error.StorageError):
    """Storage error raised by the simple store implementation."""
67 67
@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class simplestorerevisiondelta(object):
    """Revision delta record declared to implement ``irevisiondelta``.

    ``filestorage.emitrevisions()`` hands this class to
    ``storageutil.emitrevisions()`` as the result type. All attributes are
    required except ``linknode``, which defaults to ``None``.
    """

    # Attribute order defines the positional constructor signature.
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()
    linknode = attr.ib(default=None)
80 80
@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class simplefilestoreproblem(object):
    """Immutable verification problem record (``iverifyproblem``).

    Every field is optional and defaults to ``None``.
    """

    # Attribute order defines the positional constructor signature.
    warning = attr.ib(default=None)
    error = attr.ib(default=None)
    node = attr.ib(default=None)
87 87
@interfaceutil.implementer(repository.ifilestorage)
class filestorage(object):
    """Implements storage for a tracked path.

    Data is stored in the VFS in a directory corresponding to the tracked
    path.

    Index data is stored in an ``index`` file using CBOR. The index is a
    single CBOR array of maps, one map per revision, with the keys
    ``node``, ``p1``, ``p2``, ``linkrev`` and ``flags``.

    Fulltext data is stored in files having names of the node.
    """

    def __init__(self, svfs, path):
        """Load the index for ``path`` from the store VFS ``svfs``."""
        self._svfs = svfs
        self._path = path

        self._storepath = b'/'.join([b'data', path])
        self._indexpath = b'/'.join([self._storepath, b'index'])

        indexdata = self._svfs.tryread(self._indexpath)
        if indexdata:
            # _reflectindexupdate() writes the whole index as ONE CBOR
            # array, while decodeall() returns a list holding every
            # top-level value it decoded. Unwrap that outer list so that
            # self._indexdata is the list of entry maps itself.
            decoded = cborutil.decodeall(indexdata)
            indexdata = decoded[0] if decoded else []

        self._indexdata = indexdata or []
        self._indexbynode = {}
        self._indexbyrev = {}
        self._revbynode = {}
        self._index = []
        self._refreshindex()

    def _refreshindex(self):
        """Rebuild every derived lookup structure from ``_indexdata``."""
        self._indexbynode.clear()
        self._indexbyrev.clear()
        self._revbynode = {}
        self._index = []

        for i, entry in enumerate(self._indexdata):
            self._indexbynode[entry[b'node']] = entry
            self._indexbyrev[i] = entry
            # setdefault: should a node ever appear twice, keep resolving
            # it to its first revision number.
            self._revbynode.setdefault(entry[b'node'], i)

        self._indexbynode[nullid] = {
            b'node': nullid,
            b'p1': nullid,
            b'p2': nullid,
            b'linkrev': nullrev,
            b'flags': 0,
        }

        self._indexbyrev[nullrev] = {
            b'node': nullid,
            b'p1': nullid,
            b'p2': nullid,
            b'linkrev': nullrev,
            b'flags': 0,
        }

        self._revbynode.setdefault(nullid, nullrev)

        for entry in self._indexdata:
            p1rev, p2rev = self.parentrevs(self.rev(entry[b'node']))

            # start, length, rawsize, chainbase, linkrev, p1, p2, node
            self._index.append((0, 0, 0, -1, entry[b'linkrev'], p1rev, p2rev,
                                entry[b'node']))

        self._index.append((0, 0, 0, -1, -1, -1, -1, nullid))

    def __len__(self):
        """Return the number of stored revisions (null excluded)."""
        return len(self._indexdata)

    def __iter__(self):
        """Iterate over revision numbers ``0 .. len(self) - 1``."""
        return iter(range(len(self)))

    def revs(self, start=0, stop=None):
        """Return revision numbers from ``start`` to ``stop`` inclusive.

        With ``stop`` omitted, runs from ``start`` to the last revision.
        When ``start > stop`` the numbers are produced in descending order.
        """
        step = 1
        if stop is not None:
            if start > stop:
                step = -1

            # Make ``stop`` inclusive in the direction of travel.
            stop += step
        else:
            stop = len(self)

        return range(start, stop, step)

    def parents(self, node):
        """Return the ``(p1, p2)`` nodes of ``node``.

        Raises ``ValueError`` for a malformed node and ``KeyError`` for an
        unknown one.
        """
        validatenode(node)

        if node not in self._indexbynode:
            raise KeyError('unknown node')

        entry = self._indexbynode[node]

        return entry[b'p1'], entry[b'p2']

    def parentrevs(self, rev):
        """Return the ``(p1, p2)`` revision numbers of revision ``rev``."""
        p1, p2 = self.parents(self._indexbyrev[rev][b'node'])
        return self.rev(p1), self.rev(p2)

    def rev(self, node):
        """Return the revision number of ``node``.

        Raises ``ValueError`` for a malformed node and
        ``error.LookupError`` for an unknown one.
        """
        validatenode(node)

        try:
            return self._revbynode[node]
        except KeyError:
            raise error.LookupError(node, self._indexpath, _('no node'))

    def node(self, rev):
        """Return the node of revision number ``rev``."""
        validaterev(rev)

        return self._indexbyrev[rev][b'node']

    def hasnode(self, node):
        """Return whether ``node`` is known (the null node always is)."""
        validatenode(node)
        return node in self._indexbynode

    def censorrevision(self, tr, censornode, tombstone=b''):
        """Not implemented for the simple store."""
        raise NotImplementedError('TODO')

    def lookup(self, node):
        """Resolve ``node`` to a binary node.

        Accepts an integer revision, a 20 byte binary node, a decimal
        revision string (negative values count back from the end) or a
        40 character hex node. Raises ``error.LookupError`` when the input
        matches none of these or names an unknown node.
        """
        if isinstance(node, int):
            return self.node(node)

        if len(node) == 20:
            self.rev(node)
            return node

        try:
            rev = int(node)
            # Reject non-canonical spellings such as ``'01'`` or ``' 1'``.
            if '%d' % rev != node:
                raise ValueError

            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError

            return self.node(rev)
        except (ValueError, OverflowError):
            pass

        if len(node) == 40:
            try:
                rawnode = bin(node)
                self.rev(rawnode)
                return rawnode
            except (TypeError, ValueError):
                # Not valid hex. unhexlify raises TypeError on Python 2 and
                # binascii.Error (a ValueError subclass) on Python 3; either
                # way fall through to the generic lookup failure below.
                pass

        raise error.LookupError(node, self._path, _('invalid lookup input'))

    def linkrev(self, rev):
        """Return the linkrev recorded for revision ``rev``."""
        validaterev(rev)

        return self._indexbyrev[rev][b'linkrev']

    def _flags(self, rev):
        """Return the flags recorded for revision ``rev``."""
        validaterev(rev)

        return self._indexbyrev[rev][b'flags']

    def _candelta(self, baserev, rev):
        """Return False if either revision has a rawtext-changing flag."""
        validaterev(baserev)
        validaterev(rev)

        if ((self._flags(baserev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self._flags(rev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False

        return True

    def _processflags(self, text, flags, operation, raw=False):
        """Run registered flag processors over ``text``.

        ``operation`` is ``'read'`` or ``'write'``. Returns a
        ``(text, validatehash)`` tuple where ``validatehash`` says whether
        the result should still be checked against the node hash. Raises
        ``simplestoreerror`` for unknown flags or for a set flag that has
        no registered processor.
        """
        if flags == 0:
            return text, True

        if flags & ~revlog.REVIDX_KNOWN_FLAGS:
            raise simplestoreerror(_("incompatible revision flag '%#x'") %
                                   (flags & ~revlog.REVIDX_KNOWN_FLAGS))

        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = revlog.REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply
            # the related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in revlog._flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise simplestoreerror(message)

                processor = revlog._flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                validatehash = validatehash and vhash

        return text, validatehash

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Raise ``simplestoreerror`` if ``text`` does not hash to ``node``.

        When both parents are omitted they are looked up from the index.
        ``rev`` is accepted but not used.
        """
        if p1 is None and p2 is None:
            p1, p2 = self.parents(node)
        if node != storageutil.hashrevisionsha1(text, p1, p2):
            raise simplestoreerror(_("integrity check failed on %s") %
                                   self._path)

    def revision(self, nodeorrev, raw=False):
        """Return the stored text of a revision given by node or number.

        The null node yields ``b''``. The text read from the store is run
        through the flag processors and, when they allow it, verified
        against the node hash.
        """
        if isinstance(nodeorrev, int):
            node = self.node(nodeorrev)
        else:
            node = nodeorrev
        validatenode(node)

        if node == nullid:
            return b''

        rev = self.rev(node)
        flags = self._flags(rev)

        path = b'/'.join([self._storepath, hex(node)])
        rawtext = self._svfs.read(path)

        text, validatehash = self._processflags(rawtext, flags, 'read',
                                                raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

    def read(self, node):
        """Return the revision text with any metadata header removed."""
        validatenode(node)

        revision = self.revision(node)

        if not revision.startswith(b'\1\n'):
            return revision

        # The header is delimited by a second b'\1\n' marker; the file
        # content starts right after it.
        start = revision.index(b'\1\n', 2)
        return revision[start + 2:]

    def renamed(self, node):
        """Return ``(copy source, copy source node)`` or ``False``.

        Revisions whose first parent is not null are never treated as
        copies.
        """
        validatenode(node)

        if self.parents(node)[0] != nullid:
            return False

        fulltext = self.revision(node)
        m = storageutil.parsemeta(fulltext)[0]

        if m and 'copy' in m:
            return m['copy'], bin(m['copyrev'])

        return False

    def cmp(self, node, text):
        """Return True if ``text`` differs from the content of ``node``."""
        validatenode(node)

        t = text

        if text.startswith(b'\1\n'):
            # Text that begins with the metadata marker is stored behind an
            # empty metadata header; hash it the same way.
            t = b'\1\n\1\n' + text

        p1, p2 = self.parents(node)

        if storageutil.hashrevisionsha1(t, p1, p2) == node:
            return False

        if self.iscensored(self.rev(node)):
            return text != b''

        if self.renamed(node):
            t2 = self.read(node)
            return t2 != text

        return True

    def size(self, rev):
        """Return the content length of revision ``rev``.

        Copy metadata is excluded from the length; censored revisions
        report 0.
        """
        validaterev(rev)

        node = self._indexbyrev[rev][b'node']

        if self.renamed(node):
            return len(self.read(node))

        if self.iscensored(rev):
            return 0

        return len(self.revision(node))

    def iscensored(self, rev):
        """Return the censored flag bit of ``rev`` (truthy if censored)."""
        validaterev(rev)

        return self._flags(rev) & repository.REVISION_FLAG_CENSORED

    def commonancestorsheads(self, a, b):
        """Return the nodes heading the common ancestors of ``a``, ``b``."""
        validatenode(a)
        validatenode(b)

        a = self.rev(a)
        b = self.rev(b)

        ancestors = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return pycompat.maplist(self.node, ancestors)

    def descendants(self, revs):
        """Yield revision numbers descending from any of ``revs``."""
        # This is a copy of revlog.descendants()
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    # Required by verify.
    def files(self):
        """Return store paths of the files in this path's directory.

        Transaction ``undo.backup.*`` files are left out.
        """
        entries = self._svfs.listdir(self._storepath)

        # Strip out undo.backup.* files created as part of transaction
        # recording.
        entries = [f for f in entries if not f.startswith('undo.backup.')]

        return [b'/'.join((self._storepath, f)) for f in entries]

    def storageinfo(self, exclusivefiles=False, sharedfiles=False,
                    revisionscount=False, trackedsize=False,
                    storedsize=False):
        """Return placeholder storage information.

        The arguments are ignored: every key is always present and only
        ``revisionscount`` carries a computed value.
        """
        # TODO do a real implementation of this
        return {
            'exclusivefiles': [],
            'sharedfiles': [],
            'revisionscount': len(self),
            'trackedsize': 0,
            'storedsize': None,
        }

    def verifyintegrity(self, state):
        """Yield a ``simplefilestoreproblem`` per unreadable revision.

        ``state['skipread']`` is reset to an empty set and every node that
        failed to load is added to it.
        """
        state['skipread'] = set()
        for rev in self:
            node = self.node(rev)
            try:
                self.revision(node)
            except Exception as e:
                # Deliberately broad: any failure to load the revision is
                # reported as a verification problem rather than aborting.
                yield simplefilestoreproblem(
                    error='unpacking %s: %s' % (node, e),
                    node=node)
                state['skipread'].add(node)

    def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                      assumehaveparentrevisions=False,
                      deltamode=repository.CG_DELTAMODE_STD):
        """Yield ``simplestorerevisiondelta`` objects for ``nodes``.

        The null node is filtered out; the real work is delegated to
        ``storageutil.emitrevisions()``.
        """
        # TODO this will probably break on some ordering options.
        nodes = [n for n in nodes if n != nullid]
        if not nodes:
            return
        for delta in storageutil.emitrevisions(
                self, nodes, nodesorder, simplestorerevisiondelta,
                revisiondata=revisiondata,
                assumehaveparentrevisions=assumehaveparentrevisions,
                deltamode=deltamode):
            yield delta

    def add(self, text, meta, transaction, linkrev, p1, p2):
        """Store ``text`` with optional metadata; return its node.

        A metadata header is packed in front of the text when ``meta`` is
        non-empty or the text itself starts with the metadata marker.
        """
        if meta or text.startswith(b'\1\n'):
            text = storageutil.packmeta(meta, text)

        return self.addrevision(text, transaction, linkrev, p1, p2)

    def addrevision(self, text, transaction, linkrev, p1, p2, node=None,
                    flags=revlog.REVIDX_DEFAULT_FLAGS, cachedelta=None):
        """Store a revision and return its node.

        Nothing is written if the node is already present. ``cachedelta``
        is accepted but not used.
        """
        validatenode(p1)
        validatenode(p2)

        if flags:
            node = node or storageutil.hashrevisionsha1(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        node = node or storageutil.hashrevisionsha1(text, p1, p2)

        if node in self._indexbynode:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self._addrawrevision(node, rawtext, transaction, linkrev, p1,
                                    p2, flags)

    def _addrawrevision(self, node, rawtext, transaction, link, p1, p2,
                        flags):
        """Write ``rawtext`` to the store and append its index entry."""
        transaction.addbackup(self._indexpath)

        path = b'/'.join([self._storepath, hex(node)])

        self._svfs.write(path, rawtext)

        self._indexdata.append({
            b'node': node,
            b'p1': p1,
            b'p2': p2,
            b'linkrev': link,
            b'flags': flags,
        })

        self._reflectindexupdate()

        return node

    def _reflectindexupdate(self):
        """Rebuild the in-memory indexes and persist the index file."""
        self._refreshindex()
        # streamencode() produces the encoding in chunks of bytes, so they
        # must be joined with a bytes separator ('' is a str on Python 3).
        self._svfs.write(self._indexpath,
                         b''.join(cborutil.streamencode(self._indexdata)))

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None,
                 maybemissingparents=False):
        """Apply a group of deltas; return the nodes seen, in order.

        Each delta is resolved to a fulltext against its base and stored.
        Nodes that already exist are returned but not rewritten, and
        ``addrevisioncb`` is only invoked for newly stored nodes. Aborts
        if ``maybemissingparents`` is requested.
        """
        if maybemissingparents:
            raise error.Abort(_('simple store does not support missing parents '
                                'write mode'))

        nodes = []

        transaction.addbackup(self._indexpath)

        for node, p1, p2, linknode, deltabase, delta, flags in deltas:
            linkrev = linkmapper(linknode)
            flags = flags or revlog.REVIDX_DEFAULT_FLAGS

            nodes.append(node)

            if node in self._indexbynode:
                continue

            # Need to resolve the fulltext from the delta base.
            if deltabase == nullid:
                text = mdiff.patch(b'', delta)
            else:
                text = mdiff.patch(self.revision(deltabase), delta)

            self._addrawrevision(node, text, transaction, linkrev, p1, p2,
                                 flags)

            if addrevisioncb:
                addrevisioncb(self, node)
        return nodes

    def _headrevs(self):
        """Return the sorted revision numbers that are nobody's parent."""
        # Assume all revisions are heads by default.
        revishead = {rev: True for rev in self._indexbyrev}

        for entry in self._indexbyrev.values():
            # Unset head flag for all seen parents.
            revishead[self.rev(entry[b'p1'])] = False
            revishead[self.rev(entry[b'p2'])] = False

        return [rev for rev, ishead in sorted(revishead.items())
                if ishead]

    def heads(self, start=None, stop=None):
        """Return head nodes, optionally bounded by ``start`` / ``stop``.

        With no arguments, returns every head (or ``[nullid]`` for an
        empty store). Otherwise only heads reachable from ``start`` are
        returned, and nodes listed in ``stop`` are treated as if they had
        no children.
        """
        # This is copied from revlog.py.
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self._headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                    heads.add(r)
                if p in heads and p not in stoprevs:
                    heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """Return the nodes of the direct children of ``node``."""
        validatenode(node)

        # This is a copy of revlog.children().
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                # Parentless revisions count as children of the null node.
                c.append(self.node(r))
        return c

    def getstrippoint(self, minlink):
        """Return the result of ``storageutil.resolvestripinfo()``."""
        return storageutil.resolvestripinfo(
            minlink, len(self) - 1, self._headrevs(), self.linkrev,
            self.parentrevs)

    def strip(self, minlink, transaction):
        """Drop index entries from the strip point for ``minlink`` onward.

        Only the index is truncated; ``transaction`` is not used.
        """
        if not len(self):
            return

        rev, _ignored = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # Purge index data starting at the requested revision.
        self._indexdata[rev:] = []
        self._reflectindexupdate()
622 623
def issimplestorefile(f, kind, st):
    """Return True if store file ``f`` should be attributed to this store.

    Only regular files that are neither revlogs nor transaction ``undo.``
    files qualify.
    """
    isregular = kind == stat.S_IFREG
    if not isregular or store.isrevlog(f, kind, st):
        return False

    # Transaction undo files are ignored; anything else is assumed to
    # belong to the simple store.
    return not f.startswith('undo.')
636 637
class simplestore(store.encodedstore):
    """Encoded store that also reports the simple store's data files."""

    def datafiles(self):
        """Yield the parent's data files, then the non-revlog ones.

        The extra entries are ``(unencoded, encoded, size)`` tuples where
        ``unencoded`` is ``None`` if the file name cannot be decoded.
        """
        for entry in super(simplestore, self).datafiles():
            yield entry

        # Supplement with non-revlog files.
        for rawname, encoded, size in self._walk(
                'data', True, filefilter=issimplestorefile):
            try:
                decoded = store.decodefilename(rawname)
            except KeyError:
                decoded = None

            yield decoded, encoded, size
652 653
def reposetup(ui, repo):
    """Make local repositories hand out ``filestorage`` file stores.

    Non-local repositories are left untouched; bundle repositories are
    rejected with an abort.
    """
    if not repo.local():
        return

    if isinstance(repo, bundlerepo.bundlerepository):
        raise error.Abort(_('cannot use simple store with bundlerepo'))

    basecls = repo.__class__

    class simplestorerepo(basecls):
        def file(self, f):
            return filestorage(self.svfs, f)

    # Swap the instance's class so file() is overridden in place.
    repo.__class__ = simplestorerepo
665 666
def featuresetup(ui, supported):
    """Add the simple store requirement to the ``supported`` set."""
    supported.add(REQUIREMENT)
668 669
def newreporequirements(orig, ui, createopts):
    """Modifies default requirements for new repos to use the simple store."""
    reqs = orig(ui, createopts)

    # The wrapped function's requirements only influence how the store
    # object gets created. Since we provide our own store they could be
    # dropped.
    # TODO do this once we feel like taking the test hit.
    #if 'fncache' in requirements:
    #    requirements.remove('fncache')
    #if 'dotencode' in requirements:
    #    requirements.remove('dotencode')

    reqs.add(REQUIREMENT)
    return reqs
684 685
def makestore(orig, requirements, path, vfstype):
    """Return a ``simplestore`` when the repo requires it.

    Repositories without the simple store requirement fall back to the
    wrapped ``orig`` factory.
    """
    if REQUIREMENT in requirements:
        return simplestore(path, vfstype)

    return orig(requirements, path, vfstype)
690 691
def verifierinit(orig, self, *args, **kwargs):
    """Run the wrapped initializer, then disable orphan-file warnings."""
    orig(self, *args, **kwargs)

    # Store files not lining up with what is advertised is of no concern
    # to us, so keep the verifier from warning about it.
    self.warnorphanstorefiles = False
697 698
def extsetup(ui):
    """Register the requirement and install the extension's wrappers."""
    localrepo.featuresetupfuncs.add(featuresetup)

    # (container, attribute name, wrapper) triples, applied in order.
    wrappers = (
        (localrepo, 'newreporequirements', newreporequirements),
        (localrepo, 'makestore', makestore),
        (verify.verifier, '__init__', verifierinit),
    )
    for container, funcname, wrapper in wrappers:
        extensions.wrapfunction(container, funcname, wrapper)
General Comments 0
You need to be logged in to leave comments. Login now