##// END OF EJS Templates
rawdata: implement `rawdata` for `simplestore` too...
marmoute -
r42950:21771337 default
parent child Browse files
Show More
@@ -1,705 +1,708 b''
1 1 # simplestorerepo.py - Extension that swaps in alternate repository storage.
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 # To use this with the test suite:
9 9 #
10 10 # $ HGREPOFEATURES="simplestore" ./run-tests.py \
11 11 # --extra-config-opt extensions.simplestore=`pwd`/simplestorerepo.py
12 12
13 13 from __future__ import absolute_import
14 14
15 15 import stat
16 16
17 17 from mercurial.i18n import _
18 18 from mercurial.node import (
19 19 bin,
20 20 hex,
21 21 nullid,
22 22 nullrev,
23 23 )
24 24 from mercurial.thirdparty import (
25 25 attr,
26 26 )
27 27 from mercurial import (
28 28 ancestor,
29 29 bundlerepo,
30 30 error,
31 31 extensions,
32 32 localrepo,
33 33 mdiff,
34 34 pycompat,
35 35 repository,
36 36 revlog,
37 37 store,
38 38 verify,
39 39 )
40 40 from mercurial.utils import (
41 41 cborutil,
42 42 interfaceutil,
43 43 storageutil,
44 44 )
45 45
# Extension authors, take note: 'ships-with-hg-core' is reserved for
# extensions distributed as part of Mercurial itself. Anything maintained
# out of tree should instead list the Mercurial version(s) it was tested
# against, or simply omit ``testedwith``.
testedwith = 'ships-with-hg-core'

# Repository requirement that marks a repo as using the simple store. It is
# advertised by featuresetup(), added by newreporequirements() and checked
# by makestore().
REQUIREMENT = 'testonly-simplestore'
53 53
def validatenode(node):
    """Raise ``ValueError`` unless ``node`` looks like a binary node.

    Integers (i.e. revision numbers passed by mistake) are rejected first;
    any other value must have a length of exactly 20.
    """
    problem = None

    if isinstance(node, int):
        problem = 'expected node; got int'
    elif len(node) != 20:
        problem = 'expected 20 byte node'

    if problem is not None:
        raise ValueError(problem)
60 60
def validaterev(rev):
    """Raise ``ValueError`` unless ``rev`` is an ``int``."""
    if isinstance(rev, int):
        return

    raise ValueError('expected int')
64 64
class simplestoreerror(error.StorageError):
    """Storage error raised by the simple store (bad flags, hash mismatch)."""
67 67
@interfaceutil.implementer(repository.irevisiondelta)
@attr.s(slots=True)
class simplestorerevisiondelta(object):
    """Attribute bag declared to implement ``repository.irevisiondelta``.

    ``filestorage.emitrevisions()`` hands this class to
    ``storageutil.emitrevisions()`` as the type of the objects to produce.
    Field order matters: it defines the positional constructor signature.
    """

    # Identity of the revision and of its two parents.
    node = attr.ib()
    p1node = attr.ib()
    p2node = attr.ib()

    # Delta description. NOTE(review): exact semantics of these fields are
    # defined by repository.irevisiondelta, not by this file.
    basenode = attr.ib()
    flags = attr.ib()
    baserevisionsize = attr.ib()
    revision = attr.ib()
    delta = attr.ib()

    # Optional; the only field with a default.
    linknode = attr.ib(default=None)
80 80
@interfaceutil.implementer(repository.iverifyproblem)
@attr.s(frozen=True)
class simplefilestoreproblem(object):
    """Immutable record of a problem found by ``verifyintegrity()``.

    Declared to implement ``repository.iverifyproblem``. Every field is
    optional and defaults to ``None``.
    """

    # Message for a non-fatal finding.
    warning = attr.ib(default=None)
    # Message for a fatal finding.
    error = attr.ib(default=None)
    # Node the finding refers to.
    node = attr.ib(default=None)
87 87
@interfaceutil.implementer(repository.ifilestorage)
class filestorage(object):
    """Implements storage for a tracked path.

    Data is stored in the VFS in a directory corresponding to the tracked
    path.

    Index data is stored in an ``index`` file using CBOR.

    Fulltext data is stored in files having names of the node.
    """

    def __init__(self, svfs, path):
        """Open the storage for ``path`` and load its index from ``svfs``.

        A missing or empty index file results in an empty store.
        """
        self._svfs = svfs
        self._path = path

        self._storepath = b'/'.join([b'data', path])
        self._indexpath = b'/'.join([self._storepath, b'index'])

        indexdata = self._svfs.tryread(self._indexpath)
        if indexdata:
            # NOTE(review): _reflectindexupdate() serializes the whole list
            # as one CBOR value; confirm decodeall() hands back the flat
            # list of entries rather than a one-element list wrapping it.
            indexdata = cborutil.decodeall(indexdata)

        self._indexdata = indexdata or []
        self._indexbynode = {}
        self._indexbyrev = {}
        self._index = []
        self._refreshindex()

    def _refreshindex(self):
        """Rebuild every derived lookup structure from ``_indexdata``."""
        self._indexbynode.clear()
        self._indexbyrev.clear()
        self._index = []

        for i, entry in enumerate(self._indexdata):
            self._indexbynode[entry[b'node']] = entry
            self._indexbyrev[i] = entry

        # Synthetic entries so that the null revision resolves like any
        # other one through both lookup tables.
        self._indexbynode[nullid] = {
            b'node': nullid,
            b'p1': nullid,
            b'p2': nullid,
            b'linkrev': nullrev,
            b'flags': 0,
        }

        self._indexbyrev[nullrev] = {
            b'node': nullid,
            b'p1': nullid,
            b'p2': nullid,
            b'linkrev': nullrev,
            b'flags': 0,
        }

        for i, entry in enumerate(self._indexdata):
            p1rev, p2rev = self.parentrevs(self.rev(entry[b'node']))

            # start, length, rawsize, chainbase, linkrev, p1, p2, node
            self._index.append((0, 0, 0, -1, entry[b'linkrev'], p1rev, p2rev,
                                entry[b'node']))

        # Trailing entry for the null revision.
        self._index.append((0, 0, 0, -1, -1, -1, -1, nullid))

    def __len__(self):
        """Return the number of stored revisions (null excluded)."""
        return len(self._indexdata)

    def __iter__(self):
        """Iterate over revision numbers ``0 .. len(self) - 1``."""
        return iter(range(len(self)))

    def revs(self, start=0, stop=None):
        """Return a range of revision numbers from ``start`` to ``stop``.

        ``stop`` is inclusive when given; the range runs backwards when
        ``start > stop``. Without ``stop`` the range ends at the last
        stored revision.
        """
        step = 1
        if stop is not None:
            if start > stop:
                step = -1

            # Make the caller-supplied bound inclusive.
            stop += step
        else:
            stop = len(self)

        return range(start, stop, step)

    def parents(self, node):
        """Return the ``(p1, p2)`` nodes of ``node``.

        Raises ``KeyError`` for an unknown node and ``ValueError`` for a
        malformed one.
        """
        validatenode(node)

        if node not in self._indexbynode:
            raise KeyError('unknown node')

        entry = self._indexbynode[node]

        return entry[b'p1'], entry[b'p2']

    def parentrevs(self, rev):
        """Return the ``(p1, p2)`` revision numbers of ``rev``."""
        p1, p2 = self.parents(self._indexbyrev[rev][b'node'])
        return self.rev(p1), self.rev(p2)

    def rev(self, node):
        """Return the revision number of ``node``.

        Raises ``error.LookupError`` when the node is not in the index.
        """
        validatenode(node)

        try:
            self._indexbynode[node]
        except KeyError:
            raise error.LookupError(node, self._indexpath, _('no node'))

        for rev, entry in self._indexbyrev.items():
            if entry[b'node'] == node:
                return rev

        raise error.ProgrammingError('this should not occur')

    def node(self, rev):
        """Return the node stored at revision number ``rev``."""
        validaterev(rev)

        return self._indexbyrev[rev][b'node']

    def hasnode(self, node):
        """Return whether ``node`` is known (``nullid`` always is)."""
        validatenode(node)
        return node in self._indexbynode

    def censorrevision(self, tr, censornode, tombstone=b''):
        """Not supported by the simple store."""
        raise NotImplementedError('TODO')

    def lookup(self, node):
        """Resolve ``node`` to a binary node.

        Accepted inputs, tried in this order: an integer revision number,
        a 20-byte binary node, a string holding a decimal revision number
        (negative values count from the end), and a 40-character hex node.

        Raises ``error.LookupError`` when the input matches none of these
        or names an unknown node.
        """
        if isinstance(node, int):
            return self.node(node)

        if len(node) == 20:
            self.rev(node)
            return node

        try:
            rev = int(node)
            # Reject non-canonical spellings such as '+1' or '01'.
            if '%d' % rev != node:
                raise ValueError

            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError

            return self.node(rev)
        except (ValueError, OverflowError):
            pass

        if len(node) == 40:
            try:
                rawnode = bin(node)
                self.rev(rawnode)
                return rawnode
            except (TypeError, ValueError):
                # Not valid hex. Python 2's unhexlify signals this with
                # TypeError, Python 3's with binascii.Error, which is a
                # ValueError subclass. Either way fall through to the
                # generic LookupError below instead of leaking the
                # decoding error to the caller.
                pass

        raise error.LookupError(node, self._path, _('invalid lookup input'))

    def linkrev(self, rev):
        """Return the linkrev recorded for revision ``rev``."""
        validaterev(rev)

        return self._indexbyrev[rev][b'linkrev']

    def _flags(self, rev):
        """Return the flags recorded for revision ``rev``."""
        validaterev(rev)

        return self._indexbyrev[rev][b'flags']

    def _candelta(self, baserev, rev):
        """Return False if either revision has a rawtext-changing flag."""
        validaterev(baserev)
        validaterev(rev)

        if ((self._flags(baserev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS)
            or (self._flags(rev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS)):
            return False

        return True

    def _processflags(self, text, flags, operation, raw=False):
        """Run the registered flag processors over ``text``.

        ``operation`` is ``'read'`` or ``'write'``. With ``raw=True`` only
        the raw transform is consulted and ``text`` is left untouched.

        Returns ``(text, validatehash)`` where ``validatehash`` says
        whether the caller should verify the hash of the returned text.
        Raises ``simplestoreerror`` for unknown flags or for a set flag
        that has no registered processor.
        """
        if flags == 0:
            return text, True

        if flags & ~revlog.REVIDX_KNOWN_FLAGS:
            raise simplestoreerror(_("incompatible revision flag '%#x'") %
                                   (flags & ~revlog.REVIDX_KNOWN_FLAGS))

        validatehash = True
        # Depending on the operation (read or write), the order might be
        # reversed due to non-commutative transforms.
        orderedflags = revlog.REVIDX_FLAGS_ORDER
        if operation == 'write':
            orderedflags = reversed(orderedflags)

        for flag in orderedflags:
            # If a flagprocessor has been registered for a known flag, apply
            # the related operation transform and update result tuple.
            if flag & flags:
                vhash = True

                if flag not in revlog._flagprocessors:
                    message = _("missing processor for flag '%#x'") % (flag)
                    raise simplestoreerror(message)

                processor = revlog._flagprocessors[flag]
                if processor is not None:
                    readtransform, writetransform, rawtransform = processor

                    if raw:
                        vhash = rawtransform(self, text)
                    elif operation == 'read':
                        text, vhash = readtransform(self, text)
                    else: # write operation
                        text, vhash = writetransform(self, text)
                validatehash = validatehash and vhash

        return text, validatehash

    def checkhash(self, text, node, p1=None, p2=None, rev=None):
        """Raise ``simplestoreerror`` if ``text`` does not hash to ``node``.

        The parents are looked up from the index when neither is given.
        ``rev`` is accepted but not used.
        """
        if p1 is None and p2 is None:
            p1, p2 = self.parents(node)
        if node != storageutil.hashrevisionsha1(text, p1, p2):
            raise simplestoreerror(_("integrity check failed on %s") %
                                   self._path)

    def revision(self, nodeorrev, raw=False):
        """Return the text of a revision given its node or revision number.

        The stored bytes are passed through the flag processors (in raw
        mode when ``raw`` is true) and hash-checked when the processors
        allow it. The null revision yields ``b''``.
        """
        if isinstance(nodeorrev, int):
            node = self.node(nodeorrev)
        else:
            node = nodeorrev
        validatenode(node)

        if node == nullid:
            return b''

        rev = self.rev(node)
        flags = self._flags(rev)

        path = b'/'.join([self._storepath, hex(node)])
        rawtext = self._svfs.read(path)

        text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
        if validatehash:
            self.checkhash(text, node, rev=rev)

        return text

    def rawdata(self, nodeorrev):
        """Return the raw stored text of a revision.

        Equivalent to ``revision(nodeorrev, raw=True)``.
        """
        # The revision identifier must be forwarded; without it the call
        # fails with TypeError for every input.
        return self.revision(nodeorrev, raw=True)

    def read(self, node):
        """Return the text of ``node`` with any metadata header removed."""
        validatenode(node)

        revision = self.revision(node)

        if not revision.startswith(b'\1\n'):
            return revision

        # Skip past the closing marker of the metadata header.
        start = revision.index(b'\1\n', 2)
        return revision[start + 2:]

    def renamed(self, node):
        """Return ``(copysource, copynode)`` or ``False``.

        Only revisions whose first parent is null are inspected for copy
        metadata.
        """
        validatenode(node)

        if self.parents(node)[0] != nullid:
            return False

        fulltext = self.revision(node)
        m = storageutil.parsemeta(fulltext)[0]

        if m and 'copy' in m:
            return m['copy'], bin(m['copyrev'])

        return False

    def cmp(self, node, text):
        """Return True if ``text`` differs from what is stored for ``node``."""
        validatenode(node)

        t = text

        # Text that itself starts with the metadata marker is stored with
        # an empty metadata header in front; hash it the same way.
        if text.startswith(b'\1\n'):
            t = b'\1\n\1\n' + text

        p1, p2 = self.parents(node)

        if storageutil.hashrevisionsha1(t, p1, p2) == node:
            return False

        if self.iscensored(self.rev(node)):
            return text != b''

        if self.renamed(node):
            t2 = self.read(node)
            return t2 != text

        return True

    def size(self, rev):
        """Return the length of the text of revision ``rev``.

        Copy metadata is not counted and censored revisions report 0.
        """
        validaterev(rev)

        node = self._indexbyrev[rev][b'node']

        if self.renamed(node):
            return len(self.read(node))

        if self.iscensored(rev):
            return 0

        return len(self.revision(node))

    def iscensored(self, rev):
        """Return a truthy value if ``rev`` carries the censored flag."""
        validaterev(rev)

        return self._flags(rev) & repository.REVISION_FLAG_CENSORED

    def commonancestorsheads(self, a, b):
        """Return the nodes of the common-ancestor heads of ``a`` and ``b``."""
        validatenode(a)
        validatenode(b)

        a = self.rev(a)
        b = self.rev(b)

        ancestors = ancestor.commonancestorsheads(self.parentrevs, a, b)
        return pycompat.maplist(self.node, ancestors)

    def descendants(self, revs):
        """Yield the revision numbers descending from any of ``revs``."""
        # This is a copy of revlog.descendants()
        first = min(revs)
        if first == nullrev:
            # Everything descends from the null revision.
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in self.revs(start=first + 1):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    # Required by verify.
    def files(self):
        """Return the store paths of all files kept for this tracked path."""
        entries = self._svfs.listdir(self._storepath)

        # Strip out undo.backup.* files created as part of transaction
        # recording.
        entries = [f for f in entries if not f.startswith('undo.backup.')]

        return [b'/'.join((self._storepath, f)) for f in entries]

    def storageinfo(self, exclusivefiles=False, sharedfiles=False,
                    revisionscount=False, trackedsize=False,
                    storedsize=False):
        """Return placeholder storage statistics.

        The arguments are ignored; only ``revisionscount`` holds a real
        value.
        """
        # TODO do a real implementation of this
        return {
            'exclusivefiles': [],
            'sharedfiles': [],
            'revisionscount': len(self),
            'trackedsize': 0,
            'storedsize': None,
        }

    def verifyintegrity(self, state):
        """Yield a ``simplefilestoreproblem`` for each unreadable revision.

        ``state['skipread']`` is reset to an empty set and then receives
        the node of every revision that failed to load.
        """
        state['skipread'] = set()
        for rev in self:
            node = self.node(rev)
            try:
                self.revision(node)
            except Exception as e:
                yield simplefilestoreproblem(
                    error='unpacking %s: %s' % (node, e),
                    node=node)
                state['skipread'].add(node)

    def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
                      assumehaveparentrevisions=False,
                      deltamode=repository.CG_DELTAMODE_STD):
        """Yield ``simplestorerevisiondelta`` objects for ``nodes``.

        ``nullid`` is dropped from the request; everything else is
        delegated to ``storageutil.emitrevisions()``.
        """
        # TODO this will probably break on some ordering options.
        nodes = [n for n in nodes if n != nullid]
        if not nodes:
            return
        for delta in storageutil.emitrevisions(
                self, nodes, nodesorder, simplestorerevisiondelta,
                revisiondata=revisiondata,
                assumehaveparentrevisions=assumehaveparentrevisions,
                deltamode=deltamode):
            yield delta

    def add(self, text, meta, transaction, linkrev, p1, p2):
        """Store ``text`` with optional ``meta`` and return its node."""
        # The header is also needed when the text itself starts with the
        # metadata marker, so that it is not mistaken for metadata later.
        if meta or text.startswith(b'\1\n'):
            text = storageutil.packmeta(meta, text)

        return self.addrevision(text, transaction, linkrev, p1, p2)

    def addrevision(self, text, transaction, linkrev, p1, p2, node=None,
                    flags=revlog.REVIDX_DEFAULT_FLAGS, cachedelta=None):
        """Store a revision and return its node.

        A node that is already present is returned without writing
        anything. ``cachedelta`` is accepted but not used.
        """
        validatenode(p1)
        validatenode(p2)

        if flags:
            node = node or storageutil.hashrevisionsha1(text, p1, p2)

        rawtext, validatehash = self._processflags(text, flags, 'write')

        node = node or storageutil.hashrevisionsha1(text, p1, p2)

        if node in self._indexbynode:
            return node

        if validatehash:
            self.checkhash(rawtext, node, p1=p1, p2=p2)

        return self._addrawrevision(node, rawtext, transaction, linkrev, p1, p2,
                                    flags)

    def _addrawrevision(self, node, rawtext, transaction, link, p1, p2, flags):
        """Write ``rawtext`` to its own file and append an index entry."""
        transaction.addbackup(self._indexpath)

        path = b'/'.join([self._storepath, hex(node)])

        self._svfs.write(path, rawtext)

        self._indexdata.append({
            b'node': node,
            b'p1': p1,
            b'p2': p2,
            b'linkrev': link,
            b'flags': flags,
        })

        self._reflectindexupdate()

        return node

    def _reflectindexupdate(self):
        """Rebuild the in-memory lookups and rewrite the index file."""
        self._refreshindex()
        self._svfs.write(self._indexpath,
                         ''.join(cborutil.streamencode(self._indexdata)))

    def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None,
                 maybemissingparents=False):
        """Apply a group of deltas and return the list of their nodes.

        Nodes already present are listed in the result but not rewritten.
        ``addrevisioncb(self, node)`` is called after each newly stored
        revision. Aborts when ``maybemissingparents`` is requested.
        """
        if maybemissingparents:
            raise error.Abort(_('simple store does not support missing parents '
                                'write mode'))

        nodes = []

        transaction.addbackup(self._indexpath)

        for node, p1, p2, linknode, deltabase, delta, flags in deltas:
            linkrev = linkmapper(linknode)
            flags = flags or revlog.REVIDX_DEFAULT_FLAGS

            nodes.append(node)

            if node in self._indexbynode:
                continue

            # Need to resolve the fulltext from the delta base.
            if deltabase == nullid:
                text = mdiff.patch(b'', delta)
            else:
                text = mdiff.patch(self.revision(deltabase), delta)

            self._addrawrevision(node, text, transaction, linkrev, p1, p2,
                                 flags)

            if addrevisioncb:
                addrevisioncb(self, node)
        return nodes

    def _headrevs(self):
        """Return the sorted revisions that are nobody's parent."""
        # Assume all revisions are heads by default.
        revishead = {rev: True for rev in self._indexbyrev}

        for rev, entry in self._indexbyrev.items():
            # Unset head flag for all seen parents. The null entry names
            # nullid as its own parent, so nullrev is always cleared.
            revishead[self.rev(entry[b'p1'])] = False
            revishead[self.rev(entry[b'p2'])] = False

        return [rev for rev, ishead in sorted(revishead.items())
                if ishead]

    def heads(self, start=None, stop=None):
        """Return head nodes, optionally bounded by ``start`` and ``stop``.

        Without arguments this is every head (``[nullid]`` for an empty
        store). With ``start``, only heads reachable from it are returned;
        nodes in ``stop`` are treated as if they had no children.
        """
        # This is copied from revlog.py.
        if start is None and stop is None:
            if not len(self):
                return [nullid]
            return [self.node(r) for r in self._headrevs()]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = {self.rev(n) for n in stop}
        startrev = self.rev(start)
        reachable = {startrev}
        heads = {startrev}

        parentrevs = self.parentrevs
        for r in self.revs(start=startrev + 1):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                    heads.add(r)
                if p in heads and p not in stoprevs:
                    heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """Return the nodes of the direct children of ``node``."""
        validatenode(node)

        # This is a copy of revlog.children().
        c = []
        p = self.rev(node)
        for r in self.revs(start=p + 1):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                # Parentless revisions are children of the null revision.
                c.append(self.node(r))
        return c

    def getstrippoint(self, minlink):
        """Return what ``storageutil.resolvestripinfo()`` computes for us."""
        return storageutil.resolvestripinfo(
            minlink, len(self) - 1, self._headrevs(), self.linkrev,
            self.parentrevs)

    def strip(self, minlink, transaction):
        """Drop index entries from the strip point for ``minlink`` onwards.

        Only the index is truncated and rewritten; ``transaction`` is not
        used here.
        """
        if not len(self):
            return

        rev, _ignored = self.getstrippoint(minlink)
        if rev == len(self):
            return

        # Purge index data starting at the requested revision.
        self._indexdata[rev:] = []
        self._reflectindexupdate()
623 626
def issimplestorefile(f, kind, st):
    """Return True if ``f`` should be treated as a simple store data file.

    That is the case for a regular file which is neither a revlog file
    (per ``store.isrevlog``) nor a transaction ``undo.`` file.
    """
    return (kind == stat.S_IFREG
            and not store.isrevlog(f, kind, st)
            and not f.startswith('undo.'))
637 640
class simplestore(store.encodedstore):
    """Encoded store that also reports the simple store's own files."""

    def datafiles(self):
        """Yield the parent's data files, then the non-revlog ones.

        Each extra item is ``(unencoded, encoded, size)``; ``unencoded`` is
        ``None`` when the name cannot be decoded.
        """
        for item in super(simplestore, self).datafiles():
            yield item

        # Supplement with non-revlog files.
        for rawname, storedname, nbytes in self._walk(
                'data', True, filefilter=issimplestorefile):
            try:
                decoded = store.decodefilename(rawname)
            except KeyError:
                decoded = None

            yield decoded, storedname, nbytes
653 656
def reposetup(ui, repo):
    """Make local repositories hand out ``filestorage`` for tracked files.

    Non-local repositories are left alone; bundle repositories abort.
    """
    if not repo.local():
        return

    if isinstance(repo, bundlerepo.bundlerepository):
        raise error.Abort(_('cannot use simple store with bundlerepo'))

    basecls = repo.__class__

    class simplestorerepo(basecls):
        def file(self, f):
            return filestorage(self.svfs, f)

    # Swap the class in place so the existing repo object is affected.
    repo.__class__ = simplestorerepo
666 669
def featuresetup(ui, supported):
    """Add the simple store requirement to the ``supported`` set."""
    supported.add(REQUIREMENT)
669 672
def newreporequirements(orig, ui, createopts):
    """Wrap requirement computation so new repos get the simple store."""
    reqs = orig(ui, createopts)

    # TODO: 'fncache' and 'dotencode' only influence creation of the stock
    # store object, which is replaced here, so they could be removed from
    # the set. Do this once we feel like taking the test hit.

    reqs.add(REQUIREMENT)
    return reqs
685 688
def makestore(orig, requirements, path, vfstype):
    """Return a ``simplestore`` for repos that require it.

    Every other repository gets whatever the wrapped ``orig`` builds.
    """
    if REQUIREMENT in requirements:
        return simplestore(path, vfstype)

    return orig(requirements, path, vfstype)
691 694
def verifierinit(orig, self, *args, **kwargs):
    """Run the wrapped initializer, then silence orphan-file warnings."""
    orig(self, *args, **kwargs)

    # Files in the store are not expected to line up with what is
    # advertised, so the resulting warnings would only be noise.
    self.warnorphanstorefiles = False
698 701
def extsetup(ui):
    """Register the feature hook and install the function wrappers."""
    localrepo.featuresetupfuncs.add(featuresetup)

    wrappers = (
        (localrepo, 'newreporequirements', newreporequirements),
        (localrepo, 'makestore', makestore),
        (verify.verifier, '__init__', verifierinit),
    )
    for container, funcname, wrapper in wrappers:
        extensions.wrapfunction(container, funcname, wrapper)
General Comments 0
You need to be logged in to leave comments. Login now