revlog: change addgroup callbacks to take revision numbers...
Joerg Sonnenberger - r47259:7a93b7b3 default
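
The change below switches the `addrevisioncb` and `duplicaterevisioncb`
callbacks passed to addgroup() from receiving a node to receiving a revision
number. A minimal before/after sketch (illustrative only, modeled on the
onchangelog callback in changegroup.py further down):

    # before: the callback got a node and mapped it to a rev itself
    def onchangelog(cl, node):
        rev = cl.rev(node)
        ctx = cl.changelogrevision(rev)
        ...

    # after: the callback gets the rev directly and calls cl.node(rev)
    # only when the node is actually needed
    def onchangelog(cl, rev):
        ctx = cl.changelogrevision(rev)
        ...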
@@ -1,1301 +1,1301 @@
1 1 # sqlitestore.py - Storage backend that uses SQLite
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """store repository data in SQLite (EXPERIMENTAL)
9 9
10 10 The sqlitestore extension enables the storage of repository data in SQLite.
11 11
12 12 This extension is HIGHLY EXPERIMENTAL. There are NO BACKWARDS COMPATIBILITY
13 13 GUARANTEES. This means that repositories created with this extension may
14 14 only be usable with the exact version of this extension/Mercurial that was
15 15 used. The extension attempts to enforce this in order to prevent repository
16 16 corruption.
17 17
18 18 In addition, several features are not yet supported or have known bugs:
19 19
20 20 * Only some data is stored in SQLite. Changeset, manifest, and other repository
21 21 data is not yet stored in SQLite.
22 22 * Transactions are not robust. If the process is aborted at the right time
23 23 during transaction close/rollback, the repository could be in an inconsistent
24 24 state. This problem will diminish once all repository data is tracked by
25 25 SQLite.
26 26 * Bundle repositories do not work (the ability to use e.g.
27 27 `hg -R <bundle-file> log` to automatically overlay a bundle on top of the
28 28 existing repository).
29 29 * Various other features don't work.
30 30
31 31 This extension should work for basic clone/pull, update, and commit workflows.
32 32 Some history rewriting operations may fail due to lack of support for bundle
33 33 repositories.
34 34
35 35 To use, activate the extension and set the ``storage.new-repo-backend`` config
36 36 option to ``sqlite`` to enable new repositories to use SQLite for storage.
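
For example, a minimal hgrc sketch (illustrative only; both are standard
Mercurial config sections, and the extension ships with Mercurial core)::

  [extensions]
  sqlitestore =

  [storage]
  new-repo-backend = sqlite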
37 37 """
38 38
39 39 # To run the test suite with repos using SQLite by default, execute the
40 40 # following:
41 41 #
42 42 # HGREPOFEATURES="sqlitestore" run-tests.py \
43 43 # --extra-config-opt extensions.sqlitestore= \
44 44 # --extra-config-opt storage.new-repo-backend=sqlite
45 45
46 46 from __future__ import absolute_import
47 47
48 48 import sqlite3
49 49 import struct
50 50 import threading
51 51 import zlib
52 52
53 53 from mercurial.i18n import _
54 54 from mercurial.node import (
55 55 nullid,
56 56 nullrev,
57 57 short,
58 58 )
59 59 from mercurial.thirdparty import attr
60 60 from mercurial import (
61 61 ancestor,
62 62 dagop,
63 63 encoding,
64 64 error,
65 65 extensions,
66 66 localrepo,
67 67 mdiff,
68 68 pycompat,
69 69 registrar,
70 70 requirements,
71 71 util,
72 72 verify,
73 73 )
74 74 from mercurial.interfaces import (
75 75 repository,
76 76 util as interfaceutil,
77 77 )
78 78 from mercurial.utils import (
79 79 hashutil,
80 80 storageutil,
81 81 )
82 82
83 83 try:
84 84 from mercurial import zstd
85 85
86 86 zstd.__version__
87 87 except ImportError:
88 88 zstd = None
89 89
90 90 configtable = {}
91 91 configitem = registrar.configitem(configtable)
92 92
93 93 # experimental config: storage.sqlite.compression
94 94 configitem(
95 95 b'storage',
96 96 b'sqlite.compression',
97 97 default=b'zstd' if zstd else b'zlib',
98 98 experimental=True,
99 99 )
100 100
101 101 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
102 102 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
103 103 # be specifying the version(s) of Mercurial they are tested with, or
104 104 # leave the attribute unspecified.
105 105 testedwith = b'ships-with-hg-core'
106 106
107 107 REQUIREMENT = b'exp-sqlite-001'
108 108 REQUIREMENT_ZSTD = b'exp-sqlite-comp-001=zstd'
109 109 REQUIREMENT_ZLIB = b'exp-sqlite-comp-001=zlib'
110 110 REQUIREMENT_NONE = b'exp-sqlite-comp-001=none'
111 111 REQUIREMENT_SHALLOW_FILES = b'exp-sqlite-shallow-files'
112 112
113 113 CURRENT_SCHEMA_VERSION = 1
114 114
115 115 COMPRESSION_NONE = 1
116 116 COMPRESSION_ZSTD = 2
117 117 COMPRESSION_ZLIB = 3
118 118
119 119 FLAG_CENSORED = 1
120 120 FLAG_MISSING_P1 = 2
121 121 FLAG_MISSING_P2 = 4
122 122
123 123 CREATE_SCHEMA = [
124 124 # Deltas are stored as content-indexed blobs.
125 125 # compression column holds COMPRESSION_* constant for how the
126 126 # delta is encoded.
127 127 'CREATE TABLE delta ('
128 128 ' id INTEGER PRIMARY KEY, '
129 129 ' compression INTEGER NOT NULL, '
130 130 ' hash BLOB UNIQUE ON CONFLICT ABORT, '
131 131 ' delta BLOB NOT NULL '
132 132 ')',
133 133 # Tracked paths are denormalized to integers to avoid redundant
134 134 # storage of the path name.
135 135 'CREATE TABLE filepath ('
136 136 ' id INTEGER PRIMARY KEY, '
137 137 ' path BLOB NOT NULL '
138 138 ')',
139 139 'CREATE UNIQUE INDEX filepath_path ON filepath (path)',
140 140 # We have a single table for all file revision data.
141 141 # Each file revision is uniquely described by a (path, rev) and
142 142 # (path, node).
143 143 #
144 144 # Revision data is stored as a pointer to the delta producing this
145 145 # revision and the file revision whose delta should be applied before
146 146 # that one. One can reconstruct the delta chain by recursively following
147 147 # the delta base revision pointers until one encounters NULL.
148 148 #
149 149 # flags column holds bitwise integer flags controlling storage options.
150 150 # These flags are defined by the FLAG_* constants.
151 151 'CREATE TABLE fileindex ('
152 152 ' id INTEGER PRIMARY KEY, '
153 153 ' pathid INTEGER REFERENCES filepath(id), '
154 154 ' revnum INTEGER NOT NULL, '
155 155 ' p1rev INTEGER NOT NULL, '
156 156 ' p2rev INTEGER NOT NULL, '
157 157 ' linkrev INTEGER NOT NULL, '
158 158 ' flags INTEGER NOT NULL, '
159 159 ' deltaid INTEGER REFERENCES delta(id), '
160 160 ' deltabaseid INTEGER REFERENCES fileindex(id), '
161 161 ' node BLOB NOT NULL '
162 162 ')',
163 163 'CREATE UNIQUE INDEX fileindex_pathrevnum '
164 164 ' ON fileindex (pathid, revnum)',
165 165 'CREATE UNIQUE INDEX fileindex_pathnode ON fileindex (pathid, node)',
166 166 # Provide a view over all file data for convenience.
167 167 'CREATE VIEW filedata AS '
168 168 'SELECT '
169 169 ' fileindex.id AS id, '
170 170 ' filepath.id AS pathid, '
171 171 ' filepath.path AS path, '
172 172 ' fileindex.revnum AS revnum, '
173 173 ' fileindex.node AS node, '
174 174 ' fileindex.p1rev AS p1rev, '
175 175 ' fileindex.p2rev AS p2rev, '
176 176 ' fileindex.linkrev AS linkrev, '
177 177 ' fileindex.flags AS flags, '
178 178 ' fileindex.deltaid AS deltaid, '
179 179 ' fileindex.deltabaseid AS deltabaseid '
180 180 'FROM filepath, fileindex '
181 181 'WHERE fileindex.pathid=filepath.id',
182 182 'PRAGMA user_version=%d' % CURRENT_SCHEMA_VERSION,
183 183 ]
184 184
185 185
186 186 def resolvedeltachain(db, pathid, node, revisioncache, stoprids, zstddctx=None):
187 187 """Resolve a delta chain for a file node."""
188 188
189 189 # TODO the "not in ({stops})" here is possibly slowing down the query
190 190 # because it needs to perform the lookup on every recursive invocation.
191 191 # This could possibly be faster if we created a temporary query with
192 192 # baseid "poisoned" to null and limited the recursive filter to
193 193 # "is not null".
194 194 res = db.execute(
195 195 'WITH RECURSIVE '
196 196 ' deltachain(deltaid, baseid) AS ('
197 197 ' SELECT deltaid, deltabaseid FROM fileindex '
198 198 ' WHERE pathid=? AND node=? '
199 199 ' UNION ALL '
200 200 ' SELECT fileindex.deltaid, deltabaseid '
201 201 ' FROM fileindex, deltachain '
202 202 ' WHERE '
203 203 ' fileindex.id=deltachain.baseid '
204 204 ' AND deltachain.baseid IS NOT NULL '
205 205 ' AND fileindex.id NOT IN ({stops}) '
206 206 ' ) '
207 207 'SELECT deltachain.baseid, compression, delta '
208 208 'FROM deltachain, delta '
209 209 'WHERE delta.id=deltachain.deltaid'.format(
210 210 stops=','.join(['?'] * len(stoprids))
211 211 ),
212 212 tuple([pathid, node] + list(stoprids.keys())),
213 213 )
214 214
215 215 deltas = []
216 216 lastdeltabaseid = None
217 217
218 218 for deltabaseid, compression, delta in res:
219 219 lastdeltabaseid = deltabaseid
220 220
221 221 if compression == COMPRESSION_ZSTD:
222 222 delta = zstddctx.decompress(delta)
223 223 elif compression == COMPRESSION_NONE:
224 224 delta = delta
225 225 elif compression == COMPRESSION_ZLIB:
226 226 delta = zlib.decompress(delta)
227 227 else:
228 228 raise SQLiteStoreError(
229 229 b'unhandled compression type: %d' % compression
230 230 )
231 231
232 232 deltas.append(delta)
233 233
234 234 if lastdeltabaseid in stoprids:
235 235 basetext = revisioncache[stoprids[lastdeltabaseid]]
236 236 else:
237 237 basetext = deltas.pop()
238 238
239 239 deltas.reverse()
240 240 fulltext = mdiff.patches(basetext, deltas)
241 241
242 242 # SQLite returns buffer instances for blob columns on Python 2. This
243 243 # type can propagate through the delta application layer. Because
244 244 # downstream callers assume revisions are bytes, cast as needed.
245 245 if not isinstance(fulltext, bytes):
246 246 fulltext = bytes(fulltext)
247 247
248 248 return fulltext
249 249
250 250
251 251 def insertdelta(db, compression, hash, delta):
252 252 try:
253 253 return db.execute(
254 254 'INSERT INTO delta (compression, hash, delta) VALUES (?, ?, ?)',
255 255 (compression, hash, delta),
256 256 ).lastrowid
257 257 except sqlite3.IntegrityError:
258 258 return db.execute(
259 259 'SELECT id FROM delta WHERE hash=?', (hash,)
260 260 ).fetchone()[0]
261 261
262 262
263 263 class SQLiteStoreError(error.StorageError):
264 264 pass
265 265
266 266
267 267 @attr.s
268 268 class revisionentry(object):
269 269 rid = attr.ib()
270 270 rev = attr.ib()
271 271 node = attr.ib()
272 272 p1rev = attr.ib()
273 273 p2rev = attr.ib()
274 274 p1node = attr.ib()
275 275 p2node = attr.ib()
276 276 linkrev = attr.ib()
277 277 flags = attr.ib()
278 278
279 279
280 280 @interfaceutil.implementer(repository.irevisiondelta)
281 281 @attr.s(slots=True)
282 282 class sqliterevisiondelta(object):
283 283 node = attr.ib()
284 284 p1node = attr.ib()
285 285 p2node = attr.ib()
286 286 basenode = attr.ib()
287 287 flags = attr.ib()
288 288 baserevisionsize = attr.ib()
289 289 revision = attr.ib()
290 290 delta = attr.ib()
291 291 linknode = attr.ib(default=None)
292 292
293 293
294 294 @interfaceutil.implementer(repository.iverifyproblem)
295 295 @attr.s(frozen=True)
296 296 class sqliteproblem(object):
297 297 warning = attr.ib(default=None)
298 298 error = attr.ib(default=None)
299 299 node = attr.ib(default=None)
300 300
301 301
302 302 @interfaceutil.implementer(repository.ifilestorage)
303 303 class sqlitefilestore(object):
304 304 """Implements storage for an individual tracked path."""
305 305
306 306 def __init__(self, db, path, compression):
307 307 self._db = db
308 308 self._path = path
309 309
310 310 self._pathid = None
311 311
312 312 # revnum -> node
313 313 self._revtonode = {}
314 314 # node -> revnum
315 315 self._nodetorev = {}
316 316 # node -> data structure
317 317 self._revisions = {}
318 318
319 319 self._revisioncache = util.lrucachedict(10)
320 320
321 321 self._compengine = compression
322 322
323 323 if compression == b'zstd':
324 324 self._cctx = zstd.ZstdCompressor(level=3)
325 325 self._dctx = zstd.ZstdDecompressor()
326 326 else:
327 327 self._cctx = None
328 328 self._dctx = None
329 329
330 330 self._refreshindex()
331 331
332 332 def _refreshindex(self):
333 333 self._revtonode = {}
334 334 self._nodetorev = {}
335 335 self._revisions = {}
336 336
337 337 res = list(
338 338 self._db.execute(
339 339 'SELECT id FROM filepath WHERE path=?', (self._path,)
340 340 )
341 341 )
342 342
343 343 if not res:
344 344 self._pathid = None
345 345 return
346 346
347 347 self._pathid = res[0][0]
348 348
349 349 res = self._db.execute(
350 350 'SELECT id, revnum, node, p1rev, p2rev, linkrev, flags '
351 351 'FROM fileindex '
352 352 'WHERE pathid=? '
353 353 'ORDER BY revnum ASC',
354 354 (self._pathid,),
355 355 )
356 356
357 357 for i, row in enumerate(res):
358 358 rid, rev, node, p1rev, p2rev, linkrev, flags = row
359 359
360 360 if i != rev:
361 361 raise SQLiteStoreError(
362 362 _(b'sqlite database has inconsistent revision numbers')
363 363 )
364 364
365 365 if p1rev == nullrev:
366 366 p1node = nullid
367 367 else:
368 368 p1node = self._revtonode[p1rev]
369 369
370 370 if p2rev == nullrev:
371 371 p2node = nullid
372 372 else:
373 373 p2node = self._revtonode[p2rev]
374 374
375 375 entry = revisionentry(
376 376 rid=rid,
377 377 rev=rev,
378 378 node=node,
379 379 p1rev=p1rev,
380 380 p2rev=p2rev,
381 381 p1node=p1node,
382 382 p2node=p2node,
383 383 linkrev=linkrev,
384 384 flags=flags,
385 385 )
386 386
387 387 self._revtonode[rev] = node
388 388 self._nodetorev[node] = rev
389 389 self._revisions[node] = entry
390 390
391 391 # Start of ifileindex interface.
392 392
393 393 def __len__(self):
394 394 return len(self._revisions)
395 395
396 396 def __iter__(self):
397 397 return iter(pycompat.xrange(len(self._revisions)))
398 398
399 399 def hasnode(self, node):
400 400 if node == nullid:
401 401 return False
402 402
403 403 return node in self._nodetorev
404 404
405 405 def revs(self, start=0, stop=None):
406 406 return storageutil.iterrevs(
407 407 len(self._revisions), start=start, stop=stop
408 408 )
409 409
410 410 def parents(self, node):
411 411 if node == nullid:
412 412 return nullid, nullid
413 413
414 414 if node not in self._revisions:
415 415 raise error.LookupError(node, self._path, _(b'no node'))
416 416
417 417 entry = self._revisions[node]
418 418 return entry.p1node, entry.p2node
419 419
420 420 def parentrevs(self, rev):
421 421 if rev == nullrev:
422 422 return nullrev, nullrev
423 423
424 424 if rev not in self._revtonode:
425 425 raise IndexError(rev)
426 426
427 427 entry = self._revisions[self._revtonode[rev]]
428 428 return entry.p1rev, entry.p2rev
429 429
430 430 def rev(self, node):
431 431 if node == nullid:
432 432 return nullrev
433 433
434 434 if node not in self._nodetorev:
435 435 raise error.LookupError(node, self._path, _(b'no node'))
436 436
437 437 return self._nodetorev[node]
438 438
439 439 def node(self, rev):
440 440 if rev == nullrev:
441 441 return nullid
442 442
443 443 if rev not in self._revtonode:
444 444 raise IndexError(rev)
445 445
446 446 return self._revtonode[rev]
447 447
448 448 def lookup(self, node):
449 449 return storageutil.fileidlookup(self, node, self._path)
450 450
451 451 def linkrev(self, rev):
452 452 if rev == nullrev:
453 453 return nullrev
454 454
455 455 if rev not in self._revtonode:
456 456 raise IndexError(rev)
457 457
458 458 entry = self._revisions[self._revtonode[rev]]
459 459 return entry.linkrev
460 460
461 461 def iscensored(self, rev):
462 462 if rev == nullrev:
463 463 return False
464 464
465 465 if rev not in self._revtonode:
466 466 raise IndexError(rev)
467 467
468 468 return self._revisions[self._revtonode[rev]].flags & FLAG_CENSORED
469 469
470 470 def commonancestorsheads(self, node1, node2):
471 471 rev1 = self.rev(node1)
472 472 rev2 = self.rev(node2)
473 473
474 474 ancestors = ancestor.commonancestorsheads(self.parentrevs, rev1, rev2)
475 475 return pycompat.maplist(self.node, ancestors)
476 476
477 477 def descendants(self, revs):
478 478 # TODO we could implement this using a recursive SQL query, which
479 479 # might be faster.
480 480 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
481 481
482 482 def heads(self, start=None, stop=None):
483 483 if start is None and stop is None:
484 484 if not len(self):
485 485 return [nullid]
486 486
487 487 startrev = self.rev(start) if start is not None else nullrev
488 488 stoprevs = {self.rev(n) for n in stop or []}
489 489
490 490 revs = dagop.headrevssubset(
491 491 self.revs, self.parentrevs, startrev=startrev, stoprevs=stoprevs
492 492 )
493 493
494 494 return [self.node(rev) for rev in revs]
495 495
496 496 def children(self, node):
497 497 rev = self.rev(node)
498 498
499 499 res = self._db.execute(
500 500 'SELECT'
501 501 ' node '
502 502 ' FROM filedata '
503 503 ' WHERE path=? AND (p1rev=? OR p2rev=?) '
504 504 ' ORDER BY revnum ASC',
505 505 (self._path, rev, rev),
506 506 )
507 507
508 508 return [row[0] for row in res]
509 509
510 510 # End of ifileindex interface.
511 511
512 512 # Start of ifiledata interface.
513 513
514 514 def size(self, rev):
515 515 if rev == nullrev:
516 516 return 0
517 517
518 518 if rev not in self._revtonode:
519 519 raise IndexError(rev)
520 520
521 521 node = self._revtonode[rev]
522 522
523 523 if self.renamed(node):
524 524 return len(self.read(node))
525 525
526 526 return len(self.revision(node))
527 527
528 528 def revision(self, node, raw=False, _verifyhash=True):
529 529 if node in (nullid, nullrev):
530 530 return b''
531 531
532 532 if isinstance(node, int):
533 533 node = self.node(node)
534 534
535 535 if node not in self._nodetorev:
536 536 raise error.LookupError(node, self._path, _(b'no node'))
537 537
538 538 if node in self._revisioncache:
539 539 return self._revisioncache[node]
540 540
541 541 # Because we have a fulltext revision cache, we are able to
542 542 # short-circuit delta chain traversal and decompression as soon as
543 543 # we encounter a revision in the cache.
544 544
545 545 stoprids = {self._revisions[n].rid: n for n in self._revisioncache}
546 546
547 547 if not stoprids:
548 548 stoprids[-1] = None
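# -1 is never a valid fileindex id, so it keeps the "NOT IN (...)" filter
# in resolvedeltachain() well-formed even when the revision cache is empty.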
549 549
550 550 fulltext = resolvedeltachain(
551 551 self._db,
552 552 self._pathid,
553 553 node,
554 554 self._revisioncache,
555 555 stoprids,
556 556 zstddctx=self._dctx,
557 557 )
558 558
559 559 # Don't verify hashes if parent nodes were rewritten, as the hash
560 560 # wouldn't verify.
561 561 if self._revisions[node].flags & (FLAG_MISSING_P1 | FLAG_MISSING_P2):
562 562 _verifyhash = False
563 563
564 564 if _verifyhash:
565 565 self._checkhash(fulltext, node)
566 566 self._revisioncache[node] = fulltext
567 567
568 568 return fulltext
569 569
570 570 def rawdata(self, *args, **kwargs):
571 571 return self.revision(*args, **kwargs)
572 572
573 573 def read(self, node):
574 574 return storageutil.filtermetadata(self.revision(node))
575 575
576 576 def renamed(self, node):
577 577 return storageutil.filerevisioncopied(self, node)
578 578
579 579 def cmp(self, node, fulltext):
580 580 return not storageutil.filedataequivalent(self, node, fulltext)
581 581
582 582 def emitrevisions(
583 583 self,
584 584 nodes,
585 585 nodesorder=None,
586 586 revisiondata=False,
587 587 assumehaveparentrevisions=False,
588 588 deltamode=repository.CG_DELTAMODE_STD,
589 589 ):
590 590 if nodesorder not in (b'nodes', b'storage', b'linear', None):
591 591 raise error.ProgrammingError(
592 592 b'unhandled value for nodesorder: %s' % nodesorder
593 593 )
594 594
595 595 nodes = [n for n in nodes if n != nullid]
596 596
597 597 if not nodes:
598 598 return
599 599
600 600 # TODO perform in a single query.
601 601 res = self._db.execute(
602 602 'SELECT revnum, deltaid FROM fileindex '
603 603 'WHERE pathid=? '
604 604 ' AND node in (%s)' % (','.join(['?'] * len(nodes))),
605 605 tuple([self._pathid] + nodes),
606 606 )
607 607
608 608 deltabases = {}
609 609
610 610 for rev, deltaid in res:
611 611 res = self._db.execute(
612 612 'SELECT revnum from fileindex WHERE pathid=? AND deltaid=?',
613 613 (self._pathid, deltaid),
614 614 )
615 615 deltabases[rev] = res.fetchone()[0]
616 616
617 617 # TODO define revdifffn so we can use delta from storage.
618 618 for delta in storageutil.emitrevisions(
619 619 self,
620 620 nodes,
621 621 nodesorder,
622 622 sqliterevisiondelta,
623 623 deltaparentfn=deltabases.__getitem__,
624 624 revisiondata=revisiondata,
625 625 assumehaveparentrevisions=assumehaveparentrevisions,
626 626 deltamode=deltamode,
627 627 ):
628 628
629 629 yield delta
630 630
631 631 # End of ifiledata interface.
632 632
633 633 # Start of ifilemutation interface.
634 634
635 635 def add(self, filedata, meta, transaction, linkrev, p1, p2):
636 636 if meta or filedata.startswith(b'\x01\n'):
637 637 filedata = storageutil.packmeta(meta, filedata)
638 638
639 639 rev = self.addrevision(filedata, transaction, linkrev, p1, p2)
640 640 return self.node(rev)
641 641
642 642 def addrevision(
643 643 self,
644 644 revisiondata,
645 645 transaction,
646 646 linkrev,
647 647 p1,
648 648 p2,
649 649 node=None,
650 650 flags=0,
651 651 cachedelta=None,
652 652 ):
653 653 if flags:
654 654 raise SQLiteStoreError(_(b'flags not supported on revisions'))
655 655
656 656 validatehash = node is not None
657 657 node = node or storageutil.hashrevisionsha1(revisiondata, p1, p2)
658 658
659 659 if validatehash:
660 660 self._checkhash(revisiondata, node, p1, p2)
661 661
662 662 rev = self._nodetorev.get(node)
663 663 if rev is not None:
664 664 return rev
665 665
666 666 rev = self._addrawrevision(
667 667 node, revisiondata, transaction, linkrev, p1, p2
668 668 )
669 669
670 670 self._revisioncache[node] = revisiondata
671 671 return rev
672 672
673 673 def addgroup(
674 674 self,
675 675 deltas,
676 676 linkmapper,
677 677 transaction,
678 678 addrevisioncb=None,
679 679 duplicaterevisioncb=None,
680 680 maybemissingparents=False,
681 681 ):
682 682 empty = True
683 683
684 684 for node, p1, p2, linknode, deltabase, delta, wireflags in deltas:
685 685 storeflags = 0
686 686
687 687 if wireflags & repository.REVISION_FLAG_CENSORED:
688 688 storeflags |= FLAG_CENSORED
689 689
690 690 if wireflags & ~repository.REVISION_FLAG_CENSORED:
691 691 raise SQLiteStoreError(b'unhandled revision flag')
692 692
693 693 if maybemissingparents:
694 694 if p1 != nullid and not self.hasnode(p1):
695 695 p1 = nullid
696 696 storeflags |= FLAG_MISSING_P1
697 697
698 698 if p2 != nullid and not self.hasnode(p2):
699 699 p2 = nullid
700 700 storeflags |= FLAG_MISSING_P2
701 701
702 702 baserev = self.rev(deltabase)
703 703
704 704 # If base is censored, delta must be full replacement in a single
705 705 # patch operation.
706 706 if baserev != nullrev and self.iscensored(baserev):
707 707 hlen = struct.calcsize(b'>lll')
708 708 oldlen = len(self.rawdata(deltabase, _verifyhash=False))
709 709 newlen = len(delta) - hlen
710 710
711 711 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
712 712 raise error.CensoredBaseError(self._path, deltabase)
713 713
714 714 if not (storeflags & FLAG_CENSORED) and storageutil.deltaiscensored(
715 715 delta, baserev, lambda x: len(self.rawdata(x))
716 716 ):
717 717 storeflags |= FLAG_CENSORED
718 718
719 719 linkrev = linkmapper(linknode)
720 720
721 721 if node in self._revisions:
722 722 # Possibly reset parents to make them proper.
723 723 entry = self._revisions[node]
724 724
725 725 if entry.flags & FLAG_MISSING_P1 and p1 != nullid:
726 726 entry.p1node = p1
727 727 entry.p1rev = self._nodetorev[p1]
728 728 entry.flags &= ~FLAG_MISSING_P1
729 729
730 730 self._db.execute(
731 731 'UPDATE fileindex SET p1rev=?, flags=? WHERE id=?',
732 732 (self._nodetorev[p1], entry.flags, entry.rid),
733 733 )
734 734
735 735 if entry.flags & FLAG_MISSING_P2 and p2 != nullid:
736 736 entry.p2node = p2
737 737 entry.p2rev = self._nodetorev[p2]
738 738 entry.flags &= ~FLAG_MISSING_P2
739 739
740 740 self._db.execute(
741 741 'UPDATE fileindex SET p2rev=?, flags=? WHERE id=?',
742 742 (self._nodetorev[p2], entry.flags, entry.rid),
743 743 )
744 744
745 745 if duplicaterevisioncb:
746 duplicaterevisioncb(self, node)
746 duplicaterevisioncb(self, self.rev(node))
747 747 empty = False
748 748 continue
749 749
750 750 if deltabase == nullid:
751 751 text = mdiff.patch(b'', delta)
752 752 storedelta = None
753 753 else:
754 754 text = None
755 755 storedelta = (deltabase, delta)
756 756
757 self._addrawrevision(
757 rev = self._addrawrevision(
758 758 node,
759 759 text,
760 760 transaction,
761 761 linkrev,
762 762 p1,
763 763 p2,
764 764 storedelta=storedelta,
765 765 flags=storeflags,
766 766 )
767 767
768 768 if addrevisioncb:
769 addrevisioncb(self, node)
769 addrevisioncb(self, rev)
770 770 empty = False
771 771
772 772 return not empty
773 773
774 774 def censorrevision(self, tr, censornode, tombstone=b''):
775 775 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
776 776
777 777 # This restriction is cargo culted from revlogs and makes no sense for
778 778 # SQLite, since columns can be resized at will.
779 779 if len(tombstone) > len(self.rawdata(censornode)):
780 780 raise error.Abort(
781 781 _(b'censor tombstone must be no longer than censored data')
782 782 )
783 783
784 784 # We need to replace the censored revision's data with the tombstone.
785 785 # But replacing that data will have implications for delta chains that
786 786 # reference it.
787 787 #
788 788 # While "better," more complex strategies are possible, we do something
789 789 # simple: we find delta chain children of the censored revision and we
790 790 # replace those incremental deltas with fulltexts of their corresponding
791 791 # revision. Then we delete the now-unreferenced delta and original
792 792 # revision and insert a replacement.
793 793
794 794 # Find the delta to be censored.
795 795 censoreddeltaid = self._db.execute(
796 796 'SELECT deltaid FROM fileindex WHERE id=?',
797 797 (self._revisions[censornode].rid,),
798 798 ).fetchone()[0]
799 799
800 800 # Find all its delta chain children.
801 801 # TODO once we support storing deltas for !files, we'll need to look
802 802 # for those delta chains too.
803 803 rows = list(
804 804 self._db.execute(
805 805 'SELECT id, pathid, node FROM fileindex '
806 806 'WHERE deltabaseid=? OR deltaid=?',
807 807 (censoreddeltaid, censoreddeltaid),
808 808 )
809 809 )
810 810
811 811 for row in rows:
812 812 rid, pathid, node = row
813 813
814 814 fulltext = resolvedeltachain(
815 815 self._db, pathid, node, {}, {-1: None}, zstddctx=self._dctx
816 816 )
817 817
818 818 deltahash = hashutil.sha1(fulltext).digest()
819 819
820 820 if self._compengine == b'zstd':
821 821 deltablob = self._cctx.compress(fulltext)
822 822 compression = COMPRESSION_ZSTD
823 823 elif self._compengine == b'zlib':
824 824 deltablob = zlib.compress(fulltext)
825 825 compression = COMPRESSION_ZLIB
826 826 elif self._compengine == b'none':
827 827 deltablob = fulltext
828 828 compression = COMPRESSION_NONE
829 829 else:
830 830 raise error.ProgrammingError(
831 831 b'unhandled compression engine: %s' % self._compengine
832 832 )
833 833
834 834 if len(deltablob) >= len(fulltext):
835 835 deltablob = fulltext
836 836 compression = COMPRESSION_NONE
837 837
838 838 deltaid = insertdelta(self._db, compression, deltahash, deltablob)
839 839
840 840 self._db.execute(
841 841 'UPDATE fileindex SET deltaid=?, deltabaseid=NULL '
842 842 'WHERE id=?',
843 843 (deltaid, rid),
844 844 )
845 845
846 846 # Now create the tombstone delta and replace the delta on the censored
847 847 # node.
848 848 deltahash = hashutil.sha1(tombstone).digest()
849 849 tombstonedeltaid = insertdelta(
850 850 self._db, COMPRESSION_NONE, deltahash, tombstone
851 851 )
852 852
853 853 flags = self._revisions[censornode].flags
854 854 flags |= FLAG_CENSORED
855 855
856 856 self._db.execute(
857 857 'UPDATE fileindex SET flags=?, deltaid=?, deltabaseid=NULL '
858 858 'WHERE pathid=? AND node=?',
859 859 (flags, tombstonedeltaid, self._pathid, censornode),
860 860 )
861 861
862 862 self._db.execute('DELETE FROM delta WHERE id=?', (censoreddeltaid,))
863 863
864 864 self._refreshindex()
865 865 self._revisioncache.clear()
866 866
867 867 def getstrippoint(self, minlink):
868 868 return storageutil.resolvestripinfo(
869 869 minlink,
870 870 len(self) - 1,
871 871 [self.rev(n) for n in self.heads()],
872 872 self.linkrev,
873 873 self.parentrevs,
874 874 )
875 875
876 876 def strip(self, minlink, transaction):
877 877 if not len(self):
878 878 return
879 879
880 880 rev, _ignored = self.getstrippoint(minlink)
881 881
882 882 if rev == len(self):
883 883 return
884 884
885 885 for rev in self.revs(rev):
886 886 self._db.execute(
887 887 'DELETE FROM fileindex WHERE pathid=? AND node=?',
888 888 (self._pathid, self.node(rev)),
889 889 )
890 890
891 891 # TODO how should we garbage collect data in delta table?
892 892
893 893 self._refreshindex()
894 894
895 895 # End of ifilemutation interface.
896 896
897 897 # Start of ifilestorage interface.
898 898
899 899 def files(self):
900 900 return []
901 901
902 902 def storageinfo(
903 903 self,
904 904 exclusivefiles=False,
905 905 sharedfiles=False,
906 906 revisionscount=False,
907 907 trackedsize=False,
908 908 storedsize=False,
909 909 ):
910 910 d = {}
911 911
912 912 if exclusivefiles:
913 913 d[b'exclusivefiles'] = []
914 914
915 915 if sharedfiles:
916 916 # TODO list sqlite file(s) here.
917 917 d[b'sharedfiles'] = []
918 918
919 919 if revisionscount:
920 920 d[b'revisionscount'] = len(self)
921 921
922 922 if trackedsize:
923 923 d[b'trackedsize'] = sum(
924 924 len(self.revision(node)) for node in self._nodetorev
925 925 )
926 926
927 927 if storedsize:
928 928 # TODO implement this?
929 929 d[b'storedsize'] = None
930 930
931 931 return d
932 932
933 933 def verifyintegrity(self, state):
934 934 state[b'skipread'] = set()
935 935
936 936 for rev in self:
937 937 node = self.node(rev)
938 938
939 939 try:
940 940 self.revision(node)
941 941 except Exception as e:
942 942 yield sqliteproblem(
943 943 error=_(b'unpacking %s: %s') % (short(node), e), node=node
944 944 )
945 945
946 946 state[b'skipread'].add(node)
947 947
948 948 # End of ifilestorage interface.
949 949
950 950 def _checkhash(self, fulltext, node, p1=None, p2=None):
951 951 if p1 is None and p2 is None:
952 952 p1, p2 = self.parents(node)
953 953
954 954 if node == storageutil.hashrevisionsha1(fulltext, p1, p2):
955 955 return
956 956
957 957 try:
958 958 del self._revisioncache[node]
959 959 except KeyError:
960 960 pass
961 961
962 962 if storageutil.iscensoredtext(fulltext):
963 963 raise error.CensoredNodeError(self._path, node, fulltext)
964 964
965 965 raise SQLiteStoreError(_(b'integrity check failed on %s') % self._path)
966 966
967 967 def _addrawrevision(
968 968 self,
969 969 node,
970 970 revisiondata,
971 971 transaction,
972 972 linkrev,
973 973 p1,
974 974 p2,
975 975 storedelta=None,
976 976 flags=0,
977 977 ):
978 978 if self._pathid is None:
979 979 res = self._db.execute(
980 980 'INSERT INTO filepath (path) VALUES (?)', (self._path,)
981 981 )
982 982 self._pathid = res.lastrowid
983 983
984 984 # For simplicity, always store a delta against p1.
985 985 # TODO we need a lot more logic here to make behavior reasonable.
986 986
987 987 if storedelta:
988 988 deltabase, delta = storedelta
989 989
990 990 if isinstance(deltabase, int):
991 991 deltabase = self.node(deltabase)
992 992
993 993 else:
994 994 assert revisiondata is not None
995 995 deltabase = p1
996 996
997 997 if deltabase == nullid:
998 998 delta = revisiondata
999 999 else:
1000 1000 delta = mdiff.textdiff(
1001 1001 self.revision(self.rev(deltabase)), revisiondata
1002 1002 )
1003 1003
1004 1004 # File index stores a pointer to its delta and the parent delta.
1005 1005 # The parent delta is stored via a pointer to the fileindex PK.
1006 1006 if deltabase == nullid:
1007 1007 baseid = None
1008 1008 else:
1009 1009 baseid = self._revisions[deltabase].rid
1010 1010
1011 1011 # Deltas are stored with a hash of their content. This allows
1012 1012 # us to de-duplicate. The table is configured to ignore conflicts
1013 1013 # and it is faster to just insert and silently noop than to look
1014 1014 # first.
1015 1015 deltahash = hashutil.sha1(delta).digest()
1016 1016
1017 1017 if self._compengine == b'zstd':
1018 1018 deltablob = self._cctx.compress(delta)
1019 1019 compression = COMPRESSION_ZSTD
1020 1020 elif self._compengine == b'zlib':
1021 1021 deltablob = zlib.compress(delta)
1022 1022 compression = COMPRESSION_ZLIB
1023 1023 elif self._compengine == b'none':
1024 1024 deltablob = delta
1025 1025 compression = COMPRESSION_NONE
1026 1026 else:
1027 1027 raise error.ProgrammingError(
1028 1028 b'unhandled compression engine: %s' % self._compengine
1029 1029 )
1030 1030
1031 1031 # Don't store compressed data if it isn't practical.
1032 1032 if len(deltablob) >= len(delta):
1033 1033 deltablob = delta
1034 1034 compression = COMPRESSION_NONE
1035 1035
1036 1036 deltaid = insertdelta(self._db, compression, deltahash, deltablob)
1037 1037
1038 1038 rev = len(self)
1039 1039
1040 1040 if p1 == nullid:
1041 1041 p1rev = nullrev
1042 1042 else:
1043 1043 p1rev = self._nodetorev[p1]
1044 1044
1045 1045 if p2 == nullid:
1046 1046 p2rev = nullrev
1047 1047 else:
1048 1048 p2rev = self._nodetorev[p2]
1049 1049
1050 1050 rid = self._db.execute(
1051 1051 'INSERT INTO fileindex ('
1052 1052 ' pathid, revnum, node, p1rev, p2rev, linkrev, flags, '
1053 1053 ' deltaid, deltabaseid) '
1054 1054 ' VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)',
1055 1055 (
1056 1056 self._pathid,
1057 1057 rev,
1058 1058 node,
1059 1059 p1rev,
1060 1060 p2rev,
1061 1061 linkrev,
1062 1062 flags,
1063 1063 deltaid,
1064 1064 baseid,
1065 1065 ),
1066 1066 ).lastrowid
1067 1067
1068 1068 entry = revisionentry(
1069 1069 rid=rid,
1070 1070 rev=rev,
1071 1071 node=node,
1072 1072 p1rev=p1rev,
1073 1073 p2rev=p2rev,
1074 1074 p1node=p1,
1075 1075 p2node=p2,
1076 1076 linkrev=linkrev,
1077 1077 flags=flags,
1078 1078 )
1079 1079
1080 1080 self._nodetorev[node] = rev
1081 1081 self._revtonode[rev] = node
1082 1082 self._revisions[node] = entry
1083 1083
1084 1084 return rev
1085 1085
1086 1086
1087 1087 class sqliterepository(localrepo.localrepository):
1088 1088 def cancopy(self):
1089 1089 return False
1090 1090
1091 1091 def transaction(self, *args, **kwargs):
1092 1092 current = self.currenttransaction()
1093 1093
1094 1094 tr = super(sqliterepository, self).transaction(*args, **kwargs)
1095 1095
1096 1096 if current:
1097 1097 return tr
1098 1098
1099 1099 self._dbconn.execute('BEGIN TRANSACTION')
1100 1100
1101 1101 def committransaction(_):
1102 1102 self._dbconn.commit()
1103 1103
1104 1104 tr.addfinalize(b'sqlitestore', committransaction)
1105 1105
1106 1106 return tr
1107 1107
1108 1108 @property
1109 1109 def _dbconn(self):
1110 1110 # SQLite connections can only be used on the thread that created
1111 1111 # them. In most cases, this "just works." However, hgweb uses
1112 1112 # multiple threads.
1113 1113 tid = threading.current_thread().ident
1114 1114
1115 1115 if self._db:
1116 1116 if self._db[0] == tid:
1117 1117 return self._db[1]
1118 1118
1119 1119 db = makedb(self.svfs.join(b'db.sqlite'))
1120 1120 self._db = (tid, db)
1121 1121
1122 1122 return db
1123 1123
1124 1124
1125 1125 def makedb(path):
1126 1126 """Construct a database handle for a database at path."""
1127 1127
1128 1128 db = sqlite3.connect(encoding.strfromlocal(path))
1129 1129 db.text_factory = bytes
1130 1130
1131 1131 res = db.execute('PRAGMA user_version').fetchone()[0]
1132 1132
1133 1133 # New database.
1134 1134 if res == 0:
1135 1135 for statement in CREATE_SCHEMA:
1136 1136 db.execute(statement)
1137 1137
1138 1138 db.commit()
1139 1139
1140 1140 elif res == CURRENT_SCHEMA_VERSION:
1141 1141 pass
1142 1142
1143 1143 else:
1144 1144 raise error.Abort(_(b'sqlite database has unrecognized version'))
1145 1145
1146 1146 db.execute('PRAGMA journal_mode=WAL')
1147 1147
1148 1148 return db
1149 1149
1150 1150
1151 1151 def featuresetup(ui, supported):
1152 1152 supported.add(REQUIREMENT)
1153 1153
1154 1154 if zstd:
1155 1155 supported.add(REQUIREMENT_ZSTD)
1156 1156
1157 1157 supported.add(REQUIREMENT_ZLIB)
1158 1158 supported.add(REQUIREMENT_NONE)
1159 1159 supported.add(REQUIREMENT_SHALLOW_FILES)
1160 1160 supported.add(requirements.NARROW_REQUIREMENT)
1161 1161
1162 1162
1163 1163 def newreporequirements(orig, ui, createopts):
1164 1164 if createopts[b'backend'] != b'sqlite':
1165 1165 return orig(ui, createopts)
1166 1166
1167 1167 # This restriction can be lifted once we have more confidence.
1168 1168 if b'sharedrepo' in createopts:
1169 1169 raise error.Abort(
1170 1170 _(b'shared repositories not supported with SQLite store')
1171 1171 )
1172 1172
1173 1173 # This filtering is out of an abundance of caution: we want to ensure
1174 1174 # we honor creation options and we do that by annotating exactly the
1175 1175 # creation options we recognize.
1176 1176 known = {
1177 1177 b'narrowfiles',
1178 1178 b'backend',
1179 1179 b'shallowfilestore',
1180 1180 }
1181 1181
1182 1182 unsupported = set(createopts) - known
1183 1183 if unsupported:
1184 1184 raise error.Abort(
1185 1185 _(b'SQLite store does not support repo creation option: %s')
1186 1186 % b', '.join(sorted(unsupported))
1187 1187 )
1188 1188
1189 1189 # Since we're a hybrid store that still relies on revlogs, we fall back
1190 1190 # to using the revlogv1 backend's storage requirements then adding our
1191 1191 # own requirement.
1192 1192 createopts[b'backend'] = b'revlogv1'
1193 1193 requirements = orig(ui, createopts)
1194 1194 requirements.add(REQUIREMENT)
1195 1195
1196 1196 compression = ui.config(b'storage', b'sqlite.compression')
1197 1197
1198 1198 if compression == b'zstd' and not zstd:
1199 1199 raise error.Abort(
1200 1200 _(
1201 1201 b'storage.sqlite.compression set to "zstd" but '
1202 1202 b'zstandard compression not available to this '
1203 1203 b'Mercurial install'
1204 1204 )
1205 1205 )
1206 1206
1207 1207 if compression == b'zstd':
1208 1208 requirements.add(REQUIREMENT_ZSTD)
1209 1209 elif compression == b'zlib':
1210 1210 requirements.add(REQUIREMENT_ZLIB)
1211 1211 elif compression == b'none':
1212 1212 requirements.add(REQUIREMENT_NONE)
1213 1213 else:
1214 1214 raise error.Abort(
1215 1215 _(
1216 1216 b'unknown compression engine defined in '
1217 1217 b'storage.sqlite.compression: %s'
1218 1218 )
1219 1219 % compression
1220 1220 )
1221 1221
1222 1222 if createopts.get(b'shallowfilestore'):
1223 1223 requirements.add(REQUIREMENT_SHALLOW_FILES)
1224 1224
1225 1225 return requirements
1226 1226
1227 1227
1228 1228 @interfaceutil.implementer(repository.ilocalrepositoryfilestorage)
1229 1229 class sqlitefilestorage(object):
1230 1230 """Repository file storage backed by SQLite."""
1231 1231
1232 1232 def file(self, path):
1233 1233 if path[0] == b'/':
1234 1234 path = path[1:]
1235 1235
1236 1236 if REQUIREMENT_ZSTD in self.requirements:
1237 1237 compression = b'zstd'
1238 1238 elif REQUIREMENT_ZLIB in self.requirements:
1239 1239 compression = b'zlib'
1240 1240 elif REQUIREMENT_NONE in self.requirements:
1241 1241 compression = b'none'
1242 1242 else:
1243 1243 raise error.Abort(
1244 1244 _(
1245 1245 b'unable to determine what compression engine '
1246 1246 b'to use for SQLite storage'
1247 1247 )
1248 1248 )
1249 1249
1250 1250 return sqlitefilestore(self._dbconn, path, compression)
1251 1251
1252 1252
1253 1253 def makefilestorage(orig, requirements, features, **kwargs):
1254 1254 """Produce a type conforming to ``ilocalrepositoryfilestorage``."""
1255 1255 if REQUIREMENT in requirements:
1256 1256 if REQUIREMENT_SHALLOW_FILES in requirements:
1257 1257 features.add(repository.REPO_FEATURE_SHALLOW_FILE_STORAGE)
1258 1258
1259 1259 return sqlitefilestorage
1260 1260 else:
1261 1261 return orig(requirements=requirements, features=features, **kwargs)
1262 1262
1263 1263
1264 1264 def makemain(orig, ui, requirements, **kwargs):
1265 1265 if REQUIREMENT in requirements:
1266 1266 if REQUIREMENT_ZSTD in requirements and not zstd:
1267 1267 raise error.Abort(
1268 1268 _(
1269 1269 b'repository uses zstandard compression, which '
1270 1270 b'is not available to this Mercurial install'
1271 1271 )
1272 1272 )
1273 1273
1274 1274 return sqliterepository
1275 1275
1276 1276 return orig(requirements=requirements, **kwargs)
1277 1277
1278 1278
1279 1279 def verifierinit(orig, self, *args, **kwargs):
1280 1280 orig(self, *args, **kwargs)
1281 1281
1282 1282 # We don't care that files in the store don't align with what is
1283 1283 # advertised. So suppress these warnings.
1284 1284 self.warnorphanstorefiles = False
1285 1285
1286 1286
1287 1287 def extsetup(ui):
1288 1288 localrepo.featuresetupfuncs.add(featuresetup)
1289 1289 extensions.wrapfunction(
1290 1290 localrepo, b'newreporequirements', newreporequirements
1291 1291 )
1292 1292 extensions.wrapfunction(localrepo, b'makefilestorage', makefilestorage)
1293 1293 extensions.wrapfunction(localrepo, b'makemain', makemain)
1294 1294 extensions.wrapfunction(verify.verifier, b'__init__', verifierinit)
1295 1295
1296 1296
1297 1297 def reposetup(ui, repo):
1298 1298 if isinstance(repo, sqliterepository):
1299 1299 repo._db = None
1300 1300
1301 1301 # TODO check for bundlerepository?
@@ -1,1707 +1,1706 @@
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21 from .pycompat import open
22 22
23 23 from . import (
24 24 error,
25 25 match as matchmod,
26 26 mdiff,
27 27 phases,
28 28 pycompat,
29 29 requirements,
30 30 scmutil,
31 31 util,
32 32 )
33 33
34 34 from .interfaces import repository
35 35
36 36 _CHANGEGROUPV1_DELTA_HEADER = struct.Struct(b"20s20s20s20s")
37 37 _CHANGEGROUPV2_DELTA_HEADER = struct.Struct(b"20s20s20s20s20s")
38 38 _CHANGEGROUPV3_DELTA_HEADER = struct.Struct(b">20s20s20s20s20sH")
39 39
40 40 LFS_REQUIREMENT = b'lfs'
41 41
42 42 readexactly = util.readexactly
43 43
44 44
45 45 def getchunk(stream):
46 46 """return the next chunk from stream as a string"""
47 47 d = readexactly(stream, 4)
48 48 l = struct.unpack(b">l", d)[0]
49 49 if l <= 4:
50 50 if l:
51 51 raise error.Abort(_(b"invalid chunk length %d") % l)
52 52 return b""
53 53 return readexactly(stream, l - 4)
54 54
55 55
56 56 def chunkheader(length):
57 57 """return a changegroup chunk header (string)"""
58 58 return struct.pack(b">l", length + 4)
59 59
60 60
61 61 def closechunk():
62 62 """return a changegroup chunk header (string) for a zero-length chunk"""
63 63 return struct.pack(b">l", 0)
64 64
65 65
66 66 def _fileheader(path):
67 67 """Obtain a changegroup chunk header for a named path."""
68 68 return chunkheader(len(path)) + path
69 69
70 70
71 71 def writechunks(ui, chunks, filename, vfs=None):
72 72 """Write chunks to a file and return its filename.
73 73
74 74 The stream is assumed to be a bundle file.
75 75 Existing files will not be overwritten.
76 76 If no filename is specified, a temporary file is created.
77 77 """
78 78 fh = None
79 79 cleanup = None
80 80 try:
81 81 if filename:
82 82 if vfs:
83 83 fh = vfs.open(filename, b"wb")
84 84 else:
85 85 # Increase default buffer size because default is usually
86 86 # small (4k is common on Linux).
87 87 fh = open(filename, b"wb", 131072)
88 88 else:
89 89 fd, filename = pycompat.mkstemp(prefix=b"hg-bundle-", suffix=b".hg")
90 90 fh = os.fdopen(fd, "wb")
91 91 cleanup = filename
92 92 for c in chunks:
93 93 fh.write(c)
94 94 cleanup = None
95 95 return filename
96 96 finally:
97 97 if fh is not None:
98 98 fh.close()
99 99 if cleanup is not None:
100 100 if filename and vfs:
101 101 vfs.unlink(cleanup)
102 102 else:
103 103 os.unlink(cleanup)
104 104
105 105
106 106 class cg1unpacker(object):
107 107 """Unpacker for cg1 changegroup streams.
108 108
109 109 A changegroup unpacker handles the framing of the revision data in
110 110 the wire format. Most consumers will want to use the apply()
111 111 method to add the changes from the changegroup to a repository.
112 112
113 113 If you're forwarding a changegroup unmodified to another consumer,
114 114 use getchunks(), which returns an iterator of changegroup
115 115 chunks. This is mostly useful for cases where you need to know the
116 116 data stream has ended by observing the end of the changegroup.
117 117
118 118 deltachunk() is useful only if you're applying delta data. Most
119 119 consumers should prefer apply() instead.
120 120
121 121 A few other public methods exist. Those are used only for
122 122 bundlerepo and some debug commands - their use is discouraged.
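
A minimal usage sketch (illustrative; assumes the caller already holds the
repo lock, and most callers go through exchange/bundle2 helpers instead of
constructing the unpacker directly)::

  cg = cg1unpacker(fh, b'UN')
  with repo.transaction(b'unbundle') as tr:
      ret = cg.apply(repo, tr, b'unbundle', url)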
123 123 """
124 124
125 125 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
126 126 deltaheadersize = deltaheader.size
127 127 version = b'01'
128 128 _grouplistcount = 1 # One list of files after the manifests
129 129
130 130 def __init__(self, fh, alg, extras=None):
131 131 if alg is None:
132 132 alg = b'UN'
133 133 if alg not in util.compengines.supportedbundletypes:
134 134 raise error.Abort(_(b'unknown stream compression type: %s') % alg)
135 135 if alg == b'BZ':
136 136 alg = b'_truncatedBZ'
137 137
138 138 compengine = util.compengines.forbundletype(alg)
139 139 self._stream = compengine.decompressorreader(fh)
140 140 self._type = alg
141 141 self.extras = extras or {}
142 142 self.callback = None
143 143
144 144 # These methods (compressed, read, seek, tell) all appear to only
145 145 # be used by bundlerepo, but it's a little hard to tell.
146 146 def compressed(self):
147 147 return self._type is not None and self._type != b'UN'
148 148
149 149 def read(self, l):
150 150 return self._stream.read(l)
151 151
152 152 def seek(self, pos):
153 153 return self._stream.seek(pos)
154 154
155 155 def tell(self):
156 156 return self._stream.tell()
157 157
158 158 def close(self):
159 159 return self._stream.close()
160 160
161 161 def _chunklength(self):
162 162 d = readexactly(self._stream, 4)
163 163 l = struct.unpack(b">l", d)[0]
164 164 if l <= 4:
165 165 if l:
166 166 raise error.Abort(_(b"invalid chunk length %d") % l)
167 167 return 0
168 168 if self.callback:
169 169 self.callback()
170 170 return l - 4
171 171
172 172 def changelogheader(self):
173 173 """v10 does not have a changelog header chunk"""
174 174 return {}
175 175
176 176 def manifestheader(self):
177 177 """v10 does not have a manifest header chunk"""
178 178 return {}
179 179
180 180 def filelogheader(self):
181 181 """return the header of the filelogs chunk, v10 only has the filename"""
182 182 l = self._chunklength()
183 183 if not l:
184 184 return {}
185 185 fname = readexactly(self._stream, l)
186 186 return {b'filename': fname}
187 187
188 188 def _deltaheader(self, headertuple, prevnode):
189 189 node, p1, p2, cs = headertuple
190 190 if prevnode is None:
191 191 deltabase = p1
192 192 else:
193 193 deltabase = prevnode
194 194 flags = 0
195 195 return node, p1, p2, deltabase, cs, flags
196 196
197 197 def deltachunk(self, prevnode):
198 198 l = self._chunklength()
199 199 if not l:
200 200 return {}
201 201 headerdata = readexactly(self._stream, self.deltaheadersize)
202 202 header = self.deltaheader.unpack(headerdata)
203 203 delta = readexactly(self._stream, l - self.deltaheadersize)
204 204 node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
205 205 return (node, p1, p2, cs, deltabase, delta, flags)
206 206
207 207 def getchunks(self):
208 208 """returns all the chunks contains in the bundle
209 209
210 210 Used when you need to forward the binary stream to a file or another
211 211 network API. To do so, it parse the changegroup data, otherwise it will
212 212 block in case of sshrepo because it don't know the end of the stream.
213 213 """
214 214 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
215 215 # and a list of filelogs. For changegroup 3, we expect 4 parts:
216 216 # changelog, manifestlog, a list of tree manifestlogs, and a list of
217 217 # filelogs.
218 218 #
219 219 # Changelog and manifestlog parts are terminated with empty chunks. The
220 220 # tree and file parts are a list of entry sections. Each entry section
221 221 # is a series of chunks terminating in an empty chunk. The list of these
222 222 # entry sections is terminated in yet another empty chunk, so we know
223 223 # we've reached the end of the tree/file list when we reach an empty
224 224 # chunk that was preceded by no non-empty chunks.
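#
# An illustrative sketch (not normative) of the layout this loop walks for
# cg1/cg2 (_grouplistcount == 1):
#
#   <changelog chunks ...> <empty>
#   <manifest chunks ...> <empty>
#   for each file: <filename chunk> <delta chunks ...> <empty>
#   <empty>   (ends the file list)
#
# cg3 (_grouplistcount == 2) adds an analogous tree-manifest list, terminated
# the same way, between the manifest and file sections.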
225 225
226 226 parts = 0
227 227 while parts < 2 + self._grouplistcount:
228 228 noentries = True
229 229 while True:
230 230 chunk = getchunk(self)
231 231 if not chunk:
232 232 # The first two empty chunks represent the end of the
233 233 # changelog and the manifestlog portions. The remaining
234 234 # empty chunks represent either A) the end of individual
235 235 # tree or file entries in the file list, or B) the end of
236 236 # the entire list. It's the end of the entire list if there
237 237 # were no entries (i.e. noentries is True).
238 238 if parts < 2:
239 239 parts += 1
240 240 elif noentries:
241 241 parts += 1
242 242 break
243 243 noentries = False
244 244 yield chunkheader(len(chunk))
245 245 pos = 0
246 246 while pos < len(chunk):
247 247 next = pos + 2 ** 20
248 248 yield chunk[pos:next]
249 249 pos = next
250 250 yield closechunk()
251 251
252 252 def _unpackmanifests(self, repo, revmap, trp, prog):
253 253 self.callback = prog.increment
254 254 # no need to check for empty manifest group here:
255 255 # if the result of the merge of 1 and 2 is the same in 3 and 4,
256 256 # no new manifest will be created and the manifest group will
257 257 # be empty during the pull
258 258 self.manifestheader()
259 259 deltas = self.deltaiter()
260 260 repo.manifestlog.getstorage(b'').addgroup(deltas, revmap, trp)
261 261 prog.complete()
262 262 self.callback = None
263 263
264 264 def apply(
265 265 self,
266 266 repo,
267 267 tr,
268 268 srctype,
269 269 url,
270 270 targetphase=phases.draft,
271 271 expectedtotal=None,
272 272 ):
273 273 """Add the changegroup returned by source.read() to this repo.
274 274 srctype is a string like 'push', 'pull', or 'unbundle'. url is
275 275 the URL of the repo where this changegroup is coming from.
276 276
277 277 Return an integer summarizing the change to this repo:
278 278 - nothing changed or no source: 0
279 279 - more heads than before: 1+added heads (2..n)
280 280 - fewer heads than before: -1-removed heads (-2..-n)
281 281 - number of heads stays the same: 1
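
For example (illustrative): a pull adding three new heads returns 4 (1+3),
one that removes two heads returns -3 (-1-2), and one that adds changesets
without changing the head count returns 1.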
282 282 """
283 283 repo = repo.unfiltered()
284 284
285 285 def csmap(x):
286 286 repo.ui.debug(b"add changeset %s\n" % short(x))
287 287 return len(cl)
288 288
289 289 def revmap(x):
290 290 return cl.rev(x)
291 291
292 292 try:
293 293 # The transaction may already carry source information. In this
294 294 # case we use the top level data. We overwrite the argument
295 295 # because we need to use the top level value (if they exist)
296 296 # in this function.
297 297 srctype = tr.hookargs.setdefault(b'source', srctype)
298 298 tr.hookargs.setdefault(b'url', url)
299 299 repo.hook(
300 300 b'prechangegroup', throw=True, **pycompat.strkwargs(tr.hookargs)
301 301 )
302 302
303 303 # write changelog data to temp files so concurrent readers
304 304 # will not see an inconsistent view
305 305 cl = repo.changelog
306 306 cl.delayupdate(tr)
307 307 oldheads = set(cl.heads())
308 308
309 309 trp = weakref.proxy(tr)
310 310 # pull off the changeset group
311 311 repo.ui.status(_(b"adding changesets\n"))
312 312 clstart = len(cl)
313 313 progress = repo.ui.makeprogress(
314 314 _(b'changesets'), unit=_(b'chunks'), total=expectedtotal
315 315 )
316 316 self.callback = progress.increment
317 317
318 318 efilesset = set()
319 319 cgnodes = []
320 320
321 def ondupchangelog(cl, node):
322 if cl.rev(node) < clstart:
323 cgnodes.append(node)
321 def ondupchangelog(cl, rev):
322 if rev < clstart:
323 cgnodes.append(cl.node(rev))
324 324
325 def onchangelog(cl, node):
326 rev = cl.rev(node)
325 def onchangelog(cl, rev):
327 326 ctx = cl.changelogrevision(rev)
328 327 efilesset.update(ctx.files)
329 328 repo.register_changeset(rev, ctx)
330 329
331 330 self.changelogheader()
332 331 deltas = self.deltaiter()
333 332 if not cl.addgroup(
334 333 deltas,
335 334 csmap,
336 335 trp,
337 336 alwayscache=True,
338 337 addrevisioncb=onchangelog,
339 338 duplicaterevisioncb=ondupchangelog,
340 339 ):
341 340 repo.ui.develwarn(
342 341 b'applied empty changelog from changegroup',
343 342 config=b'warn-empty-changegroup',
344 343 )
345 344 efiles = len(efilesset)
346 345 clend = len(cl)
347 346 changesets = clend - clstart
348 347 progress.complete()
349 348 del deltas
350 349 # TODO Python 2.7 removal
351 350 # del efilesset
352 351 efilesset = None
353 352 self.callback = None
354 353
355 354 # pull off the manifest group
356 355 repo.ui.status(_(b"adding manifests\n"))
357 356 # We know that we'll never have more manifests than we had
358 357 # changesets.
359 358 progress = repo.ui.makeprogress(
360 359 _(b'manifests'), unit=_(b'chunks'), total=changesets
361 360 )
362 361 self._unpackmanifests(repo, revmap, trp, progress)
363 362
364 363 needfiles = {}
365 364 if repo.ui.configbool(b'server', b'validate'):
366 365 cl = repo.changelog
367 366 ml = repo.manifestlog
368 367 # validate incoming csets have their manifests
369 368 for cset in pycompat.xrange(clstart, clend):
370 369 mfnode = cl.changelogrevision(cset).manifest
371 370 mfest = ml[mfnode].readdelta()
372 371 # store file nodes we must see
373 372 for f, n in pycompat.iteritems(mfest):
374 373 needfiles.setdefault(f, set()).add(n)
375 374
376 375 # process the files
377 376 repo.ui.status(_(b"adding file changes\n"))
378 377 newrevs, newfiles = _addchangegroupfiles(
379 378 repo, self, revmap, trp, efiles, needfiles
380 379 )
381 380
382 381 # making sure the value exists
383 382 tr.changes.setdefault(b'changegroup-count-changesets', 0)
384 383 tr.changes.setdefault(b'changegroup-count-revisions', 0)
385 384 tr.changes.setdefault(b'changegroup-count-files', 0)
386 385 tr.changes.setdefault(b'changegroup-count-heads', 0)
387 386
388 387 # some code uses bundle operations for internal purposes. They usually
389 388 # set `ui.quiet` to do this outside of user sight. Since the report
390 389 # of such operations now happens at the end of the transaction,
391 390 # ui.quiet has no direct effect on the output.
392 391 #
393 392 # To preserve this intent we use an inelegant hack: we fail to report
394 393 # the change if `quiet` is set. We should probably move to
395 394 # something better, but this is a good first step to allow the "end
396 395 # of transaction report" to pass tests.
397 396 if not repo.ui.quiet:
398 397 tr.changes[b'changegroup-count-changesets'] += changesets
399 398 tr.changes[b'changegroup-count-revisions'] += newrevs
400 399 tr.changes[b'changegroup-count-files'] += newfiles
401 400
402 401 deltaheads = 0
403 402 if oldheads:
404 403 heads = cl.heads()
405 404 deltaheads += len(heads) - len(oldheads)
406 405 for h in heads:
407 406 if h not in oldheads and repo[h].closesbranch():
408 407 deltaheads -= 1
409 408
410 409 # see previous comment about checking ui.quiet
411 410 if not repo.ui.quiet:
412 411 tr.changes[b'changegroup-count-heads'] += deltaheads
413 412 repo.invalidatevolatilesets()
414 413
415 414 if changesets > 0:
416 415 if b'node' not in tr.hookargs:
417 416 tr.hookargs[b'node'] = hex(cl.node(clstart))
418 417 tr.hookargs[b'node_last'] = hex(cl.node(clend - 1))
419 418 hookargs = dict(tr.hookargs)
420 419 else:
421 420 hookargs = dict(tr.hookargs)
422 421 hookargs[b'node'] = hex(cl.node(clstart))
423 422 hookargs[b'node_last'] = hex(cl.node(clend - 1))
424 423 repo.hook(
425 424 b'pretxnchangegroup',
426 425 throw=True,
427 426 **pycompat.strkwargs(hookargs)
428 427 )
429 428
430 429 added = pycompat.xrange(clstart, clend)
431 430 phaseall = None
432 431 if srctype in (b'push', b'serve'):
433 432 # Old servers cannot push the boundary themselves.
434 433 # New servers won't push the boundary if the changeset already
435 434 # exists locally as secret
436 435 #
437 436 # We should not use added here but the list of all changes in
438 437 # the bundle
439 438 if repo.publishing():
440 439 targetphase = phaseall = phases.public
441 440 else:
442 441 # closer target phase computation
443 442
444 443 # Those changesets have been pushed from the
445 444 # outside; their phases are going to be pushed
446 445 # alongside. Therefore `targetphase` is
447 446 # ignored.
448 447 targetphase = phaseall = phases.draft
449 448 if added:
450 449 phases.registernew(repo, tr, targetphase, added)
451 450 if phaseall is not None:
452 451 phases.advanceboundary(repo, tr, phaseall, cgnodes, revs=added)
453 452 cgnodes = []
454 453
455 454 if changesets > 0:
456 455
457 456 def runhooks(unused_success):
458 457 # These hooks run when the lock releases, not when the
459 458 # transaction closes. So it's possible for the changelog
460 459 # to have changed since we last saw it.
461 460 if clstart >= len(repo):
462 461 return
463 462
464 463 repo.hook(b"changegroup", **pycompat.strkwargs(hookargs))
465 464
466 465 for rev in added:
467 466 args = hookargs.copy()
468 467 args[b'node'] = hex(cl.node(rev))
469 468 del args[b'node_last']
470 469 repo.hook(b"incoming", **pycompat.strkwargs(args))
471 470
472 471 newheads = [h for h in repo.heads() if h not in oldheads]
473 472 repo.ui.log(
474 473 b"incoming",
475 474 b"%d incoming changes - new heads: %s\n",
476 475 len(added),
477 476 b', '.join([hex(c[:6]) for c in newheads]),
478 477 )
479 478
480 479 tr.addpostclose(
481 480 b'changegroup-runhooks-%020i' % clstart,
482 481 lambda tr: repo._afterlock(runhooks),
483 482 )
484 483 finally:
485 484 repo.ui.flush()
486 485 # never return 0 here:
487 486 if deltaheads < 0:
488 487 ret = deltaheads - 1
489 488 else:
490 489 ret = deltaheads + 1
491 490 return ret
492 491
493 492 def deltaiter(self):
494 493 """
495 494 returns an iterator of the deltas in this changegroup
496 495
497 496 Useful for passing to the underlying storage system to be stored.
498 497 """
499 498 chain = None
500 499 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
501 500 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
502 501 yield chunkdata
503 502 chain = chunkdata[0]
504 503
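# The iterator above is meant to be handed straight to a store's
# addgroup(), as the file and manifest processing code later in this
# module does. A minimal sketch (assumed names: `unpacker` is a
# positioned cg1unpacker, `store` a revlog-like store, `revmap` the
# linkrev mapping callback, `tr` an open transaction):
#
#     deltas = unpacker.deltaiter()
#     if not store.addgroup(deltas, revmap, tr):
#         raise error.Abort(_(b"received revlog group is empty"))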
505 504
506 505 class cg2unpacker(cg1unpacker):
507 506 """Unpacker for cg2 streams.
508 507
509 508 cg2 streams add support for generaldelta, so the delta header
510 509 format is slightly different. All other features about the data
511 510 remain the same.
512 511 """
513 512
514 513 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
515 514 deltaheadersize = deltaheader.size
516 515 version = b'02'
517 516
518 517 def _deltaheader(self, headertuple, prevnode):
519 518 node, p1, p2, deltabase, cs = headertuple
520 519 flags = 0
521 520 return node, p1, p2, deltabase, cs, flags
522 521
523 522
524 523 class cg3unpacker(cg2unpacker):
525 524 """Unpacker for cg3 streams.
526 525
527 526 cg3 streams add support for exchanging treemanifests and revlog
528 527 flags. It adds the revlog flags to the delta header and an empty chunk
529 528 separating manifests and files.
530 529 """
531 530
532 531 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
533 532 deltaheadersize = deltaheader.size
534 533 version = b'03'
535 534 _grouplistcount = 2 # One list of manifests and one list of files
536 535
537 536 def _deltaheader(self, headertuple, prevnode):
538 537 node, p1, p2, deltabase, cs, flags = headertuple
539 538 return node, p1, p2, deltabase, cs, flags
540 539
541 540 def _unpackmanifests(self, repo, revmap, trp, prog):
542 541 super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
543 542 for chunkdata in iter(self.filelogheader, {}):
544 543 # If we get here, there are directory manifests in the changegroup
545 544 d = chunkdata[b"filename"]
546 545 repo.ui.debug(b"adding %s revisions\n" % d)
547 546 deltas = self.deltaiter()
548 547 if not repo.manifestlog.getstorage(d).addgroup(deltas, revmap, trp):
549 548 raise error.Abort(_(b"received dir revlog group is empty"))
550 549
551 550
552 551 class headerlessfixup(object):
553 552 def __init__(self, fh, h):
554 553 self._h = h
555 554 self._fh = fh
556 555
557 556 def read(self, n):
558 557 if self._h:
559 558 d, self._h = self._h[:n], self._h[n:]
560 559 if len(d) < n:
561 560 d += readexactly(self._fh, n - len(d))
562 561 return d
563 562 return readexactly(self._fh, n)
564 563
565 564
566 565 def _revisiondeltatochunks(delta, headerfn):
567 566 """Serialize a revisiondelta to changegroup chunks."""
568 567
569 568 # The captured revision delta may be encoded as a delta against
570 569 # a base revision or as a full revision. The changegroup format
571 570 # requires that everything on the wire be deltas. So for full
572 571 # revisions, we need to invent a header that says to rewrite
573 572 # data.
574 573
575 574 if delta.delta is not None:
576 575 prefix, data = b'', delta.delta
577 576 elif delta.basenode == nullid:
578 577 data = delta.revision
579 578 prefix = mdiff.trivialdiffheader(len(data))
580 579 else:
581 580 data = delta.revision
582 581 prefix = mdiff.replacediffheader(delta.baserevisionsize, len(data))
583 582
584 583 meta = headerfn(delta)
585 584
586 585 yield chunkheader(len(meta) + len(prefix) + len(data))
587 586 yield meta
588 587 if prefix:
589 588 yield prefix
590 589 yield data
591 590
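# Each piece yielded above is framed by chunkheader(), whose 4-byte
# big-endian length counts the header bytes plus the payload, while
# closechunk() writes a zero length to end a group. A purely
# illustrative reader for that framing (a sketch, not a helper used by
# this module):

import struct

def _iterchunks_example(fh):
    """Walk a stream of length-prefixed changegroup chunks (sketch)."""
    while True:
        header = fh.read(4)
        if len(header) < 4:
            return
        (length,) = struct.unpack(">l", header)
        if length == 0:
            yield b''  # end-of-group marker, as written by closechunk()
            continue
        yield fh.read(length - 4)  # stored length includes the 4 header bytes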
592 591
593 592 def _sortnodesellipsis(store, nodes, cl, lookup):
594 593 """Sort nodes for changegroup generation."""
595 594 # Ellipses serving mode.
596 595 #
597 596 # In a perfect world, we'd generate better ellipsis-ified graphs
598 597 # for non-changelog revlogs. In practice, we haven't started doing
599 598 # that yet, so the resulting DAGs for the manifestlog and filelogs
600 599 # are actually full of bogus parentage on all the ellipsis
601 600 # nodes. This has the side effect that, while the contents are
602 601 # correct, the individual DAGs might be completely out of whack in
603 602 # a case like 882681bc3166 and its ancestors (back about 10
604 603 # revisions or so) in the main hg repo.
605 604 #
606 605 # The one invariant we *know* holds is that the new (potentially
607 606 # bogus) DAG shape will be valid if we order the nodes in the
608 607 # order that they're introduced in dramatis personae by the
609 608 # changelog, so what we do is we sort the non-changelog histories
610 609 # by the order in which they are used by the changelog.
611 610 key = lambda n: cl.rev(lookup(n))
612 611 return sorted(nodes, key=key)
613 612
614 613
615 614 def _resolvenarrowrevisioninfo(
616 615 cl,
617 616 store,
618 617 ischangelog,
619 618 rev,
620 619 linkrev,
621 620 linknode,
622 621 clrevtolocalrev,
623 622 fullclnodes,
624 623 precomputedellipsis,
625 624 ):
626 625 linkparents = precomputedellipsis[linkrev]
627 626
628 627 def local(clrev):
629 628 """Turn a changelog revnum into a local revnum.
630 629
631 630 The ellipsis dag is stored as revnums on the changelog,
632 631 but when we're producing ellipsis entries for
633 632 non-changelog revlogs, we need to turn those numbers into
634 633 something local. This does that for us, and during the
635 634 changelog sending phase will also expand the stored
636 635 mappings as needed.
637 636 """
638 637 if clrev == nullrev:
639 638 return nullrev
640 639
641 640 if ischangelog:
642 641 return clrev
643 642
644 643 # Walk the ellipsis-ized changelog breadth-first looking for a
645 644 # change that has been linked from the current revlog.
646 645 #
647 646 # For a flat manifest revlog only a single step should be necessary
648 647 # as all relevant changelog entries are relevant to the flat
649 648 # manifest.
650 649 #
651 650 # For a filelog or tree manifest dirlog however not every changelog
652 651 # entry will have been relevant, so we need to skip some changelog
653 652 # nodes even after ellipsis-izing.
654 653 walk = [clrev]
655 654 while walk:
656 655 p = walk[0]
657 656 walk = walk[1:]
658 657 if p in clrevtolocalrev:
659 658 return clrevtolocalrev[p]
660 659 elif p in fullclnodes:
661 660 walk.extend([pp for pp in cl.parentrevs(p) if pp != nullrev])
662 661 elif p in precomputedellipsis:
663 662 walk.extend(
664 663 [pp for pp in precomputedellipsis[p] if pp != nullrev]
665 664 )
666 665 else:
667 666 # In this case, we've got an ellipsis with parents
668 667 # outside the current bundle (likely an
669 668 # incremental pull). We "know" that we can use the
670 669 # value of this same revlog at whatever revision
671 670 # is pointed to by linknode. "Know" is in scare
672 671 # quotes because I haven't done enough examination
673 672 # of edge cases to convince myself this is really
674 673 # a fact - it works for all the (admittedly
675 674 # thorough) cases in our testsuite, but I would be
676 675 # somewhat unsurprised to find a case in the wild
677 676 # where this breaks down a bit. That said, I don't
678 677 # know if it would hurt anything.
679 678 for i in pycompat.xrange(rev, 0, -1):
680 679 if store.linkrev(i) == clrev:
681 680 return i
682 681 # We failed to resolve a parent for this node, so
683 682 # we crash the changegroup construction.
684 683 raise error.Abort(
685 684 b'unable to resolve parent while packing %r %r'
686 685 b' for changeset %r' % (store.indexfile, rev, clrev)
687 686 )
688 687
689 688 return nullrev
690 689
691 690 if not linkparents or (store.parentrevs(rev) == (nullrev, nullrev)):
692 691 p1, p2 = nullrev, nullrev
693 692 elif len(linkparents) == 1:
694 693 (p1,) = sorted(local(p) for p in linkparents)
695 694 p2 = nullrev
696 695 else:
697 696 p1, p2 = sorted(local(p) for p in linkparents)
698 697
699 698 p1node, p2node = store.node(p1), store.node(p2)
700 699
701 700 return p1node, p2node, linknode
702 701
703 702
704 703 def deltagroup(
705 704 repo,
706 705 store,
707 706 nodes,
708 707 ischangelog,
709 708 lookup,
710 709 forcedeltaparentprev,
711 710 topic=None,
712 711 ellipses=False,
713 712 clrevtolocalrev=None,
714 713 fullclnodes=None,
715 714 precomputedellipsis=None,
716 715 ):
717 716 """Calculate deltas for a set of revisions.
718 717
719 718 Is a generator of ``revisiondelta`` instances.
720 719
721 720 If topic is not None, progress detail will be generated using this
722 721 topic name (e.g. changesets, manifests, etc).
723 722 """
724 723 if not nodes:
725 724 return
726 725
727 726 cl = repo.changelog
728 727
729 728 if ischangelog:
730 729 # `hg log` shows changesets in storage order. To preserve order
731 730 # across clones, send out changesets in storage order.
732 731 nodesorder = b'storage'
733 732 elif ellipses:
734 733 nodes = _sortnodesellipsis(store, nodes, cl, lookup)
735 734 nodesorder = b'nodes'
736 735 else:
737 736 nodesorder = None
738 737
739 738 # Perform ellipses filtering and revision massaging. We do this before
740 739 # emitrevisions() because a) filtering out revisions creates less work
741 740 # for emitrevisions() b) dropping revisions would break emitrevisions()'s
742 741 # assumptions about delta choices and we would possibly send a delta
743 742 # referencing a missing base revision.
744 743 #
745 744 # Also, calling lookup() has side-effects with regards to populating
746 745 # data structures. If we don't call lookup() for each node or if we call
747 746 # lookup() after the first pass through each node, things can break -
748 747 # possibly intermittently depending on the python hash seed! For that
749 748 # reason, we store a mapping of all linknodes during the initial node
750 749 # pass rather than use lookup() on the output side.
751 750 if ellipses:
752 751 filtered = []
753 752 adjustedparents = {}
754 753 linknodes = {}
755 754
756 755 for node in nodes:
757 756 rev = store.rev(node)
758 757 linknode = lookup(node)
759 758 linkrev = cl.rev(linknode)
760 759 clrevtolocalrev[linkrev] = rev
761 760
762 761 # If linknode is in fullclnodes, it means the corresponding
763 762 # changeset was a full changeset and is being sent unaltered.
764 763 if linknode in fullclnodes:
765 764 linknodes[node] = linknode
766 765
767 766 # If the corresponding changeset wasn't in the set computed
768 767 # as relevant to us, it should be dropped outright.
769 768 elif linkrev not in precomputedellipsis:
770 769 continue
771 770
772 771 else:
773 772 # We could probably do this later and avoid the dict
774 773 # holding state. But it likely doesn't matter.
775 774 p1node, p2node, linknode = _resolvenarrowrevisioninfo(
776 775 cl,
777 776 store,
778 777 ischangelog,
779 778 rev,
780 779 linkrev,
781 780 linknode,
782 781 clrevtolocalrev,
783 782 fullclnodes,
784 783 precomputedellipsis,
785 784 )
786 785
787 786 adjustedparents[node] = (p1node, p2node)
788 787 linknodes[node] = linknode
789 788
790 789 filtered.append(node)
791 790
792 791 nodes = filtered
793 792
794 793 # We expect the first pass to be fast, so we only engage the progress
795 794 # meter for constructing the revision deltas.
796 795 progress = None
797 796 if topic is not None:
798 797 progress = repo.ui.makeprogress(
799 798 topic, unit=_(b'chunks'), total=len(nodes)
800 799 )
801 800
802 801 configtarget = repo.ui.config(b'devel', b'bundle.delta')
803 802 if configtarget not in (b'', b'p1', b'full'):
804 803 msg = _("""config "devel.bundle.delta" has unknown value: %s""")
805 804 repo.ui.warn(msg % configtarget)
806 805
807 806 deltamode = repository.CG_DELTAMODE_STD
808 807 if forcedeltaparentprev:
809 808 deltamode = repository.CG_DELTAMODE_PREV
810 809 elif configtarget == b'p1':
811 810 deltamode = repository.CG_DELTAMODE_P1
812 811 elif configtarget == b'full':
813 812 deltamode = repository.CG_DELTAMODE_FULL
814 813
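# For experimentation, this developer knob can be set per invocation,
# e.g. forcing a full snapshot for every revision when writing a bundle
# (illustrative command; the output file name is arbitrary):
#
#   hg --config devel.bundle.delta=full bundle --all full-deltas.hg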
815 814 revisions = store.emitrevisions(
816 815 nodes,
817 816 nodesorder=nodesorder,
818 817 revisiondata=True,
819 818 assumehaveparentrevisions=not ellipses,
820 819 deltamode=deltamode,
821 820 )
822 821
823 822 for i, revision in enumerate(revisions):
824 823 if progress:
825 824 progress.update(i + 1)
826 825
827 826 if ellipses:
828 827 linknode = linknodes[revision.node]
829 828
830 829 if revision.node in adjustedparents:
831 830 p1node, p2node = adjustedparents[revision.node]
832 831 revision.p1node = p1node
833 832 revision.p2node = p2node
834 833 revision.flags |= repository.REVISION_FLAG_ELLIPSIS
835 834
836 835 else:
837 836 linknode = lookup(revision.node)
838 837
839 838 revision.linknode = linknode
840 839 yield revision
841 840
842 841 if progress:
843 842 progress.complete()
844 843
845 844
846 845 class cgpacker(object):
847 846 def __init__(
848 847 self,
849 848 repo,
850 849 oldmatcher,
851 850 matcher,
852 851 version,
853 852 builddeltaheader,
854 853 manifestsend,
855 854 forcedeltaparentprev=False,
856 855 bundlecaps=None,
857 856 ellipses=False,
858 857 shallow=False,
859 858 ellipsisroots=None,
860 859 fullnodes=None,
861 860 ):
862 861 """Given a source repo, construct a bundler.
863 862
864 863 oldmatcher is a matcher that matches on files the client already has.
865 864 These will not be included in the changegroup.
866 865
867 866 matcher is a matcher that matches on files to include in the
868 867 changegroup. Used to facilitate sparse changegroups.
869 868
870 869 forcedeltaparentprev indicates whether delta parents must be against
871 870 the previous revision in a delta group. This should only be used for
872 871 compatibility with changegroup version 1.
873 872
874 873 builddeltaheader is a callable that constructs the header for a group
875 874 delta.
876 875
877 876 manifestsend is a chunk to send after manifests have been fully emitted.
878 877
879 878 ellipses indicates whether ellipsis serving mode is enabled.
880 879
881 880 bundlecaps is optional and can be used to specify the set of
882 881 capabilities which can be used to build the bundle. While bundlecaps is
883 882 unused in core Mercurial, extensions rely on this feature to communicate
884 883 capabilities to customize the changegroup packer.
885 884
886 885 shallow indicates whether shallow data might be sent. The packer may
887 886 need to pack file contents not introduced by the changes being packed.
888 887
889 888 fullnodes is the set of changelog nodes which should not be ellipsis
890 889 nodes. We store this rather than the set of nodes that should be
891 890 ellipsis because for very large histories we expect this to be
892 891 significantly smaller.
893 892 """
894 893 assert oldmatcher
895 894 assert matcher
896 895 self._oldmatcher = oldmatcher
897 896 self._matcher = matcher
898 897
899 898 self.version = version
900 899 self._forcedeltaparentprev = forcedeltaparentprev
901 900 self._builddeltaheader = builddeltaheader
902 901 self._manifestsend = manifestsend
903 902 self._ellipses = ellipses
904 903
905 904 # Set of capabilities we can use to build the bundle.
906 905 if bundlecaps is None:
907 906 bundlecaps = set()
908 907 self._bundlecaps = bundlecaps
909 908 self._isshallow = shallow
910 909 self._fullclnodes = fullnodes
911 910
912 911 # Maps ellipsis revs to their roots at the changelog level.
913 912 self._precomputedellipsis = ellipsisroots
914 913
915 914 self._repo = repo
916 915
917 916 if self._repo.ui.verbose and not self._repo.ui.debugflag:
918 917 self._verbosenote = self._repo.ui.note
919 918 else:
920 919 self._verbosenote = lambda s: None
921 920
922 921 def generate(
923 922 self, commonrevs, clnodes, fastpathlinkrev, source, changelog=True
924 923 ):
925 924 """Yield a sequence of changegroup byte chunks.
926 925 If changelog is False, changelog data won't be added to changegroup
927 926 """
928 927
929 928 repo = self._repo
930 929 cl = repo.changelog
931 930
932 931 self._verbosenote(_(b'uncompressed size of bundle content:\n'))
933 932 size = 0
934 933
935 934 clstate, deltas = self._generatechangelog(
936 935 cl, clnodes, generate=changelog
937 936 )
938 937 for delta in deltas:
939 938 for chunk in _revisiondeltatochunks(delta, self._builddeltaheader):
940 939 size += len(chunk)
941 940 yield chunk
942 941
943 942 close = closechunk()
944 943 size += len(close)
945 944 yield closechunk()
946 945
947 946 self._verbosenote(_(b'%8.i (changelog)\n') % size)
948 947
949 948 clrevorder = clstate[b'clrevorder']
950 949 manifests = clstate[b'manifests']
951 950 changedfiles = clstate[b'changedfiles']
952 951
953 952 # We need to make sure that the linkrev in the changegroup refers to
954 953 # the first changeset that introduced the manifest or file revision.
955 954 # The fastpath is usually safer than the slowpath, because the filelogs
956 955 # are walked in revlog order.
957 956 #
958 957 # When taking the slowpath and the manifest revlog uses generaldelta,
959 958 # the manifest may be walked in the "wrong" order. Without 'clrevorder',
960 959 # we would get an incorrect linkrev (see fix in cc0ff93d0c0c).
961 960 #
962 961 # When taking the fastpath, we are only vulnerable to reordering
963 962 # of the changelog itself. The changelog never uses generaldelta and is
964 963 # never reordered. To handle this case, we simply take the slowpath,
965 964 # which already has the 'clrevorder' logic. This was also fixed in
966 965 # cc0ff93d0c0c.
967 966
968 967 # Treemanifests don't work correctly with fastpathlinkrev
969 968 # either, because we don't discover which directory nodes to
970 969 # send along with files. This could probably be fixed.
971 970 fastpathlinkrev = fastpathlinkrev and not scmutil.istreemanifest(repo)
972 971
973 972 fnodes = {} # needed file nodes
974 973
975 974 size = 0
976 975 it = self.generatemanifests(
977 976 commonrevs,
978 977 clrevorder,
979 978 fastpathlinkrev,
980 979 manifests,
981 980 fnodes,
982 981 source,
983 982 clstate[b'clrevtomanifestrev'],
984 983 )
985 984
986 985 for tree, deltas in it:
987 986 if tree:
988 987 assert self.version == b'03'
989 988 chunk = _fileheader(tree)
990 989 size += len(chunk)
991 990 yield chunk
992 991
993 992 for delta in deltas:
994 993 chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
995 994 for chunk in chunks:
996 995 size += len(chunk)
997 996 yield chunk
998 997
999 998 close = closechunk()
1000 999 size += len(close)
1001 1000 yield close
1002 1001
1003 1002 self._verbosenote(_(b'%8.i (manifests)\n') % size)
1004 1003 yield self._manifestsend
1005 1004
1006 1005 mfdicts = None
1007 1006 if self._ellipses and self._isshallow:
1008 1007 mfdicts = [
1009 1008 (self._repo.manifestlog[n].read(), lr)
1010 1009 for (n, lr) in pycompat.iteritems(manifests)
1011 1010 ]
1012 1011
1013 1012 manifests.clear()
1014 1013 clrevs = {cl.rev(x) for x in clnodes}
1015 1014
1016 1015 it = self.generatefiles(
1017 1016 changedfiles,
1018 1017 commonrevs,
1019 1018 source,
1020 1019 mfdicts,
1021 1020 fastpathlinkrev,
1022 1021 fnodes,
1023 1022 clrevs,
1024 1023 )
1025 1024
1026 1025 for path, deltas in it:
1027 1026 h = _fileheader(path)
1028 1027 size = len(h)
1029 1028 yield h
1030 1029
1031 1030 for delta in deltas:
1032 1031 chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
1033 1032 for chunk in chunks:
1034 1033 size += len(chunk)
1035 1034 yield chunk
1036 1035
1037 1036 close = closechunk()
1038 1037 size += len(close)
1039 1038 yield close
1040 1039
1041 1040 self._verbosenote(_(b'%8.i %s\n') % (size, path))
1042 1041
1043 1042 yield closechunk()
1044 1043
1045 1044 if clnodes:
1046 1045 repo.hook(b'outgoing', node=hex(clnodes[0]), source=source)
1047 1046
1048 1047 def _generatechangelog(self, cl, nodes, generate=True):
1049 1048 """Generate data for changelog chunks.
1050 1049
1051 1050 Returns a 2-tuple of a dict containing state and an iterable of
1052 1051 byte chunks. The state will not be fully populated until the
1053 1052 chunk stream has been fully consumed.
1054 1053
1055 1054 if generate is False, the state will be fully populated and no chunk
1056 1055 stream will be yielded
1057 1056 """
1058 1057 clrevorder = {}
1059 1058 manifests = {}
1060 1059 mfl = self._repo.manifestlog
1061 1060 changedfiles = set()
1062 1061 clrevtomanifestrev = {}
1063 1062
1064 1063 state = {
1065 1064 b'clrevorder': clrevorder,
1066 1065 b'manifests': manifests,
1067 1066 b'changedfiles': changedfiles,
1068 1067 b'clrevtomanifestrev': clrevtomanifestrev,
1069 1068 }
1070 1069
1071 1070 if not (generate or self._ellipses):
1072 1071 # sort the nodes in storage order
1073 1072 nodes = sorted(nodes, key=cl.rev)
1074 1073 for node in nodes:
1075 1074 c = cl.changelogrevision(node)
1076 1075 clrevorder[node] = len(clrevorder)
1077 1076 # record the first changeset introducing this manifest version
1078 1077 manifests.setdefault(c.manifest, node)
1079 1078 # Record a complete list of potentially-changed files in
1080 1079 # this manifest.
1081 1080 changedfiles.update(c.files)
1082 1081
1083 1082 return state, ()
1084 1083
1085 1084 # Callback for the changelog, used to collect changed files and
1086 1085 # manifest nodes.
1087 1086 # Returns the linkrev node (identity in the changelog case).
1088 1087 def lookupcl(x):
1089 1088 c = cl.changelogrevision(x)
1090 1089 clrevorder[x] = len(clrevorder)
1091 1090
1092 1091 if self._ellipses:
1093 1092 # Only update manifests if x is going to be sent. Otherwise we
1094 1093 # end up with bogus linkrevs specified for manifests and
1095 1094 # we skip some manifest nodes that we should otherwise
1096 1095 # have sent.
1097 1096 if (
1098 1097 x in self._fullclnodes
1099 1098 or cl.rev(x) in self._precomputedellipsis
1100 1099 ):
1101 1100
1102 1101 manifestnode = c.manifest
1103 1102 # Record the first changeset introducing this manifest
1104 1103 # version.
1105 1104 manifests.setdefault(manifestnode, x)
1106 1105 # Set this narrow-specific dict so we have the lowest
1107 1106 # manifest revnum to look up for this cl revnum. (Part of
1108 1107 # mapping changelog ellipsis parents to manifest ellipsis
1109 1108 # parents)
1110 1109 clrevtomanifestrev.setdefault(
1111 1110 cl.rev(x), mfl.rev(manifestnode)
1112 1111 )
1113 1112 # We can't trust the changed files list in the changeset if the
1114 1113 # client requested a shallow clone.
1115 1114 if self._isshallow:
1116 1115 changedfiles.update(mfl[c.manifest].read().keys())
1117 1116 else:
1118 1117 changedfiles.update(c.files)
1119 1118 else:
1120 1119 # record the first changeset introducing this manifest version
1121 1120 manifests.setdefault(c.manifest, x)
1122 1121 # Record a complete list of potentially-changed files in
1123 1122 # this manifest.
1124 1123 changedfiles.update(c.files)
1125 1124
1126 1125 return x
1127 1126
1128 1127 gen = deltagroup(
1129 1128 self._repo,
1130 1129 cl,
1131 1130 nodes,
1132 1131 True,
1133 1132 lookupcl,
1134 1133 self._forcedeltaparentprev,
1135 1134 ellipses=self._ellipses,
1136 1135 topic=_(b'changesets'),
1137 1136 clrevtolocalrev={},
1138 1137 fullclnodes=self._fullclnodes,
1139 1138 precomputedellipsis=self._precomputedellipsis,
1140 1139 )
1141 1140
1142 1141 return state, gen
1143 1142
1144 1143 def generatemanifests(
1145 1144 self,
1146 1145 commonrevs,
1147 1146 clrevorder,
1148 1147 fastpathlinkrev,
1149 1148 manifests,
1150 1149 fnodes,
1151 1150 source,
1152 1151 clrevtolocalrev,
1153 1152 ):
1154 1153 """Returns an iterator of changegroup chunks containing manifests.
1155 1154
1156 1155 `source` is unused here, but is used by extensions like remotefilelog to
1157 1156 change what is sent based on pulls vs pushes, etc.
1158 1157 """
1159 1158 repo = self._repo
1160 1159 mfl = repo.manifestlog
1161 1160 tmfnodes = {b'': manifests}
1162 1161
1163 1162 # Callback for the manifest, used to collect linkrevs for filelog
1164 1163 # revisions.
1165 1164 # Returns the linkrev node (collected in lookupcl).
1166 1165 def makelookupmflinknode(tree, nodes):
1167 1166 if fastpathlinkrev:
1168 1167 assert not tree
1169 1168 return (
1170 1169 manifests.__getitem__
1171 1170 ) # pytype: disable=unsupported-operands
1172 1171
1173 1172 def lookupmflinknode(x):
1174 1173 """Callback for looking up the linknode for manifests.
1175 1174
1176 1175 Returns the linkrev node for the specified manifest.
1177 1176
1178 1177 SIDE EFFECT:
1179 1178
1180 1179 1) fclnodes gets populated with the list of relevant
1181 1180 file nodes if we're not using fastpathlinkrev
1182 1181 2) When treemanifests are in use, collects treemanifest nodes
1183 1182 to send
1184 1183
1185 1184 Note that this means manifests must be completely sent to
1186 1185 the client before you can trust the list of files and
1187 1186 treemanifests to send.
1188 1187 """
1189 1188 clnode = nodes[x]
1190 1189 mdata = mfl.get(tree, x).readfast(shallow=True)
1191 1190 for p, n, fl in mdata.iterentries():
1192 1191 if fl == b't': # subdirectory manifest
1193 1192 subtree = tree + p + b'/'
1194 1193 tmfclnodes = tmfnodes.setdefault(subtree, {})
1195 1194 tmfclnode = tmfclnodes.setdefault(n, clnode)
1196 1195 if clrevorder[clnode] < clrevorder[tmfclnode]:
1197 1196 tmfclnodes[n] = clnode
1198 1197 else:
1199 1198 f = tree + p
1200 1199 fclnodes = fnodes.setdefault(f, {})
1201 1200 fclnode = fclnodes.setdefault(n, clnode)
1202 1201 if clrevorder[clnode] < clrevorder[fclnode]:
1203 1202 fclnodes[n] = clnode
1204 1203 return clnode
1205 1204
1206 1205 return lookupmflinknode
1207 1206
1208 1207 while tmfnodes:
1209 1208 tree, nodes = tmfnodes.popitem()
1210 1209
1211 1210 should_visit = self._matcher.visitdir(tree[:-1])
1212 1211 if tree and not should_visit:
1213 1212 continue
1214 1213
1215 1214 store = mfl.getstorage(tree)
1216 1215
1217 1216 if not should_visit:
1218 1217 # No nodes to send because this directory is out of
1219 1218 # the client's view of the repository (probably
1220 1219 # because of narrow clones). Do this even for the root
1221 1220 # directory (tree=='')
1222 1221 prunednodes = []
1223 1222 else:
1224 1223 # Avoid sending any manifest nodes we can prove the
1225 1224 # client already has by checking linkrevs. See the
1226 1225 # related comment in generatefiles().
1227 1226 prunednodes = self._prunemanifests(store, nodes, commonrevs)
1228 1227
1229 1228 if tree and not prunednodes:
1230 1229 continue
1231 1230
1232 1231 lookupfn = makelookupmflinknode(tree, nodes)
1233 1232
1234 1233 deltas = deltagroup(
1235 1234 self._repo,
1236 1235 store,
1237 1236 prunednodes,
1238 1237 False,
1239 1238 lookupfn,
1240 1239 self._forcedeltaparentprev,
1241 1240 ellipses=self._ellipses,
1242 1241 topic=_(b'manifests'),
1243 1242 clrevtolocalrev=clrevtolocalrev,
1244 1243 fullclnodes=self._fullclnodes,
1245 1244 precomputedellipsis=self._precomputedellipsis,
1246 1245 )
1247 1246
1248 1247 if not self._oldmatcher.visitdir(store.tree[:-1]):
1249 1248 yield tree, deltas
1250 1249 else:
1251 1250 # 'deltas' is a generator and we need to consume it even if
1252 1251 # we are not going to send it because a side-effect is that
1253 1252 # it updates tmfnodes (via lookupfn)
1254 1253 for d in deltas:
1255 1254 pass
1256 1255 if not tree:
1257 1256 yield tree, []
1258 1257
1259 1258 def _prunemanifests(self, store, nodes, commonrevs):
1260 1259 if not self._ellipses:
1261 1260 # In the non-ellipses case, especially for large repositories, it is
1262 1261 # better to avoid calling store.rev and store.linkrev on a lot of
1263 1262 # nodes, even if that means sending some extra data
1264 1263 return nodes.copy()
1265 1264 # This is split out as a separate method to allow filtering
1266 1265 # commonrevs in extension code.
1267 1266 #
1268 1267 # TODO(augie): this shouldn't be required, instead we should
1269 1268 # make filtering of revisions to send delegated to the store
1270 1269 # layer.
1271 1270 frev, flr = store.rev, store.linkrev
1272 1271 return [n for n in nodes if flr(frev(n)) not in commonrevs]
1273 1272
1274 1273 # The 'source' parameter is useful for extensions
1275 1274 def generatefiles(
1276 1275 self,
1277 1276 changedfiles,
1278 1277 commonrevs,
1279 1278 source,
1280 1279 mfdicts,
1281 1280 fastpathlinkrev,
1282 1281 fnodes,
1283 1282 clrevs,
1284 1283 ):
1285 1284 changedfiles = [
1286 1285 f
1287 1286 for f in changedfiles
1288 1287 if self._matcher(f) and not self._oldmatcher(f)
1289 1288 ]
1290 1289
1291 1290 if not fastpathlinkrev:
1292 1291
1293 1292 def normallinknodes(unused, fname):
1294 1293 return fnodes.get(fname, {})
1295 1294
1296 1295 else:
1297 1296 cln = self._repo.changelog.node
1298 1297
1299 1298 def normallinknodes(store, fname):
1300 1299 flinkrev = store.linkrev
1301 1300 fnode = store.node
1302 1301 revs = ((r, flinkrev(r)) for r in store)
1303 1302 return {fnode(r): cln(lr) for r, lr in revs if lr in clrevs}
1304 1303
1305 1304 clrevtolocalrev = {}
1306 1305
1307 1306 if self._isshallow:
1308 1307 # In a shallow clone, the linknodes callback needs to also include
1309 1308 # those file nodes that are in the manifests we sent but weren't
1310 1309 # introduced by those manifests.
1311 1310 commonctxs = [self._repo[c] for c in commonrevs]
1312 1311 clrev = self._repo.changelog.rev
1313 1312
1314 1313 def linknodes(flog, fname):
1315 1314 for c in commonctxs:
1316 1315 try:
1317 1316 fnode = c.filenode(fname)
1318 1317 clrevtolocalrev[c.rev()] = flog.rev(fnode)
1319 1318 except error.ManifestLookupError:
1320 1319 pass
1321 1320 links = normallinknodes(flog, fname)
1322 1321 if len(links) != len(mfdicts):
1323 1322 for mf, lr in mfdicts:
1324 1323 fnode = mf.get(fname, None)
1325 1324 if fnode in links:
1326 1325 links[fnode] = min(links[fnode], lr, key=clrev)
1327 1326 elif fnode:
1328 1327 links[fnode] = lr
1329 1328 return links
1330 1329
1331 1330 else:
1332 1331 linknodes = normallinknodes
1333 1332
1334 1333 repo = self._repo
1335 1334 progress = repo.ui.makeprogress(
1336 1335 _(b'files'), unit=_(b'files'), total=len(changedfiles)
1337 1336 )
1338 1337 for i, fname in enumerate(sorted(changedfiles)):
1339 1338 filerevlog = repo.file(fname)
1340 1339 if not filerevlog:
1341 1340 raise error.Abort(
1342 1341 _(b"empty or missing file data for %s") % fname
1343 1342 )
1344 1343
1345 1344 clrevtolocalrev.clear()
1346 1345
1347 1346 linkrevnodes = linknodes(filerevlog, fname)
1348 1347 # Lookup for filenodes, we collected the linkrev nodes above in the
1349 1348 # fastpath case and with lookupmf in the slowpath case.
1350 1349 def lookupfilelog(x):
1351 1350 return linkrevnodes[x]
1352 1351
1353 1352 frev, flr = filerevlog.rev, filerevlog.linkrev
1354 1353 # Skip sending any filenode we know the client already
1355 1354 # has. This avoids over-sending files relatively
1356 1355 # inexpensively, so it's not a problem if we under-filter
1357 1356 # here.
1358 1357 filenodes = [
1359 1358 n for n in linkrevnodes if flr(frev(n)) not in commonrevs
1360 1359 ]
1361 1360
1362 1361 if not filenodes:
1363 1362 continue
1364 1363
1365 1364 progress.update(i + 1, item=fname)
1366 1365
1367 1366 deltas = deltagroup(
1368 1367 self._repo,
1369 1368 filerevlog,
1370 1369 filenodes,
1371 1370 False,
1372 1371 lookupfilelog,
1373 1372 self._forcedeltaparentprev,
1374 1373 ellipses=self._ellipses,
1375 1374 clrevtolocalrev=clrevtolocalrev,
1376 1375 fullclnodes=self._fullclnodes,
1377 1376 precomputedellipsis=self._precomputedellipsis,
1378 1377 )
1379 1378
1380 1379 yield fname, deltas
1381 1380
1382 1381 progress.complete()
1383 1382
1384 1383
1385 1384 def _makecg1packer(
1386 1385 repo,
1387 1386 oldmatcher,
1388 1387 matcher,
1389 1388 bundlecaps,
1390 1389 ellipses=False,
1391 1390 shallow=False,
1392 1391 ellipsisroots=None,
1393 1392 fullnodes=None,
1394 1393 ):
1395 1394 builddeltaheader = lambda d: _CHANGEGROUPV1_DELTA_HEADER.pack(
1396 1395 d.node, d.p1node, d.p2node, d.linknode
1397 1396 )
1398 1397
1399 1398 return cgpacker(
1400 1399 repo,
1401 1400 oldmatcher,
1402 1401 matcher,
1403 1402 b'01',
1404 1403 builddeltaheader=builddeltaheader,
1405 1404 manifestsend=b'',
1406 1405 forcedeltaparentprev=True,
1407 1406 bundlecaps=bundlecaps,
1408 1407 ellipses=ellipses,
1409 1408 shallow=shallow,
1410 1409 ellipsisroots=ellipsisroots,
1411 1410 fullnodes=fullnodes,
1412 1411 )
1413 1412
1414 1413
1415 1414 def _makecg2packer(
1416 1415 repo,
1417 1416 oldmatcher,
1418 1417 matcher,
1419 1418 bundlecaps,
1420 1419 ellipses=False,
1421 1420 shallow=False,
1422 1421 ellipsisroots=None,
1423 1422 fullnodes=None,
1424 1423 ):
1425 1424 builddeltaheader = lambda d: _CHANGEGROUPV2_DELTA_HEADER.pack(
1426 1425 d.node, d.p1node, d.p2node, d.basenode, d.linknode
1427 1426 )
1428 1427
1429 1428 return cgpacker(
1430 1429 repo,
1431 1430 oldmatcher,
1432 1431 matcher,
1433 1432 b'02',
1434 1433 builddeltaheader=builddeltaheader,
1435 1434 manifestsend=b'',
1436 1435 bundlecaps=bundlecaps,
1437 1436 ellipses=ellipses,
1438 1437 shallow=shallow,
1439 1438 ellipsisroots=ellipsisroots,
1440 1439 fullnodes=fullnodes,
1441 1440 )
1442 1441
1443 1442
1444 1443 def _makecg3packer(
1445 1444 repo,
1446 1445 oldmatcher,
1447 1446 matcher,
1448 1447 bundlecaps,
1449 1448 ellipses=False,
1450 1449 shallow=False,
1451 1450 ellipsisroots=None,
1452 1451 fullnodes=None,
1453 1452 ):
1454 1453 builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
1455 1454 d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags
1456 1455 )
1457 1456
1458 1457 return cgpacker(
1459 1458 repo,
1460 1459 oldmatcher,
1461 1460 matcher,
1462 1461 b'03',
1463 1462 builddeltaheader=builddeltaheader,
1464 1463 manifestsend=closechunk(),
1465 1464 bundlecaps=bundlecaps,
1466 1465 ellipses=ellipses,
1467 1466 shallow=shallow,
1468 1467 ellipsisroots=ellipsisroots,
1469 1468 fullnodes=fullnodes,
1470 1469 )
1471 1470
1472 1471
1473 1472 _packermap = {
1474 1473 b'01': (_makecg1packer, cg1unpacker),
1475 1474 # cg2 adds support for exchanging generaldelta
1476 1475 b'02': (_makecg2packer, cg2unpacker),
1477 1476 # cg3 adds support for exchanging revlog flags and treemanifests
1478 1477 b'03': (_makecg3packer, cg3unpacker),
1479 1478 }
1480 1479
1481 1480
1482 1481 def allsupportedversions(repo):
1483 1482 versions = set(_packermap.keys())
1484 1483 needv03 = False
1485 1484 if (
1486 1485 repo.ui.configbool(b'experimental', b'changegroup3')
1487 1486 or repo.ui.configbool(b'experimental', b'treemanifest')
1488 1487 or scmutil.istreemanifest(repo)
1489 1488 ):
1490 1489 # we keep version 03 because we need to exchange treemanifest data
1491 1490 #
1492 1491 # we also keep versions 01 and 02, because it is possible for a repo to
1493 1492 # contain both normal and tree manifests at the same time, so using an
1494 1493 # older version to pull data is viable
1495 1494 #
1496 1495 # (or even to push a subset of history)
1497 1496 needv03 = True
1498 1497 if b'exp-sidedata-flag' in repo.requirements:
1499 1498 needv03 = True
1500 1499 # don't attempt to use 01/02 until we do sidedata cleaning
1501 1500 versions.discard(b'01')
1502 1501 versions.discard(b'02')
1503 1502 if not needv03:
1504 1503 versions.discard(b'03')
1505 1504 return versions
1506 1505
1507 1506
1508 1507 # Changegroup versions that can be applied to the repo
1509 1508 def supportedincomingversions(repo):
1510 1509 return allsupportedversions(repo)
1511 1510
1512 1511
1513 1512 # Changegroup versions that can be created from the repo
1514 1513 def supportedoutgoingversions(repo):
1515 1514 versions = allsupportedversions(repo)
1516 1515 if scmutil.istreemanifest(repo):
1517 1516 # Versions 01 and 02 support only flat manifests and it's just too
1518 1517 # expensive to convert between the flat manifest and tree manifest on
1519 1518 # the fly. Since tree manifests are hashed differently, all of history
1520 1519 # would have to be converted. Instead, we simply don't even pretend to
1521 1520 # support versions 01 and 02.
1522 1521 versions.discard(b'01')
1523 1522 versions.discard(b'02')
1524 1523 if requirements.NARROW_REQUIREMENT in repo.requirements:
1525 1524 # Versions 01 and 02 don't support revlog flags, and we need to
1526 1525 # support that for stripping and unbundling to work.
1527 1526 versions.discard(b'01')
1528 1527 versions.discard(b'02')
1529 1528 if LFS_REQUIREMENT in repo.requirements:
1530 1529 # Versions 01 and 02 don't support revlog flags, and we need to
1531 1530 # mark LFS entries with REVIDX_EXTSTORED.
1532 1531 versions.discard(b'01')
1533 1532 versions.discard(b'02')
1534 1533
1535 1534 return versions
1536 1535
1537 1536
1538 1537 def localversion(repo):
1539 1538 # Finds the best version to use for bundles that are meant to be used
1540 1539 # locally, such as those from strip and shelve, and temporary bundles.
1541 1540 return max(supportedoutgoingversions(repo))
1542 1541
1543 1542
1544 1543 def safeversion(repo):
1545 1544 # Finds the smallest version that it's safe to assume clients of the repo
1546 1545 # will support. For example, all hg versions that support generaldelta also
1547 1546 # support changegroup 02.
1548 1547 versions = supportedoutgoingversions(repo)
1549 1548 if b'generaldelta' in repo.requirements:
1550 1549 versions.discard(b'01')
1551 1550 assert versions
1552 1551 return min(versions)
1553 1552
1554 1553
1555 1554 def getbundler(
1556 1555 version,
1557 1556 repo,
1558 1557 bundlecaps=None,
1559 1558 oldmatcher=None,
1560 1559 matcher=None,
1561 1560 ellipses=False,
1562 1561 shallow=False,
1563 1562 ellipsisroots=None,
1564 1563 fullnodes=None,
1565 1564 ):
1566 1565 assert version in supportedoutgoingversions(repo)
1567 1566
1568 1567 if matcher is None:
1569 1568 matcher = matchmod.always()
1570 1569 if oldmatcher is None:
1571 1570 oldmatcher = matchmod.never()
1572 1571
1573 1572 if version == b'01' and not matcher.always():
1574 1573 raise error.ProgrammingError(
1575 1574 b'version 01 changegroups do not support sparse file matchers'
1576 1575 )
1577 1576
1578 1577 if ellipses and version in (b'01', b'02'):
1579 1578 raise error.Abort(
1580 1579 _(
1581 1580 b'ellipsis nodes require at least cg3 on client and server, '
1582 1581 b'but negotiated version %s'
1583 1582 )
1584 1583 % version
1585 1584 )
1586 1585
1587 1586 # Requested files could include files not in the local store. So
1588 1587 # filter those out.
1589 1588 matcher = repo.narrowmatch(matcher)
1590 1589
1591 1590 fn = _packermap[version][0]
1592 1591 return fn(
1593 1592 repo,
1594 1593 oldmatcher,
1595 1594 matcher,
1596 1595 bundlecaps,
1597 1596 ellipses=ellipses,
1598 1597 shallow=shallow,
1599 1598 ellipsisroots=ellipsisroots,
1600 1599 fullnodes=fullnodes,
1601 1600 )
1602 1601
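# Taken together with the version helpers above, a caller producing a
# bundle for local use (strip, shelve, ...) conceptually does the
# following (sketch; `repo` is an assumed existing repository object):
#
#     version = localversion(repo)           # best version for local use
#     bundler = getbundler(version, repo)    # cgpacker for that version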
1603 1602
1604 1603 def getunbundler(version, fh, alg, extras=None):
1605 1604 return _packermap[version][1](fh, alg, extras=extras)
1606 1605
1607 1606
1608 1607 def _changegroupinfo(repo, nodes, source):
1609 1608 if repo.ui.verbose or source == b'bundle':
1610 1609 repo.ui.status(_(b"%d changesets found\n") % len(nodes))
1611 1610 if repo.ui.debugflag:
1612 1611 repo.ui.debug(b"list of changesets:\n")
1613 1612 for node in nodes:
1614 1613 repo.ui.debug(b"%s\n" % hex(node))
1615 1614
1616 1615
1617 1616 def makechangegroup(
1618 1617 repo, outgoing, version, source, fastpath=False, bundlecaps=None
1619 1618 ):
1620 1619 cgstream = makestream(
1621 1620 repo,
1622 1621 outgoing,
1623 1622 version,
1624 1623 source,
1625 1624 fastpath=fastpath,
1626 1625 bundlecaps=bundlecaps,
1627 1626 )
1628 1627 return getunbundler(
1629 1628 version,
1630 1629 util.chunkbuffer(cgstream),
1631 1630 None,
1632 1631 {b'clcount': len(outgoing.missing)},
1633 1632 )
1634 1633
1635 1634
1636 1635 def makestream(
1637 1636 repo,
1638 1637 outgoing,
1639 1638 version,
1640 1639 source,
1641 1640 fastpath=False,
1642 1641 bundlecaps=None,
1643 1642 matcher=None,
1644 1643 ):
1645 1644 bundler = getbundler(version, repo, bundlecaps=bundlecaps, matcher=matcher)
1646 1645
1647 1646 repo = repo.unfiltered()
1648 1647 commonrevs = outgoing.common
1649 1648 csets = outgoing.missing
1650 1649 heads = outgoing.ancestorsof
1651 1650 # We go through the fast path if we get told to, or if all (unfiltered)
1652 1651 # heads have been requested (since we then know all linkrevs will
1653 1652 # be pulled by the client).
1654 1653 heads.sort()
1655 1654 fastpathlinkrev = fastpath or (
1656 1655 repo.filtername is None and heads == sorted(repo.heads())
1657 1656 )
1658 1657
1659 1658 repo.hook(b'preoutgoing', throw=True, source=source)
1660 1659 _changegroupinfo(repo, csets, source)
1661 1660 return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1662 1661
1663 1662
1664 1663 def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
1665 1664 revisions = 0
1666 1665 files = 0
1667 1666 progress = repo.ui.makeprogress(
1668 1667 _(b'files'), unit=_(b'files'), total=expectedfiles
1669 1668 )
1670 1669 for chunkdata in iter(source.filelogheader, {}):
1671 1670 files += 1
1672 1671 f = chunkdata[b"filename"]
1673 1672 repo.ui.debug(b"adding %s revisions\n" % f)
1674 1673 progress.increment()
1675 1674 fl = repo.file(f)
1676 1675 o = len(fl)
1677 1676 try:
1678 1677 deltas = source.deltaiter()
1679 1678 if not fl.addgroup(deltas, revmap, trp):
1680 1679 raise error.Abort(_(b"received file revlog group is empty"))
1681 1680 except error.CensoredBaseError as e:
1682 1681 raise error.Abort(_(b"received delta base is censored: %s") % e)
1683 1682 revisions += len(fl) - o
1684 1683 if f in needfiles:
1685 1684 needs = needfiles[f]
1686 1685 for new in pycompat.xrange(o, len(fl)):
1687 1686 n = fl.node(new)
1688 1687 if n in needs:
1689 1688 needs.remove(n)
1690 1689 else:
1691 1690 raise error.Abort(_(b"received spurious file revlog entry"))
1692 1691 if not needs:
1693 1692 del needfiles[f]
1694 1693 progress.complete()
1695 1694
1696 1695 for f, needs in pycompat.iteritems(needfiles):
1697 1696 fl = repo.file(f)
1698 1697 for n in needs:
1699 1698 try:
1700 1699 fl.rev(n)
1701 1700 except error.LookupError:
1702 1701 raise error.Abort(
1703 1702 _(b'missing file data for %s:%s - run hg verify')
1704 1703 % (f, hex(n))
1705 1704 )
1706 1705
1707 1706 return revisions, files
@@ -1,618 +1,618 b''
1 1 # changelog.py - changelog class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from .i18n import _
11 11 from .node import (
12 12 bin,
13 13 hex,
14 14 nullid,
15 15 )
16 16 from .thirdparty import attr
17 17
18 18 from . import (
19 19 encoding,
20 20 error,
21 21 metadata,
22 22 pycompat,
23 23 revlog,
24 24 )
25 25 from .utils import (
26 26 dateutil,
27 27 stringutil,
28 28 )
29 29 from .revlogutils import flagutil
30 30
31 31 _defaultextra = {b'branch': b'default'}
32 32
33 33
34 34 def _string_escape(text):
35 35 """
36 36 >>> from .pycompat import bytechr as chr
37 37 >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
38 38 >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
39 39 >>> s
40 40 'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
41 41 >>> res = _string_escape(s)
42 42 >>> s == _string_unescape(res)
43 43 True
44 44 """
45 45 # subset of the string_escape codec
46 46 text = (
47 47 text.replace(b'\\', b'\\\\')
48 48 .replace(b'\n', b'\\n')
49 49 .replace(b'\r', b'\\r')
50 50 )
51 51 return text.replace(b'\0', b'\\0')
52 52
53 53
54 54 def _string_unescape(text):
55 55 if b'\\0' in text:
56 56 # fix up \0 without getting into trouble with \\0
57 57 text = text.replace(b'\\\\', b'\\\\\n')
58 58 text = text.replace(b'\\0', b'\0')
59 59 text = text.replace(b'\n', b'')
60 60 return stringutil.unescapestr(text)
61 61
62 62
63 63 def decodeextra(text):
64 64 """
65 65 >>> from .pycompat import bytechr as chr
66 66 >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
67 67 ... ).items())
68 68 [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
69 69 >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
70 70 ... b'baz': chr(92) + chr(0) + b'2'})
71 71 ... ).items())
72 72 [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
73 73 """
74 74 extra = _defaultextra.copy()
75 75 for l in text.split(b'\0'):
76 76 if l:
77 77 k, v = _string_unescape(l).split(b':', 1)
78 78 extra[k] = v
79 79 return extra
80 80
81 81
82 82 def encodeextra(d):
83 83 # keys must be sorted to produce a deterministic changelog entry
84 84 items = [_string_escape(b'%s:%s' % (k, d[k])) for k in sorted(d)]
85 85 return b"\0".join(items)
86 86
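# Complementing the doctests above, a quick round-trip shows the wire
# form (a sketch; the dict contents are invented):

_extra_example = {b'branch': b'stable', b'close': b'1'}
_raw_example = encodeextra(_extra_example)  # b'branch:stable\x00close:1'
assert decodeextra(_raw_example) == {b'branch': b'stable', b'close': b'1'}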
87 87
88 88 def stripdesc(desc):
89 89 """strip trailing whitespace and leading and trailing empty lines"""
90 90 return b'\n'.join([l.rstrip() for l in desc.splitlines()]).strip(b'\n')
91 91
92 92
93 93 class appender(object):
94 94 """the changelog index must be updated last on disk, so we use this class
95 95 to delay writes to it"""
96 96
97 97 def __init__(self, vfs, name, mode, buf):
98 98 self.data = buf
99 99 fp = vfs(name, mode)
100 100 self.fp = fp
101 101 self.offset = fp.tell()
102 102 self.size = vfs.fstat(fp).st_size
103 103 self._end = self.size
104 104
105 105 def end(self):
106 106 return self._end
107 107
108 108 def tell(self):
109 109 return self.offset
110 110
111 111 def flush(self):
112 112 pass
113 113
114 114 @property
115 115 def closed(self):
116 116 return self.fp.closed
117 117
118 118 def close(self):
119 119 self.fp.close()
120 120
121 121 def seek(self, offset, whence=0):
122 122 '''virtual file offset spans real file and data'''
123 123 if whence == 0:
124 124 self.offset = offset
125 125 elif whence == 1:
126 126 self.offset += offset
127 127 elif whence == 2:
128 128 self.offset = self.end() + offset
129 129 if self.offset < self.size:
130 130 self.fp.seek(self.offset)
131 131
132 132 def read(self, count=-1):
133 133 '''only trick here is reads that span real file and data'''
134 134 ret = b""
135 135 if self.offset < self.size:
136 136 s = self.fp.read(count)
137 137 ret = s
138 138 self.offset += len(s)
139 139 if count > 0:
140 140 count -= len(s)
141 141 if count != 0:
142 142 doff = self.offset - self.size
143 143 self.data.insert(0, b"".join(self.data))
144 144 del self.data[1:]
145 145 s = self.data[0][doff : doff + count]
146 146 self.offset += len(s)
147 147 ret += s
148 148 return ret
149 149
150 150 def write(self, s):
151 151 self.data.append(bytes(s))
152 152 self.offset += len(s)
153 153 self._end += len(s)
154 154
155 155 def __enter__(self):
156 156 self.fp.__enter__()
157 157 return self
158 158
159 159 def __exit__(self, *args):
160 160 return self.fp.__exit__(*args)
161 161
162 162
163 163 class _divertopener(object):
164 164 def __init__(self, opener, target):
165 165 self._opener = opener
166 166 self._target = target
167 167
168 168 def __call__(self, name, mode=b'r', checkambig=False, **kwargs):
169 169 if name != self._target:
170 170 return self._opener(name, mode, **kwargs)
171 171 return self._opener(name + b".a", mode, **kwargs)
172 172
173 173 def __getattr__(self, attr):
174 174 return getattr(self._opener, attr)
175 175
176 176
177 177 def _delayopener(opener, target, buf):
178 178 """build an opener that stores chunks in 'buf' instead of 'target'"""
179 179
180 180 def _delay(name, mode=b'r', checkambig=False, **kwargs):
181 181 if name != target:
182 182 return opener(name, mode, **kwargs)
183 183 assert not kwargs
184 184 return appender(opener, name, mode, buf)
185 185
186 186 return _delay
187 187
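# These two openers are the building blocks that delayupdate() (in the
# changelog class below) switches between: _divertopener redirects index
# writes into a sibling '.a' file, while _delayopener keeps them in
# memory. A rough sketch of the buffering path (assumed name: a
# vfs-style `opener`):
#
#     buf = []
#     delayed = _delayopener(opener, b'00changelog.i', buf)
#     fp = delayed(b'00changelog.i', b'a')  # an appender backed by buf
#     fp.write(b'new index bytes')          # collected in buf, not yet on disk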
188 188
189 189 @attr.s
190 190 class _changelogrevision(object):
191 191 # Extensions might modify _defaultextra, so let the constructor below pass
192 192 # it in
193 193 extra = attr.ib()
194 194 manifest = attr.ib(default=nullid)
195 195 user = attr.ib(default=b'')
196 196 date = attr.ib(default=(0, 0))
197 197 files = attr.ib(default=attr.Factory(list))
198 198 filesadded = attr.ib(default=None)
199 199 filesremoved = attr.ib(default=None)
200 200 p1copies = attr.ib(default=None)
201 201 p2copies = attr.ib(default=None)
202 202 description = attr.ib(default=b'')
203 203 branchinfo = attr.ib(default=(_defaultextra[b'branch'], False))
204 204
205 205
206 206 class changelogrevision(object):
207 207 """Holds results of a parsed changelog revision.
208 208
209 209 Changelog revisions consist of multiple pieces of data, including
210 210 the manifest node, user, and date. This object exposes a view into
211 211 the parsed object.
212 212 """
213 213
214 214 __slots__ = (
215 215 '_offsets',
216 216 '_text',
217 217 '_sidedata',
218 218 '_cpsd',
219 219 '_changes',
220 220 )
221 221
222 222 def __new__(cls, text, sidedata, cpsd):
223 223 if not text:
224 224 return _changelogrevision(extra=_defaultextra)
225 225
226 226 self = super(changelogrevision, cls).__new__(cls)
227 227 # We could return here and implement the following as an __init__.
228 228 # But doing it here is equivalent and saves an extra function call.
229 229
230 230 # format used:
231 231 # nodeid\n : manifest node in ascii
232 232 # user\n : user, no \n or \r allowed
233 233 # time tz extra\n : date (time is int or float, timezone is int)
234 234 # : extra is metadata, encoded and separated by '\0'
235 235 # : older versions ignore it
236 236 # files\n\n : files modified by the cset, no \n or \r allowed
237 237 # (.*) : comment (free text, ideally utf-8)
238 238 #
239 239 # changelog v0 doesn't use extra
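# An illustrative example of this layout (all values invented):
#
#   <40 hex chars: manifest node>\n
#   Jane Doe <jane@example.com>\n
#   1600000000 -3600 branch:stable\n
#   dir/file1\n
#   dir/file2\n
#   \n
#   commit message, possibly spanning
#   several lines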
240 240
241 241 nl1 = text.index(b'\n')
242 242 nl2 = text.index(b'\n', nl1 + 1)
243 243 nl3 = text.index(b'\n', nl2 + 1)
244 244
245 245 # The list of files may be empty, in which case nl3 is the first of the
246 246 # double newline that precedes the description.
247 247 if text[nl3 + 1 : nl3 + 2] == b'\n':
248 248 doublenl = nl3
249 249 else:
250 250 doublenl = text.index(b'\n\n', nl3 + 1)
251 251
252 252 self._offsets = (nl1, nl2, nl3, doublenl)
253 253 self._text = text
254 254 self._sidedata = sidedata
255 255 self._cpsd = cpsd
256 256 self._changes = None
257 257
258 258 return self
259 259
260 260 @property
261 261 def manifest(self):
262 262 return bin(self._text[0 : self._offsets[0]])
263 263
264 264 @property
265 265 def user(self):
266 266 off = self._offsets
267 267 return encoding.tolocal(self._text[off[0] + 1 : off[1]])
268 268
269 269 @property
270 270 def _rawdate(self):
271 271 off = self._offsets
272 272 dateextra = self._text[off[1] + 1 : off[2]]
273 273 return dateextra.split(b' ', 2)[0:2]
274 274
275 275 @property
276 276 def _rawextra(self):
277 277 off = self._offsets
278 278 dateextra = self._text[off[1] + 1 : off[2]]
279 279 fields = dateextra.split(b' ', 2)
280 280 if len(fields) != 3:
281 281 return None
282 282
283 283 return fields[2]
284 284
285 285 @property
286 286 def date(self):
287 287 raw = self._rawdate
288 288 time = float(raw[0])
289 289 # Various tools did silly things with the timezone.
290 290 try:
291 291 timezone = int(raw[1])
292 292 except ValueError:
293 293 timezone = 0
294 294
295 295 return time, timezone
296 296
297 297 @property
298 298 def extra(self):
299 299 raw = self._rawextra
300 300 if raw is None:
301 301 return _defaultextra
302 302
303 303 return decodeextra(raw)
304 304
305 305 @property
306 306 def changes(self):
307 307 if self._changes is not None:
308 308 return self._changes
309 309 if self._cpsd:
310 310 changes = metadata.decode_files_sidedata(self._sidedata)
311 311 else:
312 312 changes = metadata.ChangingFiles(
313 313 touched=self.files or (),
314 314 added=self.filesadded or (),
315 315 removed=self.filesremoved or (),
316 316 p1_copies=self.p1copies or {},
317 317 p2_copies=self.p2copies or {},
318 318 )
319 319 self._changes = changes
320 320 return changes
321 321
322 322 @property
323 323 def files(self):
324 324 if self._cpsd:
325 325 return sorted(self.changes.touched)
326 326 off = self._offsets
327 327 if off[2] == off[3]:
328 328 return []
329 329
330 330 return self._text[off[2] + 1 : off[3]].split(b'\n')
331 331
332 332 @property
333 333 def filesadded(self):
334 334 if self._cpsd:
335 335 return self.changes.added
336 336 else:
337 337 rawindices = self.extra.get(b'filesadded')
338 338 if rawindices is None:
339 339 return None
340 340 return metadata.decodefileindices(self.files, rawindices)
341 341
342 342 @property
343 343 def filesremoved(self):
344 344 if self._cpsd:
345 345 return self.changes.removed
346 346 else:
347 347 rawindices = self.extra.get(b'filesremoved')
348 348 if rawindices is None:
349 349 return None
350 350 return metadata.decodefileindices(self.files, rawindices)
351 351
352 352 @property
353 353 def p1copies(self):
354 354 if self._cpsd:
355 355 return self.changes.copied_from_p1
356 356 else:
357 357 rawcopies = self.extra.get(b'p1copies')
358 358 if rawcopies is None:
359 359 return None
360 360 return metadata.decodecopies(self.files, rawcopies)
361 361
362 362 @property
363 363 def p2copies(self):
364 364 if self._cpsd:
365 365 return self.changes.copied_from_p2
366 366 else:
367 367 rawcopies = self.extra.get(b'p2copies')
368 368 if rawcopies is None:
369 369 return None
370 370 return metadata.decodecopies(self.files, rawcopies)
371 371
372 372 @property
373 373 def description(self):
374 374 return encoding.tolocal(self._text[self._offsets[3] + 2 :])
375 375
376 376 @property
377 377 def branchinfo(self):
378 378 extra = self.extra
379 379 return encoding.tolocal(extra.get(b"branch")), b'close' in extra
380 380
381 381
382 382 class changelog(revlog.revlog):
383 383 def __init__(self, opener, trypending=False):
384 384 """Load a changelog revlog using an opener.
385 385
386 386 If ``trypending`` is true, we attempt to load the index from a
387 387 ``00changelog.i.a`` file instead of the default ``00changelog.i``.
388 388 The ``00changelog.i.a`` file contains index (and possibly inline
389 389 revision) data for a transaction that hasn't been finalized yet.
390 390 It exists in a separate file to facilitate readers (such as
391 391 hooks processes) accessing data before a transaction is finalized.
392 392 """
393 393 if trypending and opener.exists(b'00changelog.i.a'):
394 394 indexfile = b'00changelog.i.a'
395 395 else:
396 396 indexfile = b'00changelog.i'
397 397
398 398 datafile = b'00changelog.d'
399 399 revlog.revlog.__init__(
400 400 self,
401 401 opener,
402 402 indexfile,
403 403 datafile=datafile,
404 404 checkambig=True,
405 405 mmaplargeindex=True,
406 406 persistentnodemap=opener.options.get(b'persistent-nodemap', False),
407 407 )
408 408
409 409 if self._initempty and (self.version & 0xFFFF == revlog.REVLOGV1):
410 410 # changelogs don't benefit from generaldelta.
411 411
412 412 self.version &= ~revlog.FLAG_GENERALDELTA
413 413 self._generaldelta = False
414 414
415 415 # Delta chains for changelogs tend to be very small because entries
416 416 # tend to be small and don't delta well with each other. So disable delta
417 417 # chains.
418 418 self._storedeltachains = False
419 419
420 420 self._realopener = opener
421 421 self._delayed = False
422 422 self._delaybuf = None
423 423 self._divert = False
424 424 self._filteredrevs = frozenset()
425 425 self._filteredrevs_hashcache = {}
426 426 self._copiesstorage = opener.options.get(b'copies-storage')
427 427
428 428 @property
429 429 def filteredrevs(self):
430 430 return self._filteredrevs
431 431
432 432 @filteredrevs.setter
433 433 def filteredrevs(self, val):
434 434 # Ensure all updates go through this function
435 435 assert isinstance(val, frozenset)
436 436 self._filteredrevs = val
437 437 self._filteredrevs_hashcache = {}
438 438
439 439 def delayupdate(self, tr):
440 440 """delay visibility of index updates to other readers"""
441 441
442 442 if not self._delayed:
443 443 if len(self) == 0:
444 444 self._divert = True
445 445 if self._realopener.exists(self.indexfile + b'.a'):
446 446 self._realopener.unlink(self.indexfile + b'.a')
447 447 self.opener = _divertopener(self._realopener, self.indexfile)
448 448 else:
449 449 self._delaybuf = []
450 450 self.opener = _delayopener(
451 451 self._realopener, self.indexfile, self._delaybuf
452 452 )
453 453 self._delayed = True
454 454 tr.addpending(b'cl-%i' % id(self), self._writepending)
455 455 tr.addfinalize(b'cl-%i' % id(self), self._finalize)
456 456
457 457 def _finalize(self, tr):
458 458 """finalize index updates"""
459 459 self._delayed = False
460 460 self.opener = self._realopener
461 461 # move redirected index data back into place
462 462 if self._divert:
463 463 assert not self._delaybuf
464 464 tmpname = self.indexfile + b".a"
465 465 nfile = self.opener.open(tmpname)
466 466 nfile.close()
467 467 self.opener.rename(tmpname, self.indexfile, checkambig=True)
468 468 elif self._delaybuf:
469 469 fp = self.opener(self.indexfile, b'a', checkambig=True)
470 470 fp.write(b"".join(self._delaybuf))
471 471 fp.close()
472 472 self._delaybuf = None
473 473 self._divert = False
474 474 # split when we're done
475 475 self._enforceinlinesize(tr)
476 476
477 477 def _writepending(self, tr):
478 478 """create a file containing the unfinalized state for
479 479 pretxnchangegroup"""
480 480 if self._delaybuf:
481 481 # make a temporary copy of the index
482 482 fp1 = self._realopener(self.indexfile)
483 483 pendingfilename = self.indexfile + b".a"
484 484 # register as a temp file to ensure cleanup on failure
485 485 tr.registertmp(pendingfilename)
486 486 # write existing data
487 487 fp2 = self._realopener(pendingfilename, b"w")
488 488 fp2.write(fp1.read())
489 489 # add pending data
490 490 fp2.write(b"".join(self._delaybuf))
491 491 fp2.close()
492 492 # switch modes so finalize can simply rename
493 493 self._delaybuf = None
494 494 self._divert = True
495 495 self.opener = _divertopener(self._realopener, self.indexfile)
496 496
497 497 if self._divert:
498 498 return True
499 499
500 500 return False
501 501
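The delayupdate()/_writepending()/_finalize() trio above keeps index updates invisible to other readers until the transaction closes: writes are either diverted to a temporary ``00changelog.i.a`` file or buffered in memory, materialized for hooks by _writepending(), and folded back into the real index by _finalize(). As a rough, self-contained illustration of that buffer-then-finalize idea (a toy sketch, not Mercurial's actual implementation):

    class delayedindex(object):
        """Toy stand-in: hold writes back until finalize() is called."""

        def __init__(self):
            self.visible = []   # what concurrent readers see
            self.pending = []   # writes made while the transaction is open

        def write(self, entry):
            self.pending.append(entry)

        def writepending(self):
            # analogous to _writepending(): expose a pending view for hooks
            return self.visible + self.pending

        def finalize(self):
            # analogous to _finalize(): fold buffered writes into the real index
            self.visible.extend(self.pending)
            self.pending = []

    idx = delayedindex()
    idx.write(b'entry for rev 0')
    assert idx.visible == [] and idx.writepending() == [b'entry for rev 0']
    idx.finalize()
    assert idx.visible == [b'entry for rev 0']
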
502 502 def _enforceinlinesize(self, tr, fp=None):
503 503 if not self._delayed:
504 504 revlog.revlog._enforceinlinesize(self, tr, fp)
505 505
506 506 def read(self, node):
507 507 """Obtain data from a parsed changelog revision.
508 508
509 509 Returns a 6-tuple of:
510 510
511 511 - manifest node in binary
512 512 - author/user as a localstr
513 513 - date as a 2-tuple of (time, timezone)
514 514 - list of files
515 515 - commit message as a localstr
516 516 - dict of extra metadata
517 517
518 518 Unless you need to access all fields, consider calling
519 519 ``changelogrevision`` instead, as it is faster for partial object
520 520 access.
521 521 """
522 522 d, s = self._revisiondata(node)
523 523 c = changelogrevision(
524 524 d, s, self._copiesstorage == b'changeset-sidedata'
525 525 )
526 526 return (c.manifest, c.user, c.date, c.files, c.description, c.extra)
527 527
528 528 def changelogrevision(self, nodeorrev):
529 529 """Obtain a ``changelogrevision`` for a node or revision."""
530 530 text, sidedata = self._revisiondata(nodeorrev)
531 531 return changelogrevision(
532 532 text, sidedata, self._copiesstorage == b'changeset-sidedata'
533 533 )
534 534
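For example, reading the tip changeset both ways might look like the sketch below. The repository path is a placeholder, and this assumes a local repository opened through the normal ``hg`` APIs; ``changelogrevision()`` is the cheaper option when only a few fields are needed.

    from mercurial import hg, ui as uimod

    repo = hg.repository(uimod.ui.load(), b'/path/to/repo')  # placeholder path
    cl = repo.changelog
    tipnode = cl.node(cl.tiprev())

    # Full 6-tuple parse:
    manifest, user, date, files, desc, extra = cl.read(tipnode)

    # Partial access without paying for the whole tuple:
    crev = cl.changelogrevision(tipnode)
    branch, closed = crev.branchinfo
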
535 535 def readfiles(self, node):
536 536 """
537 537 short version of read that only returns the files modified by the cset
538 538 """
539 539 text = self.revision(node)
540 540 if not text:
541 541 return []
542 542 last = text.index(b"\n\n")
543 543 l = text[:last].split(b'\n')
544 544 return l[3:]
545 545
546 546 def add(
547 547 self,
548 548 manifest,
549 549 files,
550 550 desc,
551 551 transaction,
552 552 p1,
553 553 p2,
554 554 user,
555 555 date=None,
556 556 extra=None,
557 557 ):
558 558 # Convert to UTF-8 encoded bytestrings as the very first
559 559 # thing: calling any method on a localstr object will turn it
560 560 # into a str object and the cached UTF-8 string is thus lost.
561 561 user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)
562 562
563 563 user = user.strip()
564 564 # An empty username or a username with a "\n" will make the
565 565 # revision text contain two "\n\n" sequences -> corrupt
566 566 # repository since read cannot unpack the revision.
567 567 if not user:
568 568 raise error.StorageError(_(b"empty username"))
569 569 if b"\n" in user:
570 570 raise error.StorageError(
571 571 _(b"username %r contains a newline") % pycompat.bytestr(user)
572 572 )
573 573
574 574 desc = stripdesc(desc)
575 575
576 576 if date:
577 577 parseddate = b"%d %d" % dateutil.parsedate(date)
578 578 else:
579 579 parseddate = b"%d %d" % dateutil.makedate()
580 580 if extra:
581 581 branch = extra.get(b"branch")
582 582 if branch in (b"default", b""):
583 583 del extra[b"branch"]
584 584 elif branch in (b".", b"null", b"tip"):
585 585 raise error.StorageError(
586 586 _(b'the name \'%s\' is reserved') % branch
587 587 )
588 588 sortedfiles = sorted(files.touched)
589 589 flags = 0
590 590 sidedata = None
591 591 if self._copiesstorage == b'changeset-sidedata':
592 592 if files.has_copies_info:
593 593 flags |= flagutil.REVIDX_HASCOPIESINFO
594 594 sidedata = metadata.encode_files_sidedata(files)
595 595
596 596 if extra:
597 597 extra = encodeextra(extra)
598 598 parseddate = b"%s %s" % (parseddate, extra)
599 599 l = [hex(manifest), user, parseddate] + sortedfiles + [b"", desc]
600 600 text = b"\n".join(l)
601 601 rev = self.addrevision(
602 602 text, transaction, len(self), p1, p2, sidedata=sidedata, flags=flags
603 603 )
604 604 return self.node(rev)
605 605
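add() above serializes a changeset as newline-joined fields: the manifest node in hex, the user, the date line (with encoded extras appended), the sorted file list, a blank line, and the description. A minimal sketch of picking that raw text apart, mirroring what readfiles() does; ``cl`` and ``node`` are assumed to be a changelog instance and a known changeset node:

    raw = cl.revision(node)                 # raw changeset text
    header, desc = raw.split(b'\n\n', 1)    # description follows the blank line
    lines = header.split(b'\n')
    manifest_hex = lines[0]                 # hex(manifest)
    user = lines[1]
    dateline = lines[2]                     # "<time> <tz>", plus encoded extra if any
    files = lines[3:]                       # the same slice readfiles() returns
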
606 606 def branchinfo(self, rev):
607 607 """return the branch name and open/close state of a revision
608 608
609 609 This function exists because creating a changectx object
610 610 just to access this is costly."""
611 611 return self.changelogrevision(rev).branchinfo
612 612
613 def _nodeduplicatecallback(self, transaction, node):
613 def _nodeduplicatecallback(self, transaction, rev):
614 614 # keep track of revisions that got "re-added", e.g. unbundle of a known rev.
615 615 #
616 616 # We track them in a list to preserve their order from the source bundle
617 617 duplicates = transaction.changes.setdefault(b'revduplicates', [])
618 duplicates.append(self.rev(node))
618 duplicates.append(rev)
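The hunk above is the heart of this changeset: addgroup() callbacks now receive the new revision number instead of the node, which avoids a node-to-rev lookup on the hot path. A side-by-side sketch, where ``store`` and ``duplicates`` stand in for the revlog and the tracking list used above:

    duplicates = []

    # Before this change: the callback received the node and had to map it
    # back to a revision number itself.
    def ondup_old(store, node):
        duplicates.append(store.rev(node))

    # After this change: the callback receives the revision number directly
    # and only resolves the node when it is actually needed.
    def ondup_new(store, rev):
        duplicates.append(rev)
        node = store.node(rev)      # still cheap to obtain when required
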
@@ -1,799 +1,798 b''
1 1 # exchangev2.py - repository exchange for wire protocol version 2
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import collections
11 11 import weakref
12 12
13 13 from .i18n import _
14 14 from .node import (
15 15 nullid,
16 16 short,
17 17 )
18 18 from . import (
19 19 bookmarks,
20 20 error,
21 21 mdiff,
22 22 narrowspec,
23 23 phases,
24 24 pycompat,
25 25 setdiscovery,
26 26 )
27 27 from .interfaces import repository
28 28
29 29
30 30 def pull(pullop):
31 31 """Pull using wire protocol version 2."""
32 32 repo = pullop.repo
33 33 remote = pullop.remote
34 34
35 35 usingrawchangelogandmanifest = _checkuserawstorefiledata(pullop)
36 36
37 37 # If this is a clone and it was requested to perform a "stream clone",
38 38 # we obtain the raw files data from the remote then fall back to an
39 39 # incremental pull. This is somewhat hacky and is not nearly robust enough
40 40 # for long-term usage.
41 41 if usingrawchangelogandmanifest:
42 42 with repo.transaction(b'clone'):
43 43 _fetchrawstorefiles(repo, remote)
44 44 repo.invalidate(clearfilecache=True)
45 45
46 46 tr = pullop.trmanager.transaction()
47 47
48 48 # We don't use the repo's narrow matcher here because the patterns passed
49 49 # to exchange.pull() could be different.
50 50 narrowmatcher = narrowspec.match(
51 51 repo.root,
52 52 # An empty include set maps to nevermatcher. So always
53 53 # set includes if missing.
54 54 pullop.includepats or {b'path:.'},
55 55 pullop.excludepats,
56 56 )
57 57
58 58 if pullop.includepats or pullop.excludepats:
59 59 pathfilter = {}
60 60 if pullop.includepats:
61 61 pathfilter[b'include'] = sorted(pullop.includepats)
62 62 if pullop.excludepats:
63 63 pathfilter[b'exclude'] = sorted(pullop.excludepats)
64 64 else:
65 65 pathfilter = None
66 66
67 67 # Figure out what needs to be fetched.
68 68 common, fetch, remoteheads = _pullchangesetdiscovery(
69 69 repo, remote, pullop.heads, abortwhenunrelated=pullop.force
70 70 )
71 71
72 72 # And fetch the data.
73 73 pullheads = pullop.heads or remoteheads
74 74 csetres = _fetchchangesets(repo, tr, remote, common, fetch, pullheads)
75 75
76 76 # New revisions are written to the changelog. But all other updates
77 77 # are deferred. Do those now.
78 78
79 79 # Ensure all new changesets are draft by default. If the repo is
80 80 # publishing, the phase will be adjusted by the loop below.
81 81 if csetres[b'added']:
82 82 phases.registernew(
83 83 repo, tr, phases.draft, [repo[n].rev() for n in csetres[b'added']]
84 84 )
85 85
86 86 # And adjust the phase of all changesets accordingly.
87 87 for phasenumber, phase in phases.phasenames.items():
88 88 if phase == b'secret' or not csetres[b'nodesbyphase'][phase]:
89 89 continue
90 90
91 91 phases.advanceboundary(
92 92 repo,
93 93 tr,
94 94 phasenumber,
95 95 csetres[b'nodesbyphase'][phase],
96 96 )
97 97
98 98 # Write bookmark updates.
99 99 bookmarks.updatefromremote(
100 100 repo.ui,
101 101 repo,
102 102 csetres[b'bookmarks'],
103 103 remote.url(),
104 104 pullop.gettransaction,
105 105 explicit=pullop.explicitbookmarks,
106 106 )
107 107
108 108 manres = _fetchmanifests(repo, tr, remote, csetres[b'manifestnodes'])
109 109
110 110 # We don't properly support shallow changeset and manifest yet. So we apply
111 111 # depth limiting locally.
112 112 if pullop.depth:
113 113 relevantcsetnodes = set()
114 114 clnode = repo.changelog.node
115 115
116 116 for rev in repo.revs(
117 117 b'ancestors(%ln, %s)', pullheads, pullop.depth - 1
118 118 ):
119 119 relevantcsetnodes.add(clnode(rev))
120 120
121 121 csetrelevantfilter = lambda n: n in relevantcsetnodes
122 122
123 123 else:
124 124 csetrelevantfilter = lambda n: True
125 125
126 126 # If obtaining the raw store files, we need to scan the full repo to
127 127 # derive all the changesets, manifests, and linkrevs.
128 128 if usingrawchangelogandmanifest:
129 129 csetsforfiles = []
130 130 mnodesforfiles = []
131 131 manifestlinkrevs = {}
132 132
133 133 for rev in repo:
134 134 ctx = repo[rev]
135 135 node = ctx.node()
136 136
137 137 if not csetrelevantfilter(node):
138 138 continue
139 139
140 140 mnode = ctx.manifestnode()
141 141
142 142 csetsforfiles.append(node)
143 143 mnodesforfiles.append(mnode)
144 144 manifestlinkrevs[mnode] = rev
145 145
146 146 else:
147 147 csetsforfiles = [n for n in csetres[b'added'] if csetrelevantfilter(n)]
148 148 mnodesforfiles = manres[b'added']
149 149 manifestlinkrevs = manres[b'linkrevs']
150 150
151 151 # Find all file nodes referenced by added manifests and fetch those
152 152 # revisions.
153 153 fnodes = _derivefilesfrommanifests(repo, narrowmatcher, mnodesforfiles)
154 154 _fetchfilesfromcsets(
155 155 repo,
156 156 tr,
157 157 remote,
158 158 pathfilter,
159 159 fnodes,
160 160 csetsforfiles,
161 161 manifestlinkrevs,
162 162 shallow=bool(pullop.depth),
163 163 )
164 164
165 165
166 166 def _checkuserawstorefiledata(pullop):
167 167 """Check whether we should use rawstorefiledata command to retrieve data."""
168 168
169 169 repo = pullop.repo
170 170 remote = pullop.remote
171 171
172 172 # Command to obtain raw store data isn't available.
173 173 if b'rawstorefiledata' not in remote.apidescriptor[b'commands']:
174 174 return False
175 175
176 176 # Only honor if user requested stream clone operation.
177 177 if not pullop.streamclonerequested:
178 178 return False
179 179
180 180 # Only works on empty repos.
181 181 if len(repo):
182 182 return False
183 183
184 184 # TODO This is super hacky. There needs to be a storage API for this. We
185 185 # also need to check for compatibility with the remote.
186 186 if b'revlogv1' not in repo.requirements:
187 187 return False
188 188
189 189 return True
190 190
191 191
192 192 def _fetchrawstorefiles(repo, remote):
193 193 with remote.commandexecutor() as e:
194 194 objs = e.callcommand(
195 195 b'rawstorefiledata',
196 196 {
197 197 b'files': [b'changelog', b'manifestlog'],
198 198 },
199 199 ).result()
200 200
201 201 # First object is a summary of files data that follows.
202 202 overall = next(objs)
203 203
204 204 progress = repo.ui.makeprogress(
205 205 _(b'clone'), total=overall[b'totalsize'], unit=_(b'bytes')
206 206 )
207 207 with progress:
208 208 progress.update(0)
209 209
210 210 # Next come pairs of (file metadata, data).
211 211 while True:
212 212 try:
213 213 filemeta = next(objs)
214 214 except StopIteration:
215 215 break
216 216
217 217 for k in (b'location', b'path', b'size'):
218 218 if k not in filemeta:
219 219 raise error.Abort(
220 220 _(b'remote file data missing key: %s') % k
221 221 )
222 222
223 223 if filemeta[b'location'] == b'store':
224 224 vfs = repo.svfs
225 225 else:
226 226 raise error.Abort(
227 227 _(b'invalid location for raw file data: %s')
228 228 % filemeta[b'location']
229 229 )
230 230
231 231 bytesremaining = filemeta[b'size']
232 232
233 233 with vfs.open(filemeta[b'path'], b'wb') as fh:
234 234 while True:
235 235 try:
236 236 chunk = next(objs)
237 237 except StopIteration:
238 238 break
239 239
240 240 bytesremaining -= len(chunk)
241 241
242 242 if bytesremaining < 0:
243 243 raise error.Abort(
244 244 _(
245 245 b'received invalid number of bytes for file '
246 246 b'data; expected %d, got extra'
247 247 )
248 248 % filemeta[b'size']
249 249 )
250 250
251 251 progress.increment(step=len(chunk))
252 252 fh.write(chunk)
253 253
254 254 try:
255 255 if chunk.islast:
256 256 break
257 257 except AttributeError:
258 258 raise error.Abort(
259 259 _(
260 260 b'did not receive indefinite length bytestring '
261 261 b'for file data'
262 262 )
263 263 )
264 264
265 265 if bytesremaining:
266 266 raise error.Abort(
267 267 _(
268 268 b'received invalid number of bytes for '
269 269 b'file data; expected %d, got %d'
270 270 )
271 271 % (
272 272 filemeta[b'size'],
273 273 filemeta[b'size'] - bytesremaining,
274 274 )
275 275 )
276 276
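_fetchrawstorefiles() consumes the ``rawstorefiledata`` command as a flat object stream: one overall summary, then for each file a metadata dict followed by one or more data chunks, the last of which has ``islast`` set. A rough sketch of that shape; the keys come from the code above, but every value below is made up for illustration:

    # Illustration only. Real data chunks arrive as indefinite-length
    # bytestring frames whose final chunk carries ``islast = True``.
    stream = iter([
        {b'totalsize': 2048},                                         # overall summary
        {b'location': b'store', b'path': b'00changelog.i', b'size': 1024},
        b'<1024 bytes of changelog index data>',                      # data chunk(s)
        {b'location': b'store', b'path': b'00changelog.d', b'size': 1024},
        b'<1024 bytes of changelog revision data>',
    ])
    overall = next(stream)     # _fetchrawstorefiles() reads the summary first
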
277 277
278 278 def _pullchangesetdiscovery(repo, remote, heads, abortwhenunrelated=True):
279 279 """Determine which changesets need to be pulled."""
280 280
281 281 if heads:
282 282 knownnode = repo.changelog.hasnode
283 283 if all(knownnode(head) for head in heads):
284 284 return heads, False, heads
285 285
286 286 # TODO wire protocol version 2 is capable of more efficient discovery
287 287 # than setdiscovery. Consider implementing something better.
288 288 common, fetch, remoteheads = setdiscovery.findcommonheads(
289 289 repo.ui, repo, remote, abortwhenunrelated=abortwhenunrelated
290 290 )
291 291
292 292 common = set(common)
293 293 remoteheads = set(remoteheads)
294 294
295 295 # If a remote head is filtered locally, put it back in the common set.
296 296 # See the comment in exchange._pulldiscoverychangegroup() for more.
297 297
298 298 if fetch and remoteheads:
299 299 has_node = repo.unfiltered().changelog.index.has_node
300 300
301 301 common |= {head for head in remoteheads if has_node(head)}
302 302
303 303 if set(remoteheads).issubset(common):
304 304 fetch = []
305 305
306 306 common.discard(nullid)
307 307
308 308 return common, fetch, remoteheads
309 309
310 310
311 311 def _fetchchangesets(repo, tr, remote, common, fetch, remoteheads):
312 312 # TODO consider adding a step here where we obtain the DAG shape first
313 313 # (or ask the server to slice changesets into chunks for us) so that
314 314 # we can perform multiple fetches in batches. This will facilitate
315 315 # resuming interrupted clones, higher server-side cache hit rates due
316 316 # to smaller segments, etc.
317 317 with remote.commandexecutor() as e:
318 318 objs = e.callcommand(
319 319 b'changesetdata',
320 320 {
321 321 b'revisions': [
322 322 {
323 323 b'type': b'changesetdagrange',
324 324 b'roots': sorted(common),
325 325 b'heads': sorted(remoteheads),
326 326 }
327 327 ],
328 328 b'fields': {b'bookmarks', b'parents', b'phase', b'revision'},
329 329 },
330 330 ).result()
331 331
332 332 # The context manager waits on all response data when exiting. So
333 333 # we need to remain in the context manager in order to stream data.
334 334 return _processchangesetdata(repo, tr, objs)
335 335
336 336
337 337 def _processchangesetdata(repo, tr, objs):
338 338 repo.hook(b'prechangegroup', throw=True, **pycompat.strkwargs(tr.hookargs))
339 339
340 340 urepo = repo.unfiltered()
341 341 cl = urepo.changelog
342 342
343 343 cl.delayupdate(tr)
344 344
345 345 # The first emitted object is a header describing the data that
346 346 # follows.
347 347 meta = next(objs)
348 348
349 349 progress = repo.ui.makeprogress(
350 350 _(b'changesets'), unit=_(b'chunks'), total=meta.get(b'totalitems')
351 351 )
352 352
353 353 manifestnodes = {}
354 354 added = []
355 355
356 356 def linkrev(node):
357 357 repo.ui.debug(b'add changeset %s\n' % short(node))
358 358 # Linkrev for changelog is always self.
359 359 return len(cl)
360 360
361 def ondupchangeset(cl, node):
362 added.append(node)
361 def ondupchangeset(cl, rev):
362 added.append(cl.node(rev))
363 363
364 def onchangeset(cl, node):
364 def onchangeset(cl, rev):
365 365 progress.increment()
366 366
367 rev = cl.rev(node)
368 367 revision = cl.changelogrevision(rev)
369 added.append(node)
368 added.append(cl.node(rev))
370 369
371 370 # We need to preserve the mapping of changelog revision to node
372 371 # so we can set the linkrev accordingly when manifests are added.
373 372 manifestnodes[rev] = revision.manifest
374 373
375 374 repo.register_changeset(rev, revision)
376 375
377 376 nodesbyphase = {phase: set() for phase in phases.phasenames.values()}
378 377 remotebookmarks = {}
379 378
380 379 # addgroup() expects a 7-tuple describing revisions. This normalizes
381 380 # the wire data to that format.
382 381 #
383 382 # This loop also aggregates non-revision metadata, such as phase
384 383 # data.
385 384 def iterrevisions():
386 385 for cset in objs:
387 386 node = cset[b'node']
388 387
389 388 if b'phase' in cset:
390 389 nodesbyphase[cset[b'phase']].add(node)
391 390
392 391 for mark in cset.get(b'bookmarks', []):
393 392 remotebookmarks[mark] = node
394 393
395 394 # TODO add mechanism for extensions to examine records so they
396 395 # can siphon off custom data fields.
397 396
398 397 extrafields = {}
399 398
400 399 for field, size in cset.get(b'fieldsfollowing', []):
401 400 extrafields[field] = next(objs)
402 401
403 402 # Some entries might be metadata-only updates.
404 403 if b'revision' not in extrafields:
405 404 continue
406 405
407 406 data = extrafields[b'revision']
408 407
409 408 yield (
410 409 node,
411 410 cset[b'parents'][0],
412 411 cset[b'parents'][1],
413 412 # Linknode is always itself for changesets.
414 413 cset[b'node'],
415 414 # We always send full revisions. So delta base is not set.
416 415 nullid,
417 416 mdiff.trivialdiffheader(len(data)) + data,
418 417 # Flags not yet supported.
419 418 0,
420 419 )
421 420
422 421 cl.addgroup(
423 422 iterrevisions(),
424 423 linkrev,
425 424 weakref.proxy(tr),
426 425 alwayscache=True,
427 426 addrevisioncb=onchangeset,
428 427 duplicaterevisioncb=ondupchangeset,
429 428 )
430 429
431 430 progress.complete()
432 431
433 432 return {
434 433 b'added': added,
435 434 b'nodesbyphase': nodesbyphase,
436 435 b'bookmarks': remotebookmarks,
437 436 b'manifestnodes': manifestnodes,
438 437 }
439 438
440 439
441 440 def _fetchmanifests(repo, tr, remote, manifestnodes):
442 441 rootmanifest = repo.manifestlog.getstorage(b'')
443 442
444 443 # Some manifests can be shared between changesets. Filter out revisions
445 444 # we already know about.
446 445 fetchnodes = []
447 446 linkrevs = {}
448 447 seen = set()
449 448
450 449 for clrev, node in sorted(pycompat.iteritems(manifestnodes)):
451 450 if node in seen:
452 451 continue
453 452
454 453 try:
455 454 rootmanifest.rev(node)
456 455 except error.LookupError:
457 456 fetchnodes.append(node)
458 457 linkrevs[node] = clrev
459 458
460 459 seen.add(node)
461 460
462 461 # TODO handle tree manifests
463 462
464 463 # addgroup() expects a 7-tuple describing revisions. This normalizes
465 464 # the wire data to that format.
466 465 def iterrevisions(objs, progress):
467 466 for manifest in objs:
468 467 node = manifest[b'node']
469 468
470 469 extrafields = {}
471 470
472 471 for field, size in manifest.get(b'fieldsfollowing', []):
473 472 extrafields[field] = next(objs)
474 473
475 474 if b'delta' in extrafields:
476 475 basenode = manifest[b'deltabasenode']
477 476 delta = extrafields[b'delta']
478 477 elif b'revision' in extrafields:
479 478 basenode = nullid
480 479 revision = extrafields[b'revision']
481 480 delta = mdiff.trivialdiffheader(len(revision)) + revision
482 481 else:
483 482 continue
484 483
485 484 yield (
486 485 node,
487 486 manifest[b'parents'][0],
488 487 manifest[b'parents'][1],
489 488 # The value passed in here is handed to the lookup function given
490 489 # to addgroup(). We already have a map of manifest node to
491 490 # changelog revision number. So we just pass in the
492 491 # manifest node here and use linkrevs.__getitem__ as the
493 492 # resolution function.
494 493 node,
495 494 basenode,
496 495 delta,
497 496 # Flags not yet supported.
498 497 0,
499 498 )
500 499
501 500 progress.increment()
502 501
503 502 progress = repo.ui.makeprogress(
504 503 _(b'manifests'), unit=_(b'chunks'), total=len(fetchnodes)
505 504 )
506 505
507 506 commandmeta = remote.apidescriptor[b'commands'][b'manifestdata']
508 507 batchsize = commandmeta.get(b'recommendedbatchsize', 10000)
509 508 # TODO make size configurable on client?
510 509
511 510 # We send commands 1 at a time to the remote. This is not the most
512 511 # efficient because we incur a round trip at the end of each batch.
513 512 # However, the existing frame-based reactor keeps consuming server
514 513 # data in the background. And this results in response data buffering
515 514 # in memory. This can consume gigabytes of memory.
516 515 # TODO send multiple commands in a request once background buffering
517 516 # issues are resolved.
518 517
519 518 added = []
520 519
521 520 for i in pycompat.xrange(0, len(fetchnodes), batchsize):
522 521 batch = [node for node in fetchnodes[i : i + batchsize]]
523 522 if not batch:
524 523 continue
525 524
526 525 with remote.commandexecutor() as e:
527 526 objs = e.callcommand(
528 527 b'manifestdata',
529 528 {
530 529 b'tree': b'',
531 530 b'nodes': batch,
532 531 b'fields': {b'parents', b'revision'},
533 532 b'haveparents': True,
534 533 },
535 534 ).result()
536 535
537 536 # Chomp off header object.
538 537 next(objs)
539 538
540 def onchangeset(cl, node):
541 added.append(node)
539 def onchangeset(cl, rev):
540 added.append(cl.node(rev))
542 541
543 542 rootmanifest.addgroup(
544 543 iterrevisions(objs, progress),
545 544 linkrevs.__getitem__,
546 545 weakref.proxy(tr),
547 546 addrevisioncb=onchangeset,
548 547 duplicaterevisioncb=onchangeset,
549 548 )
550 549
551 550 progress.complete()
552 551
553 552 return {
554 553 b'added': added,
555 554 b'linkrevs': linkrevs,
556 555 }
557 556
558 557
559 558 def _derivefilesfrommanifests(repo, matcher, manifestnodes):
560 559 """Determine what file nodes are relevant given a set of manifest nodes.
561 560
562 561 Returns a dict mapping file paths to dicts of file node to first manifest
563 562 node.
564 563 """
565 564 ml = repo.manifestlog
566 565 fnodes = collections.defaultdict(dict)
567 566
568 567 progress = repo.ui.makeprogress(
569 568 _(b'scanning manifests'), total=len(manifestnodes)
570 569 )
571 570
572 571 with progress:
573 572 for manifestnode in manifestnodes:
574 573 m = ml.get(b'', manifestnode)
575 574
576 575 # TODO this will pull in unwanted nodes because it takes the storage
577 576 # delta into consideration. What we really want is something that
578 577 # takes the delta between the manifest's parents. And ideally we
579 578 # would ignore file nodes that are known locally. For now, ignore
580 579 # both these limitations. This will result in incremental fetches
581 580 # requesting data we already have. So this is far from ideal.
582 581 md = m.readfast()
583 582
584 583 for path, fnode in md.items():
585 584 if matcher(path):
586 585 fnodes[path].setdefault(fnode, manifestnode)
587 586
588 587 progress.increment()
589 588
590 589 return fnodes
591 590
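The mapping returned here nests two levels deep: path -> {file node -> first manifest node that referenced it}. A small sketch of walking it, assuming ``repo``, ``narrowmatcher``, and ``mnodesforfiles`` as used in pull() above:

    fnodes = _derivefilesfrommanifests(repo, narrowmatcher, mnodesforfiles)
    for path, nodemap in sorted(fnodes.items()):
        for filenode, manifestnode in nodemap.items():
            # ``filenode`` was first seen in ``manifestnode``; the caller later
            # maps that manifest node to a changelog revision to set linkrevs.
            pass
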
592 591
593 592 def _fetchfiles(repo, tr, remote, fnodes, linkrevs):
594 593 """Fetch file data from explicit file revisions."""
595 594
596 595 def iterrevisions(objs, progress):
597 596 for filerevision in objs:
598 597 node = filerevision[b'node']
599 598
600 599 extrafields = {}
601 600
602 601 for field, size in filerevision.get(b'fieldsfollowing', []):
603 602 extrafields[field] = next(objs)
604 603
605 604 if b'delta' in extrafields:
606 605 basenode = filerevision[b'deltabasenode']
607 606 delta = extrafields[b'delta']
608 607 elif b'revision' in extrafields:
609 608 basenode = nullid
610 609 revision = extrafields[b'revision']
611 610 delta = mdiff.trivialdiffheader(len(revision)) + revision
612 611 else:
613 612 continue
614 613
615 614 yield (
616 615 node,
617 616 filerevision[b'parents'][0],
618 617 filerevision[b'parents'][1],
619 618 node,
620 619 basenode,
621 620 delta,
622 621 # Flags not yet supported.
623 622 0,
624 623 )
625 624
626 625 progress.increment()
627 626
628 627 progress = repo.ui.makeprogress(
629 628 _(b'files'),
630 629 unit=_(b'chunks'),
631 630 total=sum(len(v) for v in pycompat.itervalues(fnodes)),
632 631 )
633 632
634 633 # TODO make batch size configurable
635 634 batchsize = 10000
636 635 fnodeslist = [x for x in sorted(fnodes.items())]
637 636
638 637 for i in pycompat.xrange(0, len(fnodeslist), batchsize):
639 638 batch = [x for x in fnodeslist[i : i + batchsize]]
640 639 if not batch:
641 640 continue
642 641
643 642 with remote.commandexecutor() as e:
644 643 fs = []
645 644 locallinkrevs = {}
646 645
647 646 for path, nodes in batch:
648 647 fs.append(
649 648 (
650 649 path,
651 650 e.callcommand(
652 651 b'filedata',
653 652 {
654 653 b'path': path,
655 654 b'nodes': sorted(nodes),
656 655 b'fields': {b'parents', b'revision'},
657 656 b'haveparents': True,
658 657 },
659 658 ),
660 659 )
661 660 )
662 661
663 662 locallinkrevs[path] = {
664 663 node: linkrevs[manifestnode]
665 664 for node, manifestnode in pycompat.iteritems(nodes)
666 665 }
667 666
668 667 for path, f in fs:
669 668 objs = f.result()
670 669
671 670 # Chomp off header objects.
672 671 next(objs)
673 672
674 673 store = repo.file(path)
675 674 store.addgroup(
676 675 iterrevisions(objs, progress),
677 676 locallinkrevs[path].__getitem__,
678 677 weakref.proxy(tr),
679 678 )
680 679
681 680
682 681 def _fetchfilesfromcsets(
683 682 repo, tr, remote, pathfilter, fnodes, csets, manlinkrevs, shallow=False
684 683 ):
685 684 """Fetch file data from explicit changeset revisions."""
686 685
687 686 def iterrevisions(objs, remaining, progress):
688 687 while remaining:
689 688 filerevision = next(objs)
690 689
691 690 node = filerevision[b'node']
692 691
693 692 extrafields = {}
694 693
695 694 for field, size in filerevision.get(b'fieldsfollowing', []):
696 695 extrafields[field] = next(objs)
697 696
698 697 if b'delta' in extrafields:
699 698 basenode = filerevision[b'deltabasenode']
700 699 delta = extrafields[b'delta']
701 700 elif b'revision' in extrafields:
702 701 basenode = nullid
703 702 revision = extrafields[b'revision']
704 703 delta = mdiff.trivialdiffheader(len(revision)) + revision
705 704 else:
706 705 continue
707 706
708 707 if b'linknode' in filerevision:
709 708 linknode = filerevision[b'linknode']
710 709 else:
711 710 linknode = node
712 711
713 712 yield (
714 713 node,
715 714 filerevision[b'parents'][0],
716 715 filerevision[b'parents'][1],
717 716 linknode,
718 717 basenode,
719 718 delta,
720 719 # Flags not yet supported.
721 720 0,
722 721 )
723 722
724 723 progress.increment()
725 724 remaining -= 1
726 725
727 726 progress = repo.ui.makeprogress(
728 727 _(b'files'),
729 728 unit=_(b'chunks'),
730 729 total=sum(len(v) for v in pycompat.itervalues(fnodes)),
731 730 )
732 731
733 732 commandmeta = remote.apidescriptor[b'commands'][b'filesdata']
734 733 batchsize = commandmeta.get(b'recommendedbatchsize', 50000)
735 734
736 735 shallowfiles = repository.REPO_FEATURE_SHALLOW_FILE_STORAGE in repo.features
737 736 fields = {b'parents', b'revision'}
738 737 clrev = repo.changelog.rev
739 738
740 739 # There are no guarantees that we'll have ancestor revisions if
741 740 # a) this repo has shallow file storage b) shallow data fetching is enabled.
742 741 # Force remote to not delta against possibly unknown revisions when these
743 742 # conditions hold.
744 743 haveparents = not (shallowfiles or shallow)
745 744
746 745 # Similarly, we may not have calculated linkrevs for all incoming file
747 746 # revisions. Ask the remote to do work for us in this case.
748 747 if not haveparents:
749 748 fields.add(b'linknode')
750 749
751 750 for i in pycompat.xrange(0, len(csets), batchsize):
752 751 batch = [x for x in csets[i : i + batchsize]]
753 752 if not batch:
754 753 continue
755 754
756 755 with remote.commandexecutor() as e:
757 756 args = {
758 757 b'revisions': [
759 758 {
760 759 b'type': b'changesetexplicit',
761 760 b'nodes': batch,
762 761 }
763 762 ],
764 763 b'fields': fields,
765 764 b'haveparents': haveparents,
766 765 }
767 766
768 767 if pathfilter:
769 768 args[b'pathfilter'] = pathfilter
770 769
771 770 objs = e.callcommand(b'filesdata', args).result()
772 771
773 772 # First object is an overall header.
774 773 overall = next(objs)
775 774
776 775 # We have overall['totalpaths'] segments.
777 776 for i in pycompat.xrange(overall[b'totalpaths']):
778 777 header = next(objs)
779 778
780 779 path = header[b'path']
781 780 store = repo.file(path)
782 781
783 782 linkrevs = {
784 783 fnode: manlinkrevs[mnode]
785 784 for fnode, mnode in pycompat.iteritems(fnodes[path])
786 785 }
787 786
788 787 def getlinkrev(node):
789 788 if node in linkrevs:
790 789 return linkrevs[node]
791 790 else:
792 791 return clrev(node)
793 792
794 793 store.addgroup(
795 794 iterrevisions(objs, header[b'totalitems'], progress),
796 795 getlinkrev,
797 796 weakref.proxy(tr),
798 797 maybemissingparents=shallow,
799 798 )
@@ -1,1993 +1,1994 b''
1 1 # repository.py - Interfaces and base classes for repositories and peers.
2 2 #
3 3 # Copyright 2017 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from ..i18n import _
11 11 from .. import error
12 12 from . import util as interfaceutil
13 13
14 14 # Local repository feature string.
15 15
16 16 # Revlogs are being used for file storage.
17 17 REPO_FEATURE_REVLOG_FILE_STORAGE = b'revlogfilestorage'
18 18 # The storage part of the repository is shared from an external source.
19 19 REPO_FEATURE_SHARED_STORAGE = b'sharedstore'
20 20 # LFS supported for backing file storage.
21 21 REPO_FEATURE_LFS = b'lfs'
22 22 # Repository supports being stream cloned.
23 23 REPO_FEATURE_STREAM_CLONE = b'streamclone'
24 24 # Files storage may lack data for all ancestors.
25 25 REPO_FEATURE_SHALLOW_FILE_STORAGE = b'shallowfilestorage'
26 26
27 27 REVISION_FLAG_CENSORED = 1 << 15
28 28 REVISION_FLAG_ELLIPSIS = 1 << 14
29 29 REVISION_FLAG_EXTSTORED = 1 << 13
30 30 REVISION_FLAG_SIDEDATA = 1 << 12
31 31 REVISION_FLAG_HASCOPIESINFO = 1 << 11
32 32
33 33 REVISION_FLAGS_KNOWN = (
34 34 REVISION_FLAG_CENSORED
35 35 | REVISION_FLAG_ELLIPSIS
36 36 | REVISION_FLAG_EXTSTORED
37 37 | REVISION_FLAG_SIDEDATA
38 38 | REVISION_FLAG_HASCOPIESINFO
39 39 )
40 40
41 41 CG_DELTAMODE_STD = b'default'
42 42 CG_DELTAMODE_PREV = b'previous'
43 43 CG_DELTAMODE_FULL = b'fulltext'
44 44 CG_DELTAMODE_P1 = b'p1'
45 45
46 46
47 47 class ipeerconnection(interfaceutil.Interface):
48 48 """Represents a "connection" to a repository.
49 49
50 50 This is the base interface for representing a connection to a repository.
51 51 It holds basic properties and methods applicable to all peer types.
52 52
53 53 This is not a complete interface definition and should not be used
54 54 outside of this module.
55 55 """
56 56
57 57 ui = interfaceutil.Attribute("""ui.ui instance""")
58 58
59 59 def url():
60 60 """Returns a URL string representing this peer.
61 61
62 62 Currently, implementations expose the raw URL used to construct the
63 63 instance. It may contain credentials as part of the URL. The
64 64 expectations of the value aren't well-defined and this could lead to
65 65 data leakage.
66 66
67 67 TODO audit/clean consumers and more clearly define the contents of this
68 68 value.
69 69 """
70 70
71 71 def local():
72 72 """Returns a local repository instance.
73 73
74 74 If the peer represents a local repository, returns an object that
75 75 can be used to interface with it. Otherwise returns ``None``.
76 76 """
77 77
78 78 def peer():
79 79 """Returns an object conforming to this interface.
80 80
81 81 Most implementations will ``return self``.
82 82 """
83 83
84 84 def canpush():
85 85 """Returns a boolean indicating if this peer can be pushed to."""
86 86
87 87 def close():
88 88 """Close the connection to this peer.
89 89
90 90 This is called when the peer will no longer be used. Resources
91 91 associated with the peer should be cleaned up.
92 92 """
93 93
94 94
95 95 class ipeercapabilities(interfaceutil.Interface):
96 96 """Peer sub-interface related to capabilities."""
97 97
98 98 def capable(name):
99 99 """Determine support for a named capability.
100 100
101 101 Returns ``False`` if capability not supported.
102 102
103 103 Returns ``True`` if boolean capability is supported. Returns a string
104 104 if capability support is non-boolean.
105 105
106 106 Capability strings may or may not map to wire protocol capabilities.
107 107 """
108 108
109 109 def requirecap(name, purpose):
110 110 """Require a capability to be present.
111 111
112 112 Raises a ``CapabilityError`` if the capability isn't present.
113 113 """
114 114
115 115
116 116 class ipeercommands(interfaceutil.Interface):
117 117 """Client-side interface for communicating over the wire protocol.
118 118
119 119 This interface is used as a gateway to the Mercurial wire protocol.
120 120 methods commonly call wire protocol commands of the same name.
121 121 """
122 122
123 123 def branchmap():
124 124 """Obtain heads in named branches.
125 125
126 126 Returns a dict mapping branch name to an iterable of nodes that are
127 127 heads on that branch.
128 128 """
129 129
130 130 def capabilities():
131 131 """Obtain capabilities of the peer.
132 132
133 133 Returns a set of string capabilities.
134 134 """
135 135
136 136 def clonebundles():
137 137 """Obtains the clone bundles manifest for the repo.
138 138
139 139 Returns the manifest as unparsed bytes.
140 140 """
141 141
142 142 def debugwireargs(one, two, three=None, four=None, five=None):
143 143 """Used to facilitate debugging of arguments passed over the wire."""
144 144
145 145 def getbundle(source, **kwargs):
146 146 """Obtain remote repository data as a bundle.
147 147
148 148 This command is how the bulk of repository data is transferred from
149 149 the peer to the local repository
150 150
151 151 Returns a generator of bundle data.
152 152 """
153 153
154 154 def heads():
155 155 """Determine all known head revisions in the peer.
156 156
157 157 Returns an iterable of binary nodes.
158 158 """
159 159
160 160 def known(nodes):
161 161 """Determine whether multiple nodes are known.
162 162
163 163 Accepts an iterable of nodes whose presence to check for.
164 164
165 165 Returns an iterable of booleans indicating whether the corresponding node
166 166 at that index is known to the peer.
167 167 """
168 168
169 169 def listkeys(namespace):
170 170 """Obtain all keys in a pushkey namespace.
171 171
172 172 Returns an iterable of key names.
173 173 """
174 174
175 175 def lookup(key):
176 176 """Resolve a value to a known revision.
177 177
178 178 Returns a binary node of the resolved revision on success.
179 179 """
180 180
181 181 def pushkey(namespace, key, old, new):
182 182 """Set a value using the ``pushkey`` protocol.
183 183
184 184 Arguments correspond to the pushkey namespace and key to operate on and
185 185 the old and new values for that key.
186 186
187 187 Returns a string with the peer result. The value inside varies by the
188 188 namespace.
189 189 """
190 190
191 191 def stream_out():
192 192 """Obtain streaming clone data.
193 193
194 194 Successful result should be a generator of data chunks.
195 195 """
196 196
197 197 def unbundle(bundle, heads, url):
198 198 """Transfer repository data to the peer.
199 199
200 200 This is how the bulk of data during a push is transferred.
201 201
202 202 Returns the integer number of heads added to the peer.
203 203 """
204 204
205 205
206 206 class ipeerlegacycommands(interfaceutil.Interface):
207 207 """Interface for implementing support for legacy wire protocol commands.
208 208
209 209 Wire protocol commands transition to legacy status when they are no longer
210 210 used by modern clients. To facilitate identifying which commands are
211 211 legacy, the interfaces are split.
212 212 """
213 213
214 214 def between(pairs):
215 215 """Obtain nodes between pairs of nodes.
216 216
217 217 ``pairs`` is an iterable of node pairs.
218 218
219 219 Returns an iterable of iterables of nodes corresponding to each
220 220 requested pair.
221 221 """
222 222
223 223 def branches(nodes):
224 224 """Obtain ancestor changesets of specific nodes back to a branch point.
225 225
226 226 For each requested node, the peer finds the first ancestor node that is
227 227 a DAG root or is a merge.
228 228
229 229 Returns an iterable of iterables with the resolved values for each node.
230 230 """
231 231
232 232 def changegroup(nodes, source):
233 233 """Obtain a changegroup with data for descendants of specified nodes."""
234 234
235 235 def changegroupsubset(bases, heads, source):
236 236 pass
237 237
238 238
239 239 class ipeercommandexecutor(interfaceutil.Interface):
240 240 """Represents a mechanism to execute remote commands.
241 241
242 242 This is the primary interface for requesting that wire protocol commands
243 243 be executed. Instances of this interface are active in a context manager
244 244 and have a well-defined lifetime. When the context manager exits, all
245 245 outstanding requests are waited on.
246 246 """
247 247
248 248 def callcommand(name, args):
249 249 """Request that a named command be executed.
250 250
251 251 Receives the command name and a dictionary of command arguments.
252 252
253 253 Returns a ``concurrent.futures.Future`` that will resolve to the
254 254 result of that command request. That exact value is left up to
255 255 the implementation and possibly varies by command.
256 256
257 257 Not all commands can coexist with other commands in an executor
258 258 instance: it depends on the underlying wire protocol transport being
259 259 used and the command itself.
260 260
261 261 Implementations MAY call ``sendcommands()`` automatically if the
262 262 requested command can not coexist with other commands in this executor.
263 263
264 264 Implementations MAY call ``sendcommands()`` automatically when the
265 265 future's ``result()`` is called. So, consumers using multiple
266 266 commands with an executor MUST ensure that ``result()`` is not called
267 267 until all command requests have been issued.
268 268 """
269 269
270 270 def sendcommands():
271 271 """Trigger submission of queued command requests.
272 272
273 273 Not all transports submit commands as soon as they are requested to
274 274 run. When called, this method forces queued command requests to be
275 275 issued. It will no-op if all commands have already been sent.
276 276
277 277 When called, no more new commands may be issued with this executor.
278 278 """
279 279
280 280 def close():
281 281 """Signal that this command request is finished.
282 282
283 283 When called, no more new commands may be issued. All outstanding
284 284 commands that have previously been issued are waited on before
285 285 returning. This not only includes waiting for the futures to resolve,
286 286 but also waiting for all response data to arrive. In other words,
287 287 calling this waits for all on-wire state for issued command requests
288 288 to finish.
289 289
290 290 When used as a context manager, this method is called when exiting the
291 291 context manager.
292 292
293 293 This method may call ``sendcommands()`` if there are buffered commands.
294 294 """
295 295
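Because ``result()`` may force queued commands to be sent, a consumer that batches several commands into one executor should issue all of its ``callcommand()`` requests before resolving any future and let the context manager drain the responses. A minimal sketch, assuming ``remote`` is a peer obtained through the usual APIs and that both wire commands are supported by it:

    with remote.commandexecutor() as e:
        fheads = e.callcommand(b'heads', {})
        fkeys = e.callcommand(b'listkeys', {b'namespace': b'bookmarks'})

        # Issue every request before resolving any future; result() may force
        # sendcommands() on some transports.
        heads = fheads.result()
        bookmarks = fkeys.result()
    # Leaving the context manager waits for all remaining response data.
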
296 296
297 297 class ipeerrequests(interfaceutil.Interface):
298 298 """Interface for executing commands on a peer."""
299 299
300 300 limitedarguments = interfaceutil.Attribute(
301 301 """True if the peer cannot receive large argument value for commands."""
302 302 )
303 303
304 304 def commandexecutor():
305 305 """A context manager that resolves to an ipeercommandexecutor.
306 306
307 307 The object this resolves to can be used to issue command requests
308 308 to the peer.
309 309
310 310 Callers should call its ``callcommand`` method to issue command
311 311 requests.
312 312
313 313 A new executor should be obtained for each distinct set of commands
314 314 (possibly just a single command) that the consumer wants to execute
315 315 as part of a single operation or round trip. This is because some
316 316 peers are half-duplex and/or don't support persistent connections.
317 317 e.g. in the case of HTTP peers, commands sent to an executor represent
318 318 a single HTTP request. While some peers may support multiple command
319 319 sends over the wire per executor, consumers need to code to the least
320 320 capable peer. So it should be assumed that command executors buffer
321 321 called commands until they are told to send them and that each
322 322 command executor could result in a new connection or wire-level request
323 323 being issued.
324 324 """
325 325
326 326
327 327 class ipeerbase(ipeerconnection, ipeercapabilities, ipeerrequests):
328 328 """Unified interface for peer repositories.
329 329
330 330 All peer instances must conform to this interface.
331 331 """
332 332
333 333
334 334 class ipeerv2(ipeerconnection, ipeercapabilities, ipeerrequests):
335 335 """Unified peer interface for wire protocol version 2 peers."""
336 336
337 337 apidescriptor = interfaceutil.Attribute(
338 338 """Data structure holding description of server API."""
339 339 )
340 340
341 341
342 342 @interfaceutil.implementer(ipeerbase)
343 343 class peer(object):
344 344 """Base class for peer repositories."""
345 345
346 346 limitedarguments = False
347 347
348 348 def capable(self, name):
349 349 caps = self.capabilities()
350 350 if name in caps:
351 351 return True
352 352
353 353 name = b'%s=' % name
354 354 for cap in caps:
355 355 if cap.startswith(name):
356 356 return cap[len(name) :]
357 357
358 358 return False
359 359
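Note the three possible return shapes: ``False``, ``True`` for a plain boolean capability, or the string payload of a ``name=value`` capability. A small sketch, with ``peer`` assumed and the capability names purely illustrative:

    if peer.capable(b'somecap'):
        pass                                # plain boolean capability advertised

    value = peer.capable(b'othercap')
    if isinstance(value, bytes):
        pass                                # advertised as b'othercap=<value>'
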
360 360 def requirecap(self, name, purpose):
361 361 if self.capable(name):
362 362 return
363 363
364 364 raise error.CapabilityError(
365 365 _(
366 366 b'cannot %s; remote repository does not support the '
367 367 b'\'%s\' capability'
368 368 )
369 369 % (purpose, name)
370 370 )
371 371
372 372
373 373 class iverifyproblem(interfaceutil.Interface):
374 374 """Represents a problem with the integrity of the repository.
375 375
376 376 Instances of this interface are emitted to describe an integrity issue
377 377 with a repository (e.g. corrupt storage, missing data, etc).
378 378
379 379 Instances are essentially messages associated with severity.
380 380 """
381 381
382 382 warning = interfaceutil.Attribute(
383 383 """Message indicating a non-fatal problem."""
384 384 )
385 385
386 386 error = interfaceutil.Attribute("""Message indicating a fatal problem.""")
387 387
388 388 node = interfaceutil.Attribute(
389 389 """Revision encountering the problem.
390 390
391 391 ``None`` means the problem doesn't apply to a single revision.
392 392 """
393 393 )
394 394
395 395
396 396 class irevisiondelta(interfaceutil.Interface):
397 397 """Represents a delta between one revision and another.
398 398
399 399 Instances convey enough information to allow a revision to be exchanged
400 400 with another repository.
401 401
402 402 Instances represent the fulltext revision data or a delta against
403 403 another revision. Therefore the ``revision`` and ``delta`` attributes
404 404 are mutually exclusive.
405 405
406 406 Typically used for changegroup generation.
407 407 """
408 408
409 409 node = interfaceutil.Attribute("""20 byte node of this revision.""")
410 410
411 411 p1node = interfaceutil.Attribute(
412 412 """20 byte node of 1st parent of this revision."""
413 413 )
414 414
415 415 p2node = interfaceutil.Attribute(
416 416 """20 byte node of 2nd parent of this revision."""
417 417 )
418 418
419 419 linknode = interfaceutil.Attribute(
420 420 """20 byte node of the changelog revision this node is linked to."""
421 421 )
422 422
423 423 flags = interfaceutil.Attribute(
424 424 """2 bytes of integer flags that apply to this revision.
425 425
426 426 This is a bitwise composition of the ``REVISION_FLAG_*`` constants.
427 427 """
428 428 )
429 429
430 430 basenode = interfaceutil.Attribute(
431 431 """20 byte node of the revision this data is a delta against.
432 432
433 433 ``nullid`` indicates that the revision is a full revision and not
434 434 a delta.
435 435 """
436 436 )
437 437
438 438 baserevisionsize = interfaceutil.Attribute(
439 439 """Size of base revision this delta is against.
440 440
441 441 May be ``None`` if ``basenode`` is ``nullid``.
442 442 """
443 443 )
444 444
445 445 revision = interfaceutil.Attribute(
446 446 """Raw fulltext of revision data for this node."""
447 447 )
448 448
449 449 delta = interfaceutil.Attribute(
450 450 """Delta between ``basenode`` and ``node``.
451 451
452 452 Stored in the bdiff delta format.
453 453 """
454 454 )
455 455
456 456
457 457 class ifilerevisionssequence(interfaceutil.Interface):
458 458 """Contains index data for all revisions of a file.
459 459
460 460 Types implementing this behave like lists of tuples. The index
461 461 in the list corresponds to the revision number. The values contain
462 462 index metadata.
463 463
464 464 The *null* revision (revision number -1) is always the last item
465 465 in the index.
466 466 """
467 467
468 468 def __len__():
469 469 """The total number of revisions."""
470 470
471 471 def __getitem__(rev):
472 472 """Returns the object having a specific revision number.
473 473
474 474 Returns an 8-tuple with the following fields:
475 475
476 476 offset+flags
477 477 Contains the offset and flags for the revision. 64-bit unsigned
478 478 integer where the first 6 bytes are the offset and the next 2 bytes
479 479 are flags. The offset can be 0 if it is not used by the store.
480 480 compressed size
481 481 Size of the revision data in the store. It can be 0 if it isn't
482 482 needed by the store.
483 483 uncompressed size
484 484 Fulltext size. It can be 0 if it isn't needed by the store.
485 485 base revision
486 486 Revision number of revision the delta for storage is encoded
487 487 against. -1 indicates not encoded against a base revision.
488 488 link revision
489 489 Revision number of changelog revision this entry is related to.
490 490 p1 revision
491 491 Revision number of 1st parent. -1 if no 1st parent.
492 492 p2 revision
493 493 Revision number of 2nd parent. -1 if no 1st parent.
494 494 node
495 495 Binary node value for this revision number.
496 496
497 497 Negative values should index off the end of the sequence. ``-1``
498 498 should return the null revision. ``-2`` should return the most
499 499 recent revision.
500 500 """
501 501
502 502 def __contains__(rev):
503 503 """Whether a revision number exists."""
504 504
505 505 def insert(self, i, entry):
506 506 """Add an item to the index at specific revision."""
507 507
508 508
509 509 class ifileindex(interfaceutil.Interface):
510 510 """Storage interface for index data of a single file.
511 511
512 512 File storage data is divided into index metadata and data storage.
513 513 This interface defines the index portion of the interface.
514 514
515 515 The index logically consists of:
516 516
517 517 * A mapping between revision numbers and nodes.
518 518 * DAG data (storing and querying the relationship between nodes).
519 519 * Metadata to facilitate storage.
520 520 """
521 521
522 522 def __len__():
523 523 """Obtain the number of revisions stored for this file."""
524 524
525 525 def __iter__():
526 526 """Iterate over revision numbers for this file."""
527 527
528 528 def hasnode(node):
529 529 """Returns a bool indicating if a node is known to this store.
530 530
531 531 Implementations must only return True for full, binary node values:
532 532 hex nodes, revision numbers, and partial node matches must be
533 533 rejected.
534 534
535 535 The null node is never present.
536 536 """
537 537
538 538 def revs(start=0, stop=None):
539 539 """Iterate over revision numbers for this file, with control."""
540 540
541 541 def parents(node):
542 542 """Returns a 2-tuple of parent nodes for a revision.
543 543
544 544 Values will be ``nullid`` if the parent is empty.
545 545 """
546 546
547 547 def parentrevs(rev):
548 548 """Like parents() but operates on revision numbers."""
549 549
550 550 def rev(node):
551 551 """Obtain the revision number given a node.
552 552
553 553 Raises ``error.LookupError`` if the node is not known.
554 554 """
555 555
556 556 def node(rev):
557 557 """Obtain the node value given a revision number.
558 558
559 559 Raises ``IndexError`` if the node is not known.
560 560 """
561 561
562 562 def lookup(node):
563 563 """Attempt to resolve a value to a node.
564 564
565 565 Value can be a binary node, hex node, revision number, or a string
566 566 that can be converted to an integer.
567 567
568 568 Raises ``error.LookupError`` if a node could not be resolved.
569 569 """
570 570
571 571 def linkrev(rev):
572 572 """Obtain the changeset revision number a revision is linked to."""
573 573
574 574 def iscensored(rev):
575 575 """Return whether a revision's content has been censored."""
576 576
577 577 def commonancestorsheads(node1, node2):
578 578 """Obtain an iterable of nodes containing heads of common ancestors.
579 579
580 580 See ``ancestor.commonancestorsheads()``.
581 581 """
582 582
583 583 def descendants(revs):
584 584 """Obtain descendant revision numbers for a set of revision numbers.
585 585
586 586 If ``nullrev`` is in the set, this is equivalent to ``revs()``.
587 587 """
588 588
589 589 def heads(start=None, stop=None):
590 590 """Obtain a list of nodes that are DAG heads, with control.
591 591
592 592 The set of revisions examined can be limited by specifying
593 593 ``start`` and ``stop``. ``start`` is a node. ``stop`` is an
594 594 iterable of nodes. DAG traversal starts at earlier revision
595 595 ``start`` and iterates forward until any node in ``stop`` is
596 596 encountered.
597 597 """
598 598
599 599 def children(node):
600 600 """Obtain nodes that are children of a node.
601 601
602 602 Returns a list of nodes.
603 603 """
604 604
605 605
606 606 class ifiledata(interfaceutil.Interface):
607 607 """Storage interface for data storage of a specific file.
608 608
609 609 This complements ``ifileindex`` and provides an interface for accessing
610 610 data for a tracked file.
611 611 """
612 612
613 613 def size(rev):
614 614 """Obtain the fulltext size of file data.
615 615
616 616 Any metadata is excluded from size measurements.
617 617 """
618 618
619 619 def revision(node, raw=False):
620 620 """Obtain fulltext data for a node.
621 621
622 622 By default, any storage transformations are applied before the data
623 623 is returned. If ``raw`` is True, non-raw storage transformations
624 624 are not applied.
625 625
626 626 The fulltext data may contain a header containing metadata. Most
627 627 consumers should use ``read()`` to obtain the actual file data.
628 628 """
629 629
630 630 def rawdata(node):
631 631 """Obtain raw data for a node."""
632 632
633 633 def read(node):
634 634 """Resolve file fulltext data.
635 635
636 636 This is similar to ``revision()`` except any metadata in the data
637 637 headers is stripped.
638 638 """
639 639
640 640 def renamed(node):
641 641 """Obtain copy metadata for a node.
642 642
643 643 Returns ``False`` if no copy metadata is stored or a 2-tuple of
644 644 (path, node) from which this revision was copied.
645 645 """
646 646
647 647 def cmp(node, fulltext):
648 648 """Compare fulltext to another revision.
649 649
650 650 Returns True if the fulltext is different from what is stored.
651 651
652 652 This takes copy metadata into account.
653 653
654 654 TODO better document the copy metadata and censoring logic.
655 655 """
656 656
657 657 def emitrevisions(
658 658 nodes,
659 659 nodesorder=None,
660 660 revisiondata=False,
661 661 assumehaveparentrevisions=False,
662 662 deltamode=CG_DELTAMODE_STD,
663 663 ):
664 664 """Produce ``irevisiondelta`` for revisions.
665 665
666 666 Given an iterable of nodes, emits objects conforming to the
667 667 ``irevisiondelta`` interface that describe revisions in storage.
668 668
669 669 This method is a generator.
670 670
671 671 The input nodes may be unordered. Implementations must ensure that a
672 672 node's parents are emitted before the node itself. Transitively, this
673 673 means that a node may only be emitted once all its ancestors in
674 674 ``nodes`` have also been emitted.
675 675
676 676 By default, emits "index" data (the ``node``, ``p1node``, and
677 677 ``p2node`` attributes). If ``revisiondata`` is set, revision data
678 678 will also be present on the emitted objects.
679 679
680 680 With default argument values, implementations can choose to emit
681 681 either fulltext revision data or a delta. When emitting deltas,
682 682 implementations must consider whether the delta's base revision
683 683 fulltext is available to the receiver.
684 684
685 685 The base revision fulltext is guaranteed to be available if any of
686 686 the following are met:
687 687
688 688 * Its fulltext revision was emitted by this method call.
689 689 * A delta for that revision was emitted by this method call.
690 690 * ``assumehaveparentrevisions`` is True and the base revision is a
691 691 parent of the node.
692 692
693 693 ``nodesorder`` can be used to control the order that revisions are
694 694 emitted. By default, revisions can be reordered as long as they are
695 695 in DAG topological order (see above). If the value is ``nodes``,
696 696 the iteration order from ``nodes`` should be used. If the value is
697 697 ``storage``, then the native order from the backing storage layer
698 698 is used. (Not all storage layers will have strong ordering and behavior
699 699 of this mode is storage-dependent.) ``nodes`` ordering can force
700 700 revisions to be emitted before their ancestors, so consumers should
701 701 use it with care.
702 702
703 703 The ``linknode`` attribute on the returned ``irevisiondelta`` may not
704 704 be set and it is the caller's responsibility to resolve it, if needed.
705 705
706 706 If ``deltamode`` is CG_DELTAMODE_PREV and revision data is requested,
707 707 all revision data should be emitted as deltas against the revision
708 708 emitted just prior. The initial revision should be a delta against its
709 709 1st parent.
710 710 """
711 711
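A sketch of consuming this generator, assuming ``store`` implements these interfaces and ``nodes`` is an iterable of nodes known to it; each emitted object carries either a fulltext or a delta, never both:

    for rdelta in store.emitrevisions(nodes, revisiondata=True):
        if rdelta.revision is not None:
            fulltext = rdelta.revision       # a full revision was emitted
        else:
            # rdelta.delta is a bdiff patch against rdelta.basenode, which was
            # either emitted earlier or is assumed present on the receiver.
            basenode, delta = rdelta.basenode, rdelta.delta
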
712 712
713 713 class ifilemutation(interfaceutil.Interface):
714 714 """Storage interface for mutation events of a tracked file."""
715 715
716 716 def add(filedata, meta, transaction, linkrev, p1, p2):
717 717 """Add a new revision to the store.
718 718
719 719 Takes file data, dictionary of metadata, a transaction, linkrev,
720 720 and parent nodes.
721 721
722 722 Returns the node that was added.
723 723
724 724 May no-op if a revision matching the supplied data is already stored.
725 725 """
726 726
727 727 def addrevision(
728 728 revisiondata,
729 729 transaction,
730 730 linkrev,
731 731 p1,
732 732 p2,
733 733 node=None,
734 734 flags=0,
735 735 cachedelta=None,
736 736 ):
737 737 """Add a new revision to the store and return its number.
738 738
739 739 This is similar to ``add()`` except it operates at a lower level.
740 740
741 741 The data passed in already contains a metadata header, if any.
742 742
743 743 ``node`` and ``flags`` can be used to define the expected node and
744 744 the flags to use with storage. ``flags`` is a bitwise value composed
745 745 of the various ``REVISION_FLAG_*`` constants.
746 746
747 747 ``add()`` is usually called when adding files from e.g. the working
748 748 directory. ``addrevision()`` is often called by ``add()`` and for
749 749 scenarios where revision data has already been computed, such as when
750 750 applying raw data from a peer repo.
751 751 """
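A minimal sketch contrasting the two entry points; ``fl``, ``tr``, and both helper names are hypothetical, and the calls follow the signatures documented above.

    def store_working_copy_file(fl, tr, data, linkrev, p1, p2):
        # High-level path, e.g. when committing from the working directory:
        # clean file data plus a metadata dict.
        return fl.add(data, {}, tr, linkrev, p1, p2)

    def store_precomputed_revision(fl, tr, fulltext, linkrev, p1, p2, node):
        # Lower-level path, e.g. when applying raw data from a peer repo:
        # the data already carries any metadata header.
        return fl.addrevision(fulltext, tr, linkrev, p1, p2, node=node)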
752 752
753 753 def addgroup(
754 754 deltas,
755 755 linkmapper,
756 756 transaction,
757 757 addrevisioncb=None,
758 758 duplicaterevisioncb=None,
759 759 maybemissingparents=False,
760 760 ):
761 761 """Process a series of deltas for storage.
762 762
763 763 ``deltas`` is an iterable of 7-tuples of
764 764 (node, p1, p2, linknode, deltabase, delta, flags) defining revisions
765 765 to add.
766 766
767 767 The ``delta`` field contains ``mpatch`` data to apply to a base
768 768 revision, identified by ``deltabase``. The base node can be
769 769 ``nullid``, in which case the header from the delta can be ignored
770 770 and the delta used as the fulltext.
771 771
772 772 ``alwayscache`` instructs the lower layers to cache the content of the
773 773 newly added revision, even if it needs to be explicitly computed.
774 774 This used to be the default when ``addrevisioncb`` was provided up to
775 775 Mercurial 5.8.
776 776
777 ``addrevisioncb`` should be called for each node as it is committed.
778 ``duplicaterevisioncb`` should be called for each pre-existing node.
777 ``addrevisioncb`` should be called for each new rev as it is committed.
778 ``duplicaterevisioncb`` should be called for all revs with a
779 pre-existing node.
779 780
780 781 ``maybemissingparents`` is a bool indicating whether the incoming
781 782 data may reference parents/ancestor revisions that aren't present.
782 783 This flag is set when receiving data into a "shallow" store that
783 784 doesn't hold all history.
784 785
785 786 Returns a list of nodes that were processed. A node will be in the list
786 787 even if it existed in the store previously.
787 788 """
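A hedged example of driving ``addgroup()``. The ``deltas`` iterable, ``linkmapper``, ``tr``, and the callback signatures are assumptions for illustration; per this changeset's description, the callbacks are assumed to receive revision numbers rather than nodes.

    def apply_incoming_deltas(fl, deltas, linkmapper, tr):
        added = []

        def onnew(store, rev):
            # Called once per newly committed revision number (assumed
            # callback signature).
            added.append(rev)

        def onduplicate(store, rev):
            # Called for revisions whose node already existed in the store.
            pass

        nodes = fl.addgroup(
            deltas,
            linkmapper,
            tr,
            addrevisioncb=onnew,
            duplicaterevisioncb=onduplicate,
        )
        return nodes, added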
788 789
789 790 def censorrevision(tr, node, tombstone=b''):
790 791 """Remove the content of a single revision.
791 792
792 793 The specified ``node`` will have its content purged from storage.
793 794 Future attempts to access the revision data for this node will
794 795 result in failure.
795 796
796 797 A ``tombstone`` message can optionally be stored. This message may be
797 798 displayed to users when they attempt to access the missing revision
798 799 data.
799 800
800 801 Storage backends may have stored deltas against the previous content
801 802 in this revision. As part of censoring a revision, these storage
802 803 backends are expected to rewrite any internally stored deltas such
803 804 that they no longer reference the deleted content.
804 805 """
805 806
806 807 def getstrippoint(minlink):
807 808 """Find the minimum revision that must be stripped to strip a linkrev.
808 809
809 810 Returns a 2-tuple containing the minimum revision number and a set
810 811 of all revisions numbers that would be broken by this strip.
811 812
812 813 TODO this is highly revlog centric and should be abstracted into
813 814 a higher-level deletion API. ``repair.strip()`` relies on this.
814 815 """
815 816
816 817 def strip(minlink, transaction):
817 818 """Remove storage of items starting at a linkrev.
818 819
819 820 This uses ``getstrippoint()`` to determine the first node to remove.
820 821 Then it effectively truncates storage for all revisions after that.
821 822
822 823 TODO this is highly revlog centric and should be abstracted into a
823 824 higher-level deletion API.
824 825 """
825 826
826 827
827 828 class ifilestorage(ifileindex, ifiledata, ifilemutation):
828 829 """Complete storage interface for a single tracked file."""
829 830
830 831 def files():
831 832 """Obtain paths that are backing storage for this file.
832 833
833 834 TODO this is used heavily by verify code and there should probably
834 835 be a better API for that.
835 836 """
836 837
837 838 def storageinfo(
838 839 exclusivefiles=False,
839 840 sharedfiles=False,
840 841 revisionscount=False,
841 842 trackedsize=False,
842 843 storedsize=False,
843 844 ):
844 845 """Obtain information about storage for this file's data.
845 846
846 847 Returns a dict describing storage for this tracked path. The keys
847 848 in the dict map to arguments of the same name. The arguments are bools
848 849 indicating whether to calculate and obtain that data.
849 850
850 851 exclusivefiles
851 852 Iterable of (vfs, path) describing files that are exclusively
852 853 used to back storage for this tracked path.
853 854
854 855 sharedfiles
855 856 Iterable of (vfs, path) describing files that are used to back
856 857 storage for this tracked path. Those files may also provide storage
857 858 for other stored entities.
858 859
859 860 revisionscount
860 861 Number of revisions available for retrieval.
861 862
862 863 trackedsize
863 864 Total size in bytes of all tracked revisions. This is a sum of the
864 865 length of the fulltext of all revisions.
865 866
866 867 storedsize
867 868 Total size in bytes used to store data for all tracked revisions.
868 869 This is commonly less than ``trackedsize`` due to internal usage
869 870 of deltas rather than fulltext revisions.
870 871
871 872 Not all storage backends may support all queries or have a reasonable
872 873 value to use. In that case, the value should be set to ``None`` and
873 874 callers are expected to handle this special value.
874 875 """
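A minimal caller sketch: request only the fields needed and treat ``None`` as "backend could not compute this". ``fl`` is hypothetical, and the byte-string dict keys are an assumption based on the keys mapping to the argument names above.

    def report_storage(fl):
        info = fl.storageinfo(
            revisionscount=True, trackedsize=True, storedsize=True
        )
        count = info.get(b'revisionscount')
        tracked = info.get(b'trackedsize')
        stored = info.get(b'storedsize')
        if tracked and stored is not None:
            # Rough compression ratio of stored bytes versus fulltext bytes.
            ratio = stored / float(tracked)
        else:
            ratio = None
        return count, ratio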
875 876
876 877 def verifyintegrity(state):
877 878 """Verifies the integrity of file storage.
878 879
879 880 ``state`` is a dict holding state of the verifier process. It can be
880 881 used to communicate data between invocations of multiple storage
881 882 primitives.
882 883
883 884 If individual revisions cannot have their revision content resolved,
884 885 the method is expected to set the ``skipread`` key to a set of nodes
885 886 that encountered problems. If set, the method can also add the node(s)
886 887 to ``safe_renamed`` in order to indicate nodes whose rename checks can
887 888 still be performed with currently accessible data.
888 889
889 890 The method yields objects conforming to the ``iverifyproblem``
890 891 interface.
891 892 """
892 893
893 894
894 895 class idirs(interfaceutil.Interface):
895 896 """Interface representing a collection of directories from paths.
896 897
897 898 This interface is essentially a derived data structure representing
898 899 directories from a collection of paths.
899 900 """
900 901
901 902 def addpath(path):
902 903 """Add a path to the collection.
903 904
904 905 All directories in the path will be added to the collection.
905 906 """
906 907
907 908 def delpath(path):
908 909 """Remove a path from the collection.
909 910
910 911 If the removal was the last path in a particular directory, the
911 912 directory is removed from the collection.
912 913 """
913 914
914 915 def __iter__():
915 916 """Iterate over the directories in this collection of paths."""
916 917
917 918 def __contains__(path):
918 919 """Whether a specific directory is in this collection."""
919 920
920 921
921 922 class imanifestdict(interfaceutil.Interface):
922 923 """Interface representing a manifest data structure.
923 924
924 925 A manifest is effectively a dict mapping paths to entries. Each entry
925 926 consists of a binary node and extra flags affecting that entry.
926 927 """
927 928
928 929 def __getitem__(path):
929 930 """Returns the binary node value for a path in the manifest.
930 931
931 932 Raises ``KeyError`` if the path does not exist in the manifest.
932 933
933 934 Equivalent to ``self.find(path)[0]``.
934 935 """
935 936
936 937 def find(path):
937 938 """Returns the entry for a path in the manifest.
938 939
939 940 Returns a 2-tuple of (node, flags).
940 941
941 942 Raises ``KeyError`` if the path does not exist in the manifest.
942 943 """
943 944
944 945 def __len__():
945 946 """Return the number of entries in the manifest."""
946 947
947 948 def __nonzero__():
948 949 """Returns True if the manifest has entries, False otherwise."""
949 950
950 951 __bool__ = __nonzero__
951 952
952 953 def __setitem__(path, node):
953 954 """Define the node value for a path in the manifest.
954 955
955 956 If the path is already in the manifest, its flags will be copied to
956 957 the new entry.
957 958 """
958 959
959 960 def __contains__(path):
960 961 """Whether a path exists in the manifest."""
961 962
962 963 def __delitem__(path):
963 964 """Remove a path from the manifest.
964 965
965 966 Raises ``KeyError`` if the path is not in the manifest.
966 967 """
967 968
968 969 def __iter__():
969 970 """Iterate over paths in the manifest."""
970 971
971 972 def iterkeys():
972 973 """Iterate over paths in the manifest."""
973 974
974 975 def keys():
975 976 """Obtain a list of paths in the manifest."""
976 977
977 978 def filesnotin(other, match=None):
978 979 """Obtain the set of paths in this manifest but not in another.
979 980
980 981 ``match`` is an optional matcher function to be applied to both
981 982 manifests.
982 983
983 984 Returns a set of paths.
984 985 """
985 986
986 987 def dirs():
987 988 """Returns an object implementing the ``idirs`` interface."""
988 989
989 990 def hasdir(dir):
990 991 """Returns a bool indicating if a directory is in this manifest."""
991 992
992 993 def walk(match):
993 994 """Generator of paths in manifest satisfying a matcher.
994 995
995 996 If the matcher has explicit files listed and they don't exist in
996 997 the manifest, ``match.bad()`` is called for each missing file.
997 998 """
998 999
999 1000 def diff(other, match=None, clean=False):
1000 1001 """Find differences between this manifest and another.
1001 1002
1002 1003 This manifest is compared to ``other``.
1003 1004
1004 1005 If ``match`` is provided, the two manifests are filtered against this
1005 1006 matcher and only entries satisfying the matcher are compared.
1006 1007
1007 1008 If ``clean`` is True, unchanged files are included in the returned
1008 1009 object.
1009 1010
1010 1011 Returns a dict with paths as keys and values of 2-tuples of 2-tuples of
1011 1012 the form ``((node1, flag1), (node2, flag2))`` where ``(node1, flag1)``
1012 1013 represents the node and flags for this manifest and ``(node2, flag2)``
1013 1014 are the same for the other manifest.
1014 1015 """
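A sketch of consuming ``diff()`` output; ``m1`` and ``m2`` stand for two objects conforming to ``imanifestdict`` and are hypothetical. Representing a path missing from one side as a ``None`` node is an assumption here.

    def changed_paths(m1, m2):
        changed = []
        for path, ((node1, flag1), (node2, flag2)) in m1.diff(m2).items():
            if node1 is None:
                changed.append((path, b'only in other'))
            elif node2 is None:
                changed.append((path, b'only in this'))
            else:
                changed.append((path, b'differs'))
        return changed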
1015 1016
1016 1017 def setflag(path, flag):
1017 1018 """Set the flag value for a given path.
1018 1019
1019 1020 Raises ``KeyError`` if the path is not already in the manifest.
1020 1021 """
1021 1022
1022 1023 def get(path, default=None):
1023 1024 """Obtain the node value for a path or a default value if missing."""
1024 1025
1025 1026 def flags(path):
1026 1027 """Return the flags value for a path (default: empty bytestring)."""
1027 1028
1028 1029 def copy():
1029 1030 """Return a copy of this manifest."""
1030 1031
1031 1032 def items():
1032 1033 """Returns an iterable of (path, node) for items in this manifest."""
1033 1034
1034 1035 def iteritems():
1035 1036 """Identical to items()."""
1036 1037
1037 1038 def iterentries():
1038 1039 """Returns an iterable of (path, node, flags) for this manifest.
1039 1040
1040 1041 Similar to ``iteritems()`` except items are a 3-tuple and include
1041 1042 flags.
1042 1043 """
1043 1044
1044 1045 def text():
1045 1046 """Obtain the raw data representation for this manifest.
1046 1047
1047 1048 Result is used to create a manifest revision.
1048 1049 """
1049 1050
1050 1051 def fastdelta(base, changes):
1051 1052 """Obtain a delta between this manifest and another given changes.
1052 1053
1053 1054 ``base`` is the raw data representation for another manifest.
1054 1055
1055 1056 ``changes`` is an iterable of ``(path, to_delete)``.
1056 1057
1057 1058 Returns a 2-tuple containing ``bytearray(self.text())`` and the
1058 1059 delta between ``base`` and this manifest.
1059 1060
1060 1061 If this manifest implementation can't support ``fastdelta()``,
1061 1062 raise ``mercurial.manifest.FastdeltaUnavailable``.
1062 1063 """
1063 1064
1064 1065
1065 1066 class imanifestrevisionbase(interfaceutil.Interface):
1066 1067 """Base interface representing a single revision of a manifest.
1067 1068
1068 1069 Should not be used as a primary interface: should always be inherited
1069 1070 as part of a larger interface.
1070 1071 """
1071 1072
1072 1073 def copy():
1073 1074 """Obtain a copy of this manifest instance.
1074 1075
1075 1076 Returns an object conforming to the ``imanifestrevisionwritable``
1076 1077 interface. The instance will be associated with the same
1077 1078 ``imanifestlog`` collection as this instance.
1078 1079 """
1079 1080
1080 1081 def read():
1081 1082 """Obtain the parsed manifest data structure.
1082 1083
1083 1084 The returned object conforms to the ``imanifestdict`` interface.
1084 1085 """
1085 1086
1086 1087
1087 1088 class imanifestrevisionstored(imanifestrevisionbase):
1088 1089 """Interface representing a manifest revision committed to storage."""
1089 1090
1090 1091 def node():
1091 1092 """The binary node for this manifest."""
1092 1093
1093 1094 parents = interfaceutil.Attribute(
1094 1095 """List of binary nodes that are parents for this manifest revision."""
1095 1096 )
1096 1097
1097 1098 def readdelta(shallow=False):
1098 1099 """Obtain the manifest data structure representing changes from parent.
1099 1100
1100 1101 This manifest is compared to its 1st parent. A new manifest representing
1101 1102 those differences is constructed.
1102 1103
1103 1104 The returned object conforms to the ``imanifestdict`` interface.
1104 1105 """
1105 1106
1106 1107 def readfast(shallow=False):
1107 1108 """Calls either ``read()`` or ``readdelta()``.
1108 1109
1109 1110 The faster of the two options is called.
1110 1111 """
1111 1112
1112 1113 def find(key):
1113 1114 """Calls self.read().find(key)``.
1114 1115
1115 1116 Returns a 2-tuple of ``(node, flags)`` or raises ``KeyError``.
1116 1117 """
1117 1118
1118 1119
1119 1120 class imanifestrevisionwritable(imanifestrevisionbase):
1120 1121 """Interface representing a manifest revision that can be committed."""
1121 1122
1122 1123 def write(transaction, linkrev, p1node, p2node, added, removed, match=None):
1123 1124 """Add this revision to storage.
1124 1125
1125 1126 Takes a transaction object, the changeset revision number it will
1126 1127 be associated with, its parent nodes, and lists of added and
1127 1128 removed paths.
1128 1129
1129 1130 If match is provided, storage can choose not to inspect or write out
1130 1131 items that do not match. Storage is still required to be able to provide
1131 1132 the full manifest in the future for any directories written (these
1132 1133 manifests should not be "narrowed on disk").
1133 1134
1134 1135 Returns the binary node of the created revision.
1135 1136 """
1136 1137
1137 1138
1138 1139 class imanifeststorage(interfaceutil.Interface):
1139 1140 """Storage interface for manifest data."""
1140 1141
1141 1142 tree = interfaceutil.Attribute(
1142 1143 """The path to the directory this manifest tracks.
1143 1144
1144 1145 The empty bytestring represents the root manifest.
1145 1146 """
1146 1147 )
1147 1148
1148 1149 index = interfaceutil.Attribute(
1149 1150 """An ``ifilerevisionssequence`` instance."""
1150 1151 )
1151 1152
1152 1153 indexfile = interfaceutil.Attribute(
1153 1154 """Path of revlog index file.
1154 1155
1155 1156 TODO this is revlog specific and should not be exposed.
1156 1157 """
1157 1158 )
1158 1159
1159 1160 opener = interfaceutil.Attribute(
1160 1161 """VFS opener to use to access underlying files used for storage.
1161 1162
1162 1163 TODO this is revlog specific and should not be exposed.
1163 1164 """
1164 1165 )
1165 1166
1166 1167 version = interfaceutil.Attribute(
1167 1168 """Revlog version number.
1168 1169
1169 1170 TODO this is revlog specific and should not be exposed.
1170 1171 """
1171 1172 )
1172 1173
1173 1174 _generaldelta = interfaceutil.Attribute(
1174 1175 """Whether generaldelta storage is being used.
1175 1176
1176 1177 TODO this is revlog specific and should not be exposed.
1177 1178 """
1178 1179 )
1179 1180
1180 1181 fulltextcache = interfaceutil.Attribute(
1181 1182 """Dict with cache of fulltexts.
1182 1183
1183 1184 TODO this doesn't feel appropriate for the storage interface.
1184 1185 """
1185 1186 )
1186 1187
1187 1188 def __len__():
1188 1189 """Obtain the number of revisions stored for this manifest."""
1189 1190
1190 1191 def __iter__():
1191 1192 """Iterate over revision numbers for this manifest."""
1192 1193
1193 1194 def rev(node):
1194 1195 """Obtain the revision number given a binary node.
1195 1196
1196 1197 Raises ``error.LookupError`` if the node is not known.
1197 1198 """
1198 1199
1199 1200 def node(rev):
1200 1201 """Obtain the node value given a revision number.
1201 1202
1202 1203 Raises ``error.LookupError`` if the revision is not known.
1203 1204 """
1204 1205
1205 1206 def lookup(value):
1206 1207 """Attempt to resolve a value to a node.
1207 1208
1208 1209 Value can be a binary node, hex node, revision number, or a bytes
1209 1210 object that can be converted to an integer.
1210 1211
1211 1212 Raises ``error.LookupError`` if a node could not be resolved.
1212 1213 """
1213 1214
1214 1215 def parents(node):
1215 1216 """Returns a 2-tuple of parent nodes for a node.
1216 1217
1217 1218 Values will be ``nullid`` if the parent is empty.
1218 1219 """
1219 1220
1220 1221 def parentrevs(rev):
1221 1222 """Like parents() but operates on revision numbers."""
1222 1223
1223 1224 def linkrev(rev):
1224 1225 """Obtain the changeset revision number a revision is linked to."""
1225 1226
1226 1227 def revision(node, _df=None, raw=False):
1227 1228 """Obtain fulltext data for a node."""
1228 1229
1229 1230 def rawdata(node, _df=None):
1230 1231 """Obtain raw data for a node."""
1231 1232
1232 1233 def revdiff(rev1, rev2):
1233 1234 """Obtain a delta between two revision numbers.
1234 1235
1235 1236 The returned data is the result of ``bdiff.bdiff()`` on the raw
1236 1237 revision data.
1237 1238 """
1238 1239
1239 1240 def cmp(node, fulltext):
1240 1241 """Compare fulltext to another revision.
1241 1242
1242 1243 Returns True if the fulltext is different from what is stored.
1243 1244 """
1244 1245
1245 1246 def emitrevisions(
1246 1247 nodes,
1247 1248 nodesorder=None,
1248 1249 revisiondata=False,
1249 1250 assumehaveparentrevisions=False,
1250 1251 ):
1251 1252 """Produce ``irevisiondelta`` describing revisions.
1252 1253
1253 1254 See the documentation for ``ifiledata`` for more.
1254 1255 """
1255 1256
1256 1257 def addgroup(
1257 1258 deltas,
1258 1259 linkmapper,
1259 1260 transaction,
1260 1261 addrevisioncb=None,
1261 1262 duplicaterevisioncb=None,
1262 1263 ):
1263 1264 """Process a series of deltas for storage.
1264 1265
1265 1266 See the documentation in ``ifilemutation`` for more.
1266 1267 """
1267 1268
1268 1269 def rawsize(rev):
1269 1270 """Obtain the size of tracked data.
1270 1271
1271 1272 Is equivalent to ``len(m.rawdata(node))``.
1272 1273
1273 1274 TODO this method is only used by upgrade code and may be removed.
1274 1275 """
1275 1276
1276 1277 def getstrippoint(minlink):
1277 1278 """Find minimum revision that must be stripped to strip a linkrev.
1278 1279
1279 1280 See the documentation in ``ifilemutation`` for more.
1280 1281 """
1281 1282
1282 1283 def strip(minlink, transaction):
1283 1284 """Remove storage of items starting at a linkrev.
1284 1285
1285 1286 See the documentation in ``ifilemutation`` for more.
1286 1287 """
1287 1288
1288 1289 def checksize():
1289 1290 """Obtain the expected sizes of backing files.
1290 1291
1291 1292 TODO this is used by verify and it should not be part of the interface.
1292 1293 """
1293 1294
1294 1295 def files():
1295 1296 """Obtain paths that are backing storage for this manifest.
1296 1297
1297 1298 TODO this is used by verify and there should probably be a better API
1298 1299 for this functionality.
1299 1300 """
1300 1301
1301 1302 def deltaparent(rev):
1302 1303 """Obtain the revision that a revision is delta'd against.
1303 1304
1304 1305 TODO delta encoding is an implementation detail of storage and should
1305 1306 not be exposed to the storage interface.
1306 1307 """
1307 1308
1308 1309 def clone(tr, dest, **kwargs):
1309 1310 """Clone this instance to another."""
1310 1311
1311 1312 def clearcaches(clear_persisted_data=False):
1312 1313 """Clear any caches associated with this instance."""
1313 1314
1314 1315 def dirlog(d):
1315 1316 """Obtain a manifest storage instance for a tree."""
1316 1317
1317 1318 def add(
1318 1319 m, transaction, link, p1, p2, added, removed, readtree=None, match=None
1319 1320 ):
1320 1321 """Add a revision to storage.
1321 1322
1322 1323 ``m`` is an object conforming to ``imanifestdict``.
1323 1324
1324 1325 ``link`` is the linkrev revision number.
1325 1326
1326 1327 ``p1`` and ``p2`` are the parent revision numbers.
1327 1328
1328 1329 ``added`` and ``removed`` are iterables of added and removed paths,
1329 1330 respectively.
1330 1331
1331 1332 ``readtree`` is a function that can be used to read the child tree(s)
1332 1333 when recursively writing the full tree structure for tree
1333 1334 manifests.
1334 1335
1335 1336 ``match`` is a matcher that can be used to hint to storage that not all
1336 1337 paths must be inspected; this is an optimization and can be safely
1337 1338 ignored. Note that the storage must still be able to reproduce a full
1338 1339 manifest including files that did not match.
1339 1340 """
1340 1341
1341 1342 def storageinfo(
1342 1343 exclusivefiles=False,
1343 1344 sharedfiles=False,
1344 1345 revisionscount=False,
1345 1346 trackedsize=False,
1346 1347 storedsize=False,
1347 1348 ):
1348 1349 """Obtain information about storage for this manifest's data.
1349 1350
1350 1351 See ``ifilestorage.storageinfo()`` for a description of this method.
1351 1352 This one behaves the same way, except for manifest data.
1352 1353 """
1353 1354
1354 1355
1355 1356 class imanifestlog(interfaceutil.Interface):
1356 1357 """Interface representing a collection of manifest snapshots.
1357 1358
1358 1359 Represents the root manifest in a repository.
1359 1360
1360 1361 Also serves as a means to access nested tree manifests and to cache
1361 1362 tree manifests.
1362 1363 """
1363 1364
1364 1365 def __getitem__(node):
1365 1366 """Obtain a manifest instance for a given binary node.
1366 1367
1367 1368 Equivalent to calling ``self.get('', node)``.
1368 1369
1369 1370 The returned object conforms to the ``imanifestrevisionstored``
1370 1371 interface.
1371 1372 """
1372 1373
1373 1374 def get(tree, node, verify=True):
1374 1375 """Retrieve the manifest instance for a given directory and binary node.
1375 1376
1376 1377 ``node`` always refers to the node of the root manifest (which will be
1377 1378 the only manifest if flat manifests are being used).
1378 1379
1379 1380 If ``tree`` is the empty string, the root manifest is returned.
1380 1381 Otherwise the manifest for the specified directory will be returned
1381 1382 (requires tree manifests).
1382 1383
1383 1384 If ``verify`` is True, ``LookupError`` is raised if the node is not
1384 1385 known.
1385 1386
1386 1387 The returned object conforms to the ``imanifestrevisionstored``
1387 1388 interface.
1388 1389 """
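Hypothetical usage of ``imanifestlog``; ``mfl`` and ``node`` are made-up names, and the trailing-slash convention for tree directory names is an assumption.

    def read_manifests(mfl, node):
        # Root manifest; equivalent to mfl.get(b'', node).read().
        rootmf = mfl[node].read()
        # With tree manifests, a subdirectory is addressed by name while
        # ``node`` still refers to the root manifest node.
        subdirmf = mfl.get(b'dir/', node).read()
        return rootmf, subdirmf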
1389 1390
1390 1391 def getstorage(tree):
1391 1392 """Retrieve an interface to storage for a particular tree.
1392 1393
1393 1394 If ``tree`` is the empty bytestring, storage for the root manifest will
1394 1395 be returned. Otherwise storage for a tree manifest is returned.
1395 1396
1396 1397 TODO formalize interface for returned object.
1397 1398 """
1398 1399
1399 1400 def clearcaches():
1400 1401 """Clear caches associated with this collection."""
1401 1402
1402 1403 def rev(node):
1403 1404 """Obtain the revision number for a binary node.
1404 1405
1405 1406 Raises ``error.LookupError`` if the node is not known.
1406 1407 """
1407 1408
1408 1409 def update_caches(transaction):
1409 1410 """update whatever cache are relevant for the used storage."""
1410 1411
1411 1412
1412 1413 class ilocalrepositoryfilestorage(interfaceutil.Interface):
1413 1414 """Local repository sub-interface providing access to tracked file storage.
1414 1415
1415 1416 This interface defines how a repository accesses storage for a single
1416 1417 tracked file path.
1417 1418 """
1418 1419
1419 1420 def file(f):
1420 1421 """Obtain a filelog for a tracked path.
1421 1422
1422 1423 The returned type conforms to the ``ifilestorage`` interface.
1423 1424 """
1424 1425
1425 1426
1426 1427 class ilocalrepositorymain(interfaceutil.Interface):
1427 1428 """Main interface for local repositories.
1428 1429
1429 1430 This currently captures the reality of things - not how things should be.
1430 1431 """
1431 1432
1432 1433 supportedformats = interfaceutil.Attribute(
1433 1434 """Set of requirements that apply to stream clone.
1434 1435
1435 1436 This is actually a class attribute and is shared among all instances.
1436 1437 """
1437 1438 )
1438 1439
1439 1440 supported = interfaceutil.Attribute(
1440 1441 """Set of requirements that this repo is capable of opening."""
1441 1442 )
1442 1443
1443 1444 requirements = interfaceutil.Attribute(
1444 1445 """Set of requirements this repo uses."""
1445 1446 )
1446 1447
1447 1448 features = interfaceutil.Attribute(
1448 1449 """Set of "features" this repository supports.
1449 1450
1450 1451 A "feature" is a loosely-defined term. It can refer to a feature
1451 1452 in the classical sense or can describe an implementation detail
1452 1453 of the repository. For example, a ``readonly`` feature may denote
1453 1454 the repository as read-only. Or a ``revlogfilestore`` feature may
1454 1455 denote that the repository is using revlogs for file storage.
1455 1456
1456 1457 The intent of features is to provide a machine-queryable mechanism
1457 1458 for repo consumers to test for various repository characteristics.
1458 1459
1459 1460 Features are similar to ``requirements``. The main difference is that
1460 1461 requirements are stored on-disk and represent requirements to open the
1461 1462 repository. Features describe more granular, run-time capabilities of
1462 1463 the repository (which may be derived from requirements).
1463 1464 """
1464 1465 )
1465 1466
1466 1467 filtername = interfaceutil.Attribute(
1467 1468 """Name of the repoview that is active on this repo."""
1468 1469 )
1469 1470
1470 1471 wvfs = interfaceutil.Attribute(
1471 1472 """VFS used to access the working directory."""
1472 1473 )
1473 1474
1474 1475 vfs = interfaceutil.Attribute(
1475 1476 """VFS rooted at the .hg directory.
1476 1477
1477 1478 Used to access repository data not in the store.
1478 1479 """
1479 1480 )
1480 1481
1481 1482 svfs = interfaceutil.Attribute(
1482 1483 """VFS rooted at the store.
1483 1484
1484 1485 Used to access repository data in the store. Typically .hg/store.
1485 1486 But can point elsewhere if the store is shared.
1486 1487 """
1487 1488 )
1488 1489
1489 1490 root = interfaceutil.Attribute(
1490 1491 """Path to the root of the working directory."""
1491 1492 )
1492 1493
1493 1494 path = interfaceutil.Attribute("""Path to the .hg directory.""")
1494 1495
1495 1496 origroot = interfaceutil.Attribute(
1496 1497 """The filesystem path that was used to construct the repo."""
1497 1498 )
1498 1499
1499 1500 auditor = interfaceutil.Attribute(
1500 1501 """A pathauditor for the working directory.
1501 1502
1502 1503 This checks if a path refers to a nested repository.
1503 1504
1504 1505 Operates on the filesystem.
1505 1506 """
1506 1507 )
1507 1508
1508 1509 nofsauditor = interfaceutil.Attribute(
1509 1510 """A pathauditor for the working directory.
1510 1511
1511 1512 This is like ``auditor`` except it doesn't do filesystem checks.
1512 1513 """
1513 1514 )
1514 1515
1515 1516 baseui = interfaceutil.Attribute(
1516 1517 """Original ui instance passed into constructor."""
1517 1518 )
1518 1519
1519 1520 ui = interfaceutil.Attribute("""Main ui instance for this repository.""")
1520 1521
1521 1522 sharedpath = interfaceutil.Attribute(
1522 1523 """Path to the .hg directory of the repo this repo was shared from."""
1523 1524 )
1524 1525
1525 1526 store = interfaceutil.Attribute("""A store instance.""")
1526 1527
1527 1528 spath = interfaceutil.Attribute("""Path to the store.""")
1528 1529
1529 1530 sjoin = interfaceutil.Attribute("""Alias to self.store.join.""")
1530 1531
1531 1532 cachevfs = interfaceutil.Attribute(
1532 1533 """A VFS used to access the cache directory.
1533 1534
1534 1535 Typically .hg/cache.
1535 1536 """
1536 1537 )
1537 1538
1538 1539 wcachevfs = interfaceutil.Attribute(
1539 1540 """A VFS used to access the cache directory dedicated to working copy
1540 1541
1541 1542 Typically .hg/wcache.
1542 1543 """
1543 1544 )
1544 1545
1545 1546 filteredrevcache = interfaceutil.Attribute(
1546 1547 """Holds sets of revisions to be filtered."""
1547 1548 )
1548 1549
1549 1550 names = interfaceutil.Attribute("""A ``namespaces`` instance.""")
1550 1551
1551 1552 filecopiesmode = interfaceutil.Attribute(
1552 1553 """The way files copies should be dealt with in this repo."""
1553 1554 )
1554 1555
1555 1556 def close():
1556 1557 """Close the handle on this repository."""
1557 1558
1558 1559 def peer():
1559 1560 """Obtain an object conforming to the ``peer`` interface."""
1560 1561
1561 1562 def unfiltered():
1562 1563 """Obtain an unfiltered/raw view of this repo."""
1563 1564
1564 1565 def filtered(name, visibilityexceptions=None):
1565 1566 """Obtain a named view of this repository."""
1566 1567
1567 1568 obsstore = interfaceutil.Attribute("""A store of obsolescence data.""")
1568 1569
1569 1570 changelog = interfaceutil.Attribute("""A handle on the changelog revlog.""")
1570 1571
1571 1572 manifestlog = interfaceutil.Attribute(
1572 1573 """An instance conforming to the ``imanifestlog`` interface.
1573 1574
1574 1575 Provides access to manifests for the repository.
1575 1576 """
1576 1577 )
1577 1578
1578 1579 dirstate = interfaceutil.Attribute("""Working directory state.""")
1579 1580
1580 1581 narrowpats = interfaceutil.Attribute(
1581 1582 """Matcher patterns for this repository's narrowspec."""
1582 1583 )
1583 1584
1584 1585 def narrowmatch(match=None, includeexact=False):
1585 1586 """Obtain a matcher for the narrowspec."""
1586 1587
1587 1588 def setnarrowpats(newincludes, newexcludes):
1588 1589 """Define the narrowspec for this repository."""
1589 1590
1590 1591 def __getitem__(changeid):
1591 1592 """Try to resolve a changectx."""
1592 1593
1593 1594 def __contains__(changeid):
1594 1595 """Whether a changeset exists."""
1595 1596
1596 1597 def __nonzero__():
1597 1598 """Always returns True."""
1598 1599 return True
1599 1600
1600 1601 __bool__ = __nonzero__
1601 1602
1602 1603 def __len__():
1603 1604 """Returns the number of changesets in the repo."""
1604 1605
1605 1606 def __iter__():
1606 1607 """Iterate over revisions in the changelog."""
1607 1608
1608 1609 def revs(expr, *args):
1609 1610 """Evaluate a revset.
1610 1611
1611 1612 Emits revisions.
1612 1613 """
1613 1614
1614 1615 def set(expr, *args):
1615 1616 """Evaluate a revset.
1616 1617
1617 1618 Emits changectx instances.
1618 1619 """
1619 1620
1620 1621 def anyrevs(specs, user=False, localalias=None):
1621 1622 """Find revisions matching one of the given revsets."""
1622 1623
1623 1624 def url():
1624 1625 """Returns a string representing the location of this repo."""
1625 1626
1626 1627 def hook(name, throw=False, **args):
1627 1628 """Call a hook."""
1628 1629
1629 1630 def tags():
1630 1631 """Return a mapping of tag to node."""
1631 1632
1632 1633 def tagtype(tagname):
1633 1634 """Return the type of a given tag."""
1634 1635
1635 1636 def tagslist():
1636 1637 """Return a list of tags ordered by revision."""
1637 1638
1638 1639 def nodetags(node):
1639 1640 """Return the tags associated with a node."""
1640 1641
1641 1642 def nodebookmarks(node):
1642 1643 """Return the list of bookmarks pointing to the specified node."""
1643 1644
1644 1645 def branchmap():
1645 1646 """Return a mapping of branch to heads in that branch."""
1646 1647
1647 1648 def revbranchcache():
1648 1649 pass
1649 1650
1650 1651 def register_changeset(rev, changelogrevision):
1651 1652 """Extension point for caches for new nodes.
1652 1653
1653 1654 Multiple consumers are expected to need parts of the changelogrevision,
1654 1655 so it is provided as an optimization to avoid duplicate lookups. A simple
1655 1656 cache would be fragile when other revisions are accessed, too."""
1656 1657 pass
1657 1658
1658 1659 def branchtip(branchtip, ignoremissing=False):
1659 1660 """Return the tip node for a given branch."""
1660 1661
1661 1662 def lookup(key):
1662 1663 """Resolve the node for a revision."""
1663 1664
1664 1665 def lookupbranch(key):
1665 1666 """Look up the branch name of the given revision or branch name."""
1666 1667
1667 1668 def known(nodes):
1668 1669 """Determine whether a series of nodes is known.
1669 1670
1670 1671 Returns a list of bools.
1671 1672 """
1672 1673
1673 1674 def local():
1674 1675 """Whether the repository is local."""
1675 1676 return True
1676 1677
1677 1678 def publishing():
1678 1679 """Whether the repository is a publishing repository."""
1679 1680
1680 1681 def cancopy():
1681 1682 pass
1682 1683
1683 1684 def shared():
1684 1685 """The type of shared repository or None."""
1685 1686
1686 1687 def wjoin(f, *insidef):
1687 1688 """Calls self.vfs.reljoin(self.root, f, *insidef)"""
1688 1689
1689 1690 def setparents(p1, p2):
1690 1691 """Set the parent nodes of the working directory."""
1691 1692
1692 1693 def filectx(path, changeid=None, fileid=None):
1693 1694 """Obtain a filectx for the given file revision."""
1694 1695
1695 1696 def getcwd():
1696 1697 """Obtain the current working directory from the dirstate."""
1697 1698
1698 1699 def pathto(f, cwd=None):
1699 1700 """Obtain the relative path to a file."""
1700 1701
1701 1702 def adddatafilter(name, fltr):
1702 1703 pass
1703 1704
1704 1705 def wread(filename):
1705 1706 """Read a file from wvfs, using data filters."""
1706 1707
1707 1708 def wwrite(filename, data, flags, backgroundclose=False, **kwargs):
1708 1709 """Write data to a file in the wvfs, using data filters."""
1709 1710
1710 1711 def wwritedata(filename, data):
1711 1712 """Resolve data for writing to the wvfs, using data filters."""
1712 1713
1713 1714 def currenttransaction():
1714 1715 """Obtain the current transaction instance or None."""
1715 1716
1716 1717 def transaction(desc, report=None):
1717 1718 """Open a new transaction to write to the repository."""
1718 1719
1719 1720 def undofiles():
1720 1721 """Returns a list of (vfs, path) for files to undo transactions."""
1721 1722
1722 1723 def recover():
1723 1724 """Roll back an interrupted transaction."""
1724 1725
1725 1726 def rollback(dryrun=False, force=False):
1726 1727 """Undo the last transaction.
1727 1728
1728 1729 DANGEROUS.
1729 1730 """
1730 1731
1731 1732 def updatecaches(tr=None, full=False):
1732 1733 """Warm repo caches."""
1733 1734
1734 1735 def invalidatecaches():
1735 1736 """Invalidate cached data due to the repository mutating."""
1736 1737
1737 1738 def invalidatevolatilesets():
1738 1739 pass
1739 1740
1740 1741 def invalidatedirstate():
1741 1742 """Invalidate the dirstate."""
1742 1743
1743 1744 def invalidate(clearfilecache=False):
1744 1745 pass
1745 1746
1746 1747 def invalidateall():
1747 1748 pass
1748 1749
1749 1750 def lock(wait=True):
1750 1751 """Lock the repository store and return a lock instance."""
1751 1752
1752 1753 def wlock(wait=True):
1753 1754 """Lock the non-store parts of the repository."""
1754 1755
1755 1756 def currentwlock():
1756 1757 """Return the wlock if it's held or None."""
1757 1758
1758 1759 def checkcommitpatterns(wctx, match, status, fail):
1759 1760 pass
1760 1761
1761 1762 def commit(
1762 1763 text=b'',
1763 1764 user=None,
1764 1765 date=None,
1765 1766 match=None,
1766 1767 force=False,
1767 1768 editor=False,
1768 1769 extra=None,
1769 1770 ):
1770 1771 """Add a new revision to the repository."""
1771 1772
1772 1773 def commitctx(ctx, error=False, origctx=None):
1773 1774 """Commit a commitctx instance to the repository."""
1774 1775
1775 1776 def destroying():
1776 1777 """Inform the repository that nodes are about to be destroyed."""
1777 1778
1778 1779 def destroyed():
1779 1780 """Inform the repository that nodes have been destroyed."""
1780 1781
1781 1782 def status(
1782 1783 node1=b'.',
1783 1784 node2=None,
1784 1785 match=None,
1785 1786 ignored=False,
1786 1787 clean=False,
1787 1788 unknown=False,
1788 1789 listsubrepos=False,
1789 1790 ):
1790 1791 """Convenience method to call repo[x].status()."""
1791 1792
1792 1793 def addpostdsstatus(ps):
1793 1794 pass
1794 1795
1795 1796 def postdsstatus():
1796 1797 pass
1797 1798
1798 1799 def clearpostdsstatus():
1799 1800 pass
1800 1801
1801 1802 def heads(start=None):
1802 1803 """Obtain list of nodes that are DAG heads."""
1803 1804
1804 1805 def branchheads(branch=None, start=None, closed=False):
1805 1806 pass
1806 1807
1807 1808 def branches(nodes):
1808 1809 pass
1809 1810
1810 1811 def between(pairs):
1811 1812 pass
1812 1813
1813 1814 def checkpush(pushop):
1814 1815 pass
1815 1816
1816 1817 prepushoutgoinghooks = interfaceutil.Attribute("""util.hooks instance.""")
1817 1818
1818 1819 def pushkey(namespace, key, old, new):
1819 1820 pass
1820 1821
1821 1822 def listkeys(namespace):
1822 1823 pass
1823 1824
1824 1825 def debugwireargs(one, two, three=None, four=None, five=None):
1825 1826 pass
1826 1827
1827 1828 def savecommitmessage(text):
1828 1829 pass
1829 1830
1830 1831
1831 1832 class completelocalrepository(
1832 1833 ilocalrepositorymain, ilocalrepositoryfilestorage
1833 1834 ):
1834 1835 """Complete interface for a local repository."""
1835 1836
1836 1837
1837 1838 class iwireprotocolcommandcacher(interfaceutil.Interface):
1838 1839 """Represents a caching backend for wire protocol commands.
1839 1840
1840 1841 Wire protocol version 2 supports transparent caching of many commands.
1841 1842 To leverage this caching, servers can activate objects that cache
1842 1843 command responses. Objects handle both cache writing and reading.
1843 1844 This interface defines how that response caching mechanism works.
1844 1845
1845 1846 Wire protocol version 2 commands emit a series of objects that are
1846 1847 serialized and sent to the client. The caching layer exists between
1847 1848 the invocation of the command function and the sending of its output
1848 1849 objects to an output layer.
1849 1850
1850 1851 Instances of this interface represent a binding to a cache that
1851 1852 can serve a response (in place of calling a command function) and/or
1852 1853 write responses to a cache for subsequent use.
1853 1854
1854 1855 When a command request arrives, the following happens with regards
1855 1856 to this interface:
1856 1857
1857 1858 1. The server determines whether the command request is cacheable.
1858 1859 2. If it is, an instance of this interface is spawned.
1859 1860 3. The cacher is activated in a context manager (``__enter__`` is called).
1860 1861 4. A cache *key* for that request is derived. This will call the
1861 1862 instance's ``adjustcachekeystate()`` method so the derivation
1862 1863 can be influenced.
1863 1864 5. The cacher is informed of the derived cache key via a call to
1864 1865 ``setcachekey()``.
1865 1866 6. The cacher's ``lookup()`` method is called to test for presence of
1866 1867 the derived key in the cache.
1867 1868 7. If ``lookup()`` returns a hit, that cached result is used in place
1868 1869 of invoking the command function. ``__exit__`` is called and the instance
1869 1870 is discarded.
1870 1871 8. The command function is invoked.
1871 1872 9. ``onobject()`` is called for each object emitted by the command
1872 1873 function.
1873 1874 10. After the final object is seen, ``onfinished()`` is called.
1874 1875 11. ``__exit__`` is called to signal the end of use of the instance.
1875 1876
1876 1877 Cache *key* derivation can be influenced by the instance.
1877 1878
1878 1879 Cache keys are initially derived by a deterministic representation of
1879 1880 the command request. This includes the command name, arguments, protocol
1880 1881 version, etc. This initial key derivation is performed by CBOR-encoding a
1881 1882 data structure and feeding that output into a hasher.
1882 1883
1883 1884 Instances of this interface can influence this initial key derivation
1884 1885 via ``adjustcachekeystate()``.
1885 1886
1886 1887 The instance is informed of the derived cache key via a call to
1887 1888 ``setcachekey()``. The instance must store the key locally so it can
1888 1889 be consulted on subsequent operations that may require it.
1889 1890
1890 1891 When constructed, the instance has access to a callable that can be used
1891 1892 for encoding response objects. This callable receives as its single
1892 1893 argument an object emitted by a command function. It returns an iterable
1893 1894 of bytes chunks representing the encoded object. Unless the cacher is
1894 1895 caching native Python objects in memory or has a way of reconstructing
1895 1896 the original Python objects, implementations typically call this function
1896 1897 to produce bytes from the output objects and then store those bytes in
1897 1898 the cache. When it comes time to re-emit those bytes, they are wrapped
1898 1899 in a ``wireprototypes.encodedresponse`` instance to tell the output
1899 1900 layer that they are pre-encoded.
1900 1901
1901 1902 When receiving the objects emitted by the command function, instances
1902 1903 can choose what to do with those objects. The simplest thing to do is
1903 1904 re-emit the original objects. They will be forwarded to the output
1904 1905 layer and will be processed as if the cacher did not exist.
1905 1906
1906 1907 Implementations could also choose to not emit objects - instead locally
1907 1908 buffering objects or their encoded representation. They could then emit
1908 1909 a single "coalesced" object when ``onfinished()`` is called. In
1909 1910 this way, the implementation would function as a filtering layer of
1910 1911 sorts.
1911 1912
1912 1913 When caching objects, typically the encoded form of the object will
1913 1914 be stored. Keep in mind that if the original object is forwarded to
1914 1915 the output layer, it will need to be encoded there as well. For large
1915 1916 output, this redundant encoding could add overhead. Implementations
1916 1917 could wrap the encoded object data in ``wireprototypes.encodedresponse``
1917 1918 instances to avoid this overhead.
1918 1919 """
1919 1920
1920 1921 def __enter__():
1921 1922 """Marks the instance as active.
1922 1923
1923 1924 Should return self.
1924 1925 """
1925 1926
1926 1927 def __exit__(exctype, excvalue, exctb):
1927 1928 """Called when cacher is no longer used.
1928 1929
1929 1930 This can be used by implementations to perform cleanup actions (e.g.
1930 1931 disconnecting network sockets, aborting a partially cached response).
1931 1932 """
1932 1933
1933 1934 def adjustcachekeystate(state):
1934 1935 """Influences cache key derivation by adjusting state to derive key.
1935 1936
1936 1937 A dict defining the state used to derive the cache key is passed.
1937 1938
1938 1939 Implementations can modify this dict to record additional state that
1939 1940 should influence key derivation.
1940 1941
1941 1942 Implementations are *highly* encouraged to not modify or delete
1942 1943 existing keys.
1943 1944 """
1944 1945
1945 1946 def setcachekey(key):
1946 1947 """Record the derived cache key for this request.
1947 1948
1948 1949 Instances may mutate the key for internal usage, as desired. e.g.
1949 1950 instances may wish to prepend the repo name, introduce path
1950 1951 components for filesystem or URL addressing, etc. Behavior is up to
1951 1952 the cache.
1952 1953
1953 1954 Returns a bool indicating if the request is cacheable by this
1954 1955 instance.
1955 1956 """
1956 1957
1957 1958 def lookup():
1958 1959 """Attempt to resolve an entry in the cache.
1959 1960
1960 1961 The instance is instructed to look for the cache key that it was
1961 1962 informed about via the call to ``setcachekey()``.
1962 1963
1963 1964 If there's no cache hit or the cacher doesn't wish to use the cached
1964 1965 entry, ``None`` should be returned.
1965 1966
1966 1967 Else, a dict defining the cached result should be returned. The
1967 1968 dict may have the following keys:
1968 1969
1969 1970 objs
1970 1971 An iterable of objects that should be sent to the client. That
1971 1972 iterable of objects is expected to be what the command function
1972 1973 would return if invoked or an equivalent representation thereof.
1973 1974 """
1974 1975
1975 1976 def onobject(obj):
1976 1977 """Called when a new object is emitted from the command function.
1977 1978
1978 1979 Receives as its argument the object that was emitted from the
1979 1980 command function.
1980 1981
1981 1982 This method returns an iterator of objects to forward to the output
1982 1983 layer. The easiest implementation is a generator that just
1983 1984 ``yield obj``.
1984 1985 """
1985 1986
1986 1987 def onfinished():
1987 1988 """Called after all objects have been emitted from the command function.
1988 1989
1989 1990 Implementations should return an iterator of objects to forward to
1990 1991 the output layer.
1991 1992
1992 1993 This method can be a generator.
1993 1994 """
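A minimal, purely illustrative in-memory cacher following the lifecycle described above. It is not part of Mercurial, the class name is invented, and it glosses over encoding (a real cacher would typically store the encoded bytes wrapped in ``wireprototypes.encodedresponse``).

    class memorycommandcacher(object):
        _cache = {}  # shared across instances; acceptable for a sketch

        def __init__(self):
            self._key = None
            self._buffered = []

        def __enter__(self):
            return self

        def __exit__(self, exctype, excvalue, exctb):
            self._buffered = []

        def adjustcachekeystate(self, state):
            # Record extra state influencing key derivation; existing keys
            # are left untouched, as recommended.
            state[b'cacher'] = b'memory-v1'

        def setcachekey(self, key):
            self._key = key
            return True

        def lookup(self):
            if self._key in self._cache:
                return {b'objs': list(self._cache[self._key])}
            return None

        def onobject(self, obj):
            self._buffered.append(obj)
            yield obj  # forward unchanged to the output layer

        def onfinished(self):
            self._cache[self._key] = list(self._buffered)
            return []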
@@ -1,3086 +1,3087 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import io
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullhex,
29 29 nullid,
30 30 nullrev,
31 31 short,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .pycompat import getattr
39 39 from .revlogutils.constants import (
40 40 FLAG_GENERALDELTA,
41 41 FLAG_INLINE_DATA,
42 42 REVLOGV0,
43 43 REVLOGV1,
44 44 REVLOGV1_FLAGS,
45 45 REVLOGV2,
46 46 REVLOGV2_FLAGS,
47 47 REVLOG_DEFAULT_FLAGS,
48 48 REVLOG_DEFAULT_FORMAT,
49 49 REVLOG_DEFAULT_VERSION,
50 50 )
51 51 from .revlogutils.flagutil import (
52 52 REVIDX_DEFAULT_FLAGS,
53 53 REVIDX_ELLIPSIS,
54 54 REVIDX_EXTSTORED,
55 55 REVIDX_FLAGS_ORDER,
56 56 REVIDX_HASCOPIESINFO,
57 57 REVIDX_ISCENSORED,
58 58 REVIDX_RAWTEXT_CHANGING_FLAGS,
59 59 REVIDX_SIDEDATA,
60 60 )
61 61 from .thirdparty import attr
62 62 from . import (
63 63 ancestor,
64 64 dagop,
65 65 error,
66 66 mdiff,
67 67 policy,
68 68 pycompat,
69 69 templatefilters,
70 70 util,
71 71 )
72 72 from .interfaces import (
73 73 repository,
74 74 util as interfaceutil,
75 75 )
76 76 from .revlogutils import (
77 77 deltas as deltautil,
78 78 flagutil,
79 79 nodemap as nodemaputil,
80 80 sidedata as sidedatautil,
81 81 )
82 82 from .utils import (
83 83 storageutil,
84 84 stringutil,
85 85 )
86 86
87 87 # blanked usage of all the name to prevent pyflakes constraints
88 88 # We need these name available in the module for extensions.
89 89 REVLOGV0
90 90 REVLOGV1
91 91 REVLOGV2
92 92 FLAG_INLINE_DATA
93 93 FLAG_GENERALDELTA
94 94 REVLOG_DEFAULT_FLAGS
95 95 REVLOG_DEFAULT_FORMAT
96 96 REVLOG_DEFAULT_VERSION
97 97 REVLOGV1_FLAGS
98 98 REVLOGV2_FLAGS
99 99 REVIDX_ISCENSORED
100 100 REVIDX_ELLIPSIS
101 101 REVIDX_SIDEDATA
102 102 REVIDX_HASCOPIESINFO
103 103 REVIDX_EXTSTORED
104 104 REVIDX_DEFAULT_FLAGS
105 105 REVIDX_FLAGS_ORDER
106 106 REVIDX_RAWTEXT_CHANGING_FLAGS
107 107
108 108 parsers = policy.importmod('parsers')
109 109 rustancestor = policy.importrust('ancestor')
110 110 rustdagop = policy.importrust('dagop')
111 111 rustrevlog = policy.importrust('revlog')
112 112
113 113 # Aliased for performance.
114 114 _zlibdecompress = zlib.decompress
115 115
116 116 # max size of revlog with inline data
117 117 _maxinline = 131072
118 118 _chunksize = 1048576
119 119
120 120 # Flag processors for REVIDX_ELLIPSIS.
121 121 def ellipsisreadprocessor(rl, text):
122 122 return text, False, {}
123 123
124 124
125 125 def ellipsiswriteprocessor(rl, text, sidedata):
126 126 return text, False
127 127
128 128
129 129 def ellipsisrawprocessor(rl, text):
130 130 return False
131 131
132 132
133 133 ellipsisprocessor = (
134 134 ellipsisreadprocessor,
135 135 ellipsiswriteprocessor,
136 136 ellipsisrawprocessor,
137 137 )
138 138
139 139
140 140 def getoffset(q):
141 141 return int(q >> 16)
142 142
143 143
144 144 def gettype(q):
145 145 return int(q & 0xFFFF)
146 146
147 147
148 148 def offset_type(offset, type):
149 149 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
150 150 raise ValueError(b'unknown revlog index flags')
151 151 return int(int(offset) << 16 | type)
152 152
153 153
154 154 def _verify_revision(rl, skipflags, state, node):
155 155 """Verify the integrity of the given revlog ``node`` while providing a hook
156 156 point for extensions to influence the operation."""
157 157 if skipflags:
158 158 state[b'skipread'].add(node)
159 159 else:
160 160 # Side-effect: read content and verify hash.
161 161 rl.revision(node)
162 162
163 163
164 164 # True if a fast implementation for persistent-nodemap is available
165 165 #
166 166 # We also consider we have a "fast" implementation in "pure" python because
167 167 # people using pure don't really have performance considerations (and a
168 168 # wheelbarrow of other slowness sources)
169 169 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
170 170 parsers, 'BaseIndexObject'
171 171 )
172 172
173 173
174 174 @attr.s(slots=True, frozen=True)
175 175 class _revisioninfo(object):
176 176 """Information about a revision that allows building its fulltext
177 177 node: expected hash of the revision
178 178 p1, p2: parent revs of the revision
179 179 btext: built text cache consisting of a one-element list
180 180 cachedelta: (baserev, uncompressed_delta) or None
181 181 flags: flags associated to the revision storage
182 182
183 183 One of btext[0] or cachedelta must be set.
184 184 """
185 185
186 186 node = attr.ib()
187 187 p1 = attr.ib()
188 188 p2 = attr.ib()
189 189 btext = attr.ib()
190 190 textlen = attr.ib()
191 191 cachedelta = attr.ib()
192 192 flags = attr.ib()
193 193
194 194
195 195 @interfaceutil.implementer(repository.irevisiondelta)
196 196 @attr.s(slots=True)
197 197 class revlogrevisiondelta(object):
198 198 node = attr.ib()
199 199 p1node = attr.ib()
200 200 p2node = attr.ib()
201 201 basenode = attr.ib()
202 202 flags = attr.ib()
203 203 baserevisionsize = attr.ib()
204 204 revision = attr.ib()
205 205 delta = attr.ib()
206 206 linknode = attr.ib(default=None)
207 207
208 208
209 209 @interfaceutil.implementer(repository.iverifyproblem)
210 210 @attr.s(frozen=True)
211 211 class revlogproblem(object):
212 212 warning = attr.ib(default=None)
213 213 error = attr.ib(default=None)
214 214 node = attr.ib(default=None)
215 215
216 216
217 217 # index v0:
218 218 # 4 bytes: offset
219 219 # 4 bytes: compressed length
220 220 # 4 bytes: base rev
221 221 # 4 bytes: link rev
222 222 # 20 bytes: parent 1 nodeid
223 223 # 20 bytes: parent 2 nodeid
224 224 # 20 bytes: nodeid
225 225 indexformatv0 = struct.Struct(b">4l20s20s20s")
226 226 indexformatv0_pack = indexformatv0.pack
227 227 indexformatv0_unpack = indexformatv0.unpack
228 228
229 229
230 230 class revlogoldindex(list):
231 231 @property
232 232 def nodemap(self):
233 233 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
234 234 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
235 235 return self._nodemap
236 236
237 237 @util.propertycache
238 238 def _nodemap(self):
239 239 nodemap = nodemaputil.NodeMap({nullid: nullrev})
240 240 for r in range(0, len(self)):
241 241 n = self[r][7]
242 242 nodemap[n] = r
243 243 return nodemap
244 244
245 245 def has_node(self, node):
246 246 """return True if the node exist in the index"""
247 247 return node in self._nodemap
248 248
249 249 def rev(self, node):
250 250 """return a revision for a node
251 251
252 252 If the node is unknown, raise a RevlogError"""
253 253 return self._nodemap[node]
254 254
255 255 def get_rev(self, node):
256 256 """return a revision for a node
257 257
258 258 If the node is unknown, return None"""
259 259 return self._nodemap.get(node)
260 260
261 261 def append(self, tup):
262 262 self._nodemap[tup[7]] = len(self)
263 263 super(revlogoldindex, self).append(tup)
264 264
265 265 def __delitem__(self, i):
266 266 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
267 267 raise ValueError(b"deleting slices only supports a:-1 with step 1")
268 268 for r in pycompat.xrange(i.start, len(self)):
269 269 del self._nodemap[self[r][7]]
270 270 super(revlogoldindex, self).__delitem__(i)
271 271
272 272 def clearcaches(self):
273 273 self.__dict__.pop('_nodemap', None)
274 274
275 275 def __getitem__(self, i):
276 276 if i == -1:
277 277 return (0, 0, 0, -1, -1, -1, -1, nullid)
278 278 return list.__getitem__(self, i)
279 279
280 280
281 281 class revlogoldio(object):
282 282 def __init__(self):
283 283 self.size = indexformatv0.size
284 284
285 285 def parseindex(self, data, inline):
286 286 s = self.size
287 287 index = []
288 288 nodemap = nodemaputil.NodeMap({nullid: nullrev})
289 289 n = off = 0
290 290 l = len(data)
291 291 while off + s <= l:
292 292 cur = data[off : off + s]
293 293 off += s
294 294 e = indexformatv0_unpack(cur)
295 295 # transform to revlogv1 format
296 296 e2 = (
297 297 offset_type(e[0], 0),
298 298 e[1],
299 299 -1,
300 300 e[2],
301 301 e[3],
302 302 nodemap.get(e[4], nullrev),
303 303 nodemap.get(e[5], nullrev),
304 304 e[6],
305 305 )
306 306 index.append(e2)
307 307 nodemap[e[6]] = n
308 308 n += 1
309 309
310 310 index = revlogoldindex(index)
311 311 return index, None
312 312
313 313 def packentry(self, entry, node, version, rev):
314 314 if gettype(entry[0]):
315 315 raise error.RevlogError(
316 316 _(b'index entry flags need revlog version 1')
317 317 )
318 318 e2 = (
319 319 getoffset(entry[0]),
320 320 entry[1],
321 321 entry[3],
322 322 entry[4],
323 323 node(entry[5]),
324 324 node(entry[6]),
325 325 entry[7],
326 326 )
327 327 return indexformatv0_pack(*e2)
328 328
329 329
330 330 # index ng:
331 331 # 6 bytes: offset
332 332 # 2 bytes: flags
333 333 # 4 bytes: compressed length
334 334 # 4 bytes: uncompressed length
335 335 # 4 bytes: base rev
336 336 # 4 bytes: link rev
337 337 # 4 bytes: parent 1 rev
338 338 # 4 bytes: parent 2 rev
339 339 # 32 bytes: nodeid
340 340 indexformatng = struct.Struct(b">Qiiiiii20s12x")
341 341 indexformatng_pack = indexformatng.pack
342 342 versionformat = struct.Struct(b">I")
343 343 versionformat_pack = versionformat.pack
344 344 versionformat_unpack = versionformat.unpack
345 345
346 346 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
347 347 # signed integer)
348 348 _maxentrysize = 0x7FFFFFFF
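For reference, a quick arithmetic check of the per-entry sizes implied by the two index format strings above (76 bytes for the v0 layout, 64 bytes for the "ng" layout); this is an editor annotation, not part of revlog.py.

    import struct

    # 4 * 4-byte ints + 3 * 20-byte nodeids = 76 bytes per v0 entry.
    assert struct.calcsize(b">4l20s20s20s") == 76
    # 8 + 6 * 4 + 20 + 12 padding bytes = 64 bytes per "ng" entry.
    assert struct.calcsize(b">Qiiiiii20s12x") == 64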
349 349
350 350
351 351 class revlogio(object):
352 352 def __init__(self):
353 353 self.size = indexformatng.size
354 354
355 355 def parseindex(self, data, inline):
356 356 # call the C implementation to parse the index data
357 357 index, cache = parsers.parse_index2(data, inline)
358 358 return index, cache
359 359
360 360 def packentry(self, entry, node, version, rev):
361 361 p = indexformatng_pack(*entry)
362 362 if rev == 0:
363 363 p = versionformat_pack(version) + p[4:]
364 364 return p
365 365
366 366
367 367 NodemapRevlogIO = None
368 368
369 369 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
370 370
371 371 class NodemapRevlogIO(revlogio):
372 372 """A debug oriented IO class that return a PersistentNodeMapIndexObject
373 373
374 374 The PersistentNodeMapIndexObject object is meant to test the persistent nodemap feature.
375 375 """
376 376
377 377 def parseindex(self, data, inline):
378 378 index, cache = parsers.parse_index_devel_nodemap(data, inline)
379 379 return index, cache
380 380
381 381
382 382 class rustrevlogio(revlogio):
383 383 def parseindex(self, data, inline):
384 384 index, cache = super(rustrevlogio, self).parseindex(data, inline)
385 385 return rustrevlog.MixedIndex(index), cache
386 386
387 387
388 388 class revlog(object):
389 389 """
390 390 the underlying revision storage object
391 391
392 392 A revlog consists of two parts, an index and the revision data.
393 393
394 394 The index is a file with a fixed record size containing
395 395 information on each revision, including its nodeid (hash), the
396 396 nodeids of its parents, the position and offset of its data within
397 397 the data file, and the revision it's based on. Finally, each entry
398 398 contains a linkrev entry that can serve as a pointer to external
399 399 data.
400 400
401 401 The revision data itself is a linear collection of data chunks.
402 402 Each chunk represents a revision and is usually represented as a
403 403 delta against the previous chunk. To bound lookup time, runs of
404 404 deltas are limited to about 2 times the length of the original
405 405 version data. This makes retrieval of a version proportional to
406 406 its size, or O(1) relative to the number of revisions.
407 407
408 408 Both pieces of the revlog are written to in an append-only
409 409 fashion, which means we never need to rewrite a file to insert or
410 410 remove data, and can use some simple techniques to avoid the need
411 411 for locking while reading.
412 412
413 413 If checkambig, indexfile is opened with checkambig=True at
414 414 writing, to avoid file stat ambiguity.
415 415
416 416 If mmaplargeindex is True, and an mmapindexthreshold is set, the
417 417 index will be mmapped rather than read if it is larger than the
418 418 configured threshold.
419 419
420 420 If censorable is True, the revlog can have censored revisions.
421 421
422 422 If `upperboundcomp` is not None, this is the expected maximal gain from
423 423 compression for the data content.
424 424 """
425 425
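# Minimal usage sketch (editorial addition; assumes a vfs-style ``opener``
# for an existing repository store, which is not shown here):
#
#   rl = revlog(opener, b'data/foo.txt.i')
#   n = rl.node(0)              # nodeid of the first stored revision
#   text = rl.revision(n)       # reconstruct the full text from the delta chain
#   assert not rl.cmp(n, text)  # cmp() returns True only on mismatch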
426 426 _flagserrorclass = error.RevlogError
427 427
428 428 def __init__(
429 429 self,
430 430 opener,
431 431 indexfile,
432 432 datafile=None,
433 433 checkambig=False,
434 434 mmaplargeindex=False,
435 435 censorable=False,
436 436 upperboundcomp=None,
437 437 persistentnodemap=False,
438 438 ):
439 439 """
440 440 create a revlog object
441 441
442 442 opener is a function that abstracts the file opening operation
443 443 and can be used to implement COW semantics or the like.
444 444
445 445 """
446 446 self.upperboundcomp = upperboundcomp
447 447 self.indexfile = indexfile
448 448 self.datafile = datafile or (indexfile[:-2] + b".d")
449 449 self.nodemap_file = None
450 450 if persistentnodemap:
451 451 self.nodemap_file = nodemaputil.get_nodemap_file(
452 452 opener, self.indexfile
453 453 )
454 454
455 455 self.opener = opener
456 456 # When True, indexfile is opened with checkambig=True at writing, to
457 457 # avoid file stat ambiguity.
458 458 self._checkambig = checkambig
459 459 self._mmaplargeindex = mmaplargeindex
460 460 self._censorable = censorable
461 461 # 3-tuple of (node, rev, text) for a raw revision.
462 462 self._revisioncache = None
463 463 # Maps rev to chain base rev.
464 464 self._chainbasecache = util.lrucachedict(100)
465 465 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
466 466 self._chunkcache = (0, b'')
467 467 # How much data to read and cache into the raw revlog data cache.
468 468 self._chunkcachesize = 65536
469 469 self._maxchainlen = None
470 470 self._deltabothparents = True
471 471 self.index = None
472 472 self._nodemap_docket = None
473 473 # Mapping of partial identifiers to full nodes.
474 474 self._pcache = {}
475 475 # Compression engine used when writing new revisions.
476 476 self._compengine = b'zlib'
477 477 self._compengineopts = {}
478 478 self._maxdeltachainspan = -1
479 479 self._withsparseread = False
480 480 self._sparserevlog = False
481 481 self._srdensitythreshold = 0.50
482 482 self._srmingapsize = 262144
483 483
484 484 # Make copy of flag processors so each revlog instance can support
485 485 # custom flags.
486 486 self._flagprocessors = dict(flagutil.flagprocessors)
487 487
488 488 # 2-tuple of file handles being used for active writing.
489 489 self._writinghandles = None
490 490
491 491 self._loadindex()
492 492
493 493 def _loadindex(self):
494 494 mmapindexthreshold = None
495 495 opts = self.opener.options
496 496
497 497 if b'revlogv2' in opts:
498 498 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
499 499 elif b'revlogv1' in opts:
500 500 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
501 501 if b'generaldelta' in opts:
502 502 newversionflags |= FLAG_GENERALDELTA
503 503 elif b'revlogv0' in self.opener.options:
504 504 newversionflags = REVLOGV0
505 505 else:
506 506 newversionflags = REVLOG_DEFAULT_VERSION
507 507
508 508 if b'chunkcachesize' in opts:
509 509 self._chunkcachesize = opts[b'chunkcachesize']
510 510 if b'maxchainlen' in opts:
511 511 self._maxchainlen = opts[b'maxchainlen']
512 512 if b'deltabothparents' in opts:
513 513 self._deltabothparents = opts[b'deltabothparents']
514 514 self._lazydelta = bool(opts.get(b'lazydelta', True))
515 515 self._lazydeltabase = False
516 516 if self._lazydelta:
517 517 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
518 518 if b'compengine' in opts:
519 519 self._compengine = opts[b'compengine']
520 520 if b'zlib.level' in opts:
521 521 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
522 522 if b'zstd.level' in opts:
523 523 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
524 524 if b'maxdeltachainspan' in opts:
525 525 self._maxdeltachainspan = opts[b'maxdeltachainspan']
526 526 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
527 527 mmapindexthreshold = opts[b'mmapindexthreshold']
528 528 self.hassidedata = bool(opts.get(b'side-data', False))
529 529 if self.hassidedata:
530 530 self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
531 531 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
532 532 withsparseread = bool(opts.get(b'with-sparse-read', False))
533 533 # sparse-revlog forces sparse-read
534 534 self._withsparseread = self._sparserevlog or withsparseread
535 535 if b'sparse-read-density-threshold' in opts:
536 536 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
537 537 if b'sparse-read-min-gap-size' in opts:
538 538 self._srmingapsize = opts[b'sparse-read-min-gap-size']
539 539 if opts.get(b'enableellipsis'):
540 540 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
541 541
542 542 # revlog v0 doesn't have flag processors
543 543 for flag, processor in pycompat.iteritems(
544 544 opts.get(b'flagprocessors', {})
545 545 ):
546 546 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
547 547
548 548 if self._chunkcachesize <= 0:
549 549 raise error.RevlogError(
550 550 _(b'revlog chunk cache size %r is not greater than 0')
551 551 % self._chunkcachesize
552 552 )
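# (n & (n - 1) is zero exactly when n is a power of two, e.g.
# 65536 & 65535 == 0 while 65537 & 65536 != 0)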
553 553 elif self._chunkcachesize & (self._chunkcachesize - 1):
554 554 raise error.RevlogError(
555 555 _(b'revlog chunk cache size %r is not a power of 2')
556 556 % self._chunkcachesize
557 557 )
558 558
559 559 indexdata = b''
560 560 self._initempty = True
561 561 try:
562 562 with self._indexfp() as f:
563 563 if (
564 564 mmapindexthreshold is not None
565 565 and self.opener.fstat(f).st_size >= mmapindexthreshold
566 566 ):
568 568 # TODO: should .close() the mmap to release resources without
569 569 # relying on the Python GC
569 569 indexdata = util.buffer(util.mmapread(f))
570 570 else:
571 571 indexdata = f.read()
572 572 if len(indexdata) > 0:
573 573 versionflags = versionformat_unpack(indexdata[:4])[0]
574 574 self._initempty = False
575 575 else:
576 576 versionflags = newversionflags
577 577 except IOError as inst:
578 578 if inst.errno != errno.ENOENT:
579 579 raise
580 580
581 581 versionflags = newversionflags
582 582
583 583 self.version = versionflags
584 584
585 585 flags = versionflags & ~0xFFFF
586 586 fmt = versionflags & 0xFFFF
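# For example (illustrative values): a v1 inline revlog with generaldelta
# stores REVLOGV1 | FLAG_INLINE_DATA | FLAG_GENERALDELTA, so ``fmt`` comes
# out as 1 and ``flags`` keeps the two feature bits in the upper 16 bits.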
587 587
588 588 if fmt == REVLOGV0:
589 589 if flags:
590 590 raise error.RevlogError(
591 591 _(b'unknown flags (%#04x) in version %d revlog %s')
592 592 % (flags >> 16, fmt, self.indexfile)
593 593 )
594 594
595 595 self._inline = False
596 596 self._generaldelta = False
597 597
598 598 elif fmt == REVLOGV1:
599 599 if flags & ~REVLOGV1_FLAGS:
600 600 raise error.RevlogError(
601 601 _(b'unknown flags (%#04x) in version %d revlog %s')
602 602 % (flags >> 16, fmt, self.indexfile)
603 603 )
604 604
605 605 self._inline = versionflags & FLAG_INLINE_DATA
606 606 self._generaldelta = versionflags & FLAG_GENERALDELTA
607 607
608 608 elif fmt == REVLOGV2:
609 609 if flags & ~REVLOGV2_FLAGS:
610 610 raise error.RevlogError(
611 611 _(b'unknown flags (%#04x) in version %d revlog %s')
612 612 % (flags >> 16, fmt, self.indexfile)
613 613 )
614 614
615 615 self._inline = versionflags & FLAG_INLINE_DATA
616 616 # generaldelta implied by version 2 revlogs.
617 617 self._generaldelta = True
618 618
619 619 else:
620 620 raise error.RevlogError(
621 621 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
622 622 )
623 623 # sparse-revlog can't be on without general-delta (issue6056)
624 624 if not self._generaldelta:
625 625 self._sparserevlog = False
626 626
627 627 self._storedeltachains = True
628 628
629 629 devel_nodemap = (
630 630 self.nodemap_file
631 631 and opts.get(b'devel-force-nodemap', False)
632 632 and NodemapRevlogIO is not None
633 633 )
634 634
635 635 use_rust_index = False
636 636 if rustrevlog is not None:
637 637 if self.nodemap_file is not None:
638 638 use_rust_index = True
639 639 else:
640 640 use_rust_index = self.opener.options.get(b'rust.index')
641 641
642 642 self._io = revlogio()
643 643 if self.version == REVLOGV0:
644 644 self._io = revlogoldio()
645 645 elif devel_nodemap:
646 646 self._io = NodemapRevlogIO()
647 647 elif use_rust_index:
648 648 self._io = rustrevlogio()
649 649 try:
650 650 d = self._io.parseindex(indexdata, self._inline)
651 651 index, _chunkcache = d
652 652 use_nodemap = (
653 653 not self._inline
654 654 and self.nodemap_file is not None
655 655 and util.safehasattr(index, 'update_nodemap_data')
656 656 )
657 657 if use_nodemap:
658 658 nodemap_data = nodemaputil.persisted_data(self)
659 659 if nodemap_data is not None:
660 660 docket = nodemap_data[0]
661 661 if (
662 662 len(d[0]) > docket.tip_rev
663 663 and d[0][docket.tip_rev][7] == docket.tip_node
664 664 ):
665 665 # no changelog tampering
666 666 self._nodemap_docket = docket
667 667 index.update_nodemap_data(*nodemap_data)
668 668 except (ValueError, IndexError):
669 669 raise error.RevlogError(
670 670 _(b"index %s is corrupted") % self.indexfile
671 671 )
672 672 self.index, self._chunkcache = d
673 673 if not self._chunkcache:
674 674 self._chunkclear()
675 675 # revnum -> (chain-length, sum-delta-length)
676 676 self._chaininfocache = util.lrucachedict(500)
677 677 # revlog header -> revlog compressor
678 678 self._decompressors = {}
679 679
680 680 @util.propertycache
681 681 def _compressor(self):
682 682 engine = util.compengines[self._compengine]
683 683 return engine.revlogcompressor(self._compengineopts)
684 684
685 685 def _indexfp(self, mode=b'r'):
686 686 """file object for the revlog's index file"""
687 687 args = {'mode': mode}
688 688 if mode != b'r':
689 689 args['checkambig'] = self._checkambig
690 690 if mode == b'w':
691 691 args['atomictemp'] = True
692 692 return self.opener(self.indexfile, **args)
693 693
694 694 def _datafp(self, mode=b'r'):
695 695 """file object for the revlog's data file"""
696 696 return self.opener(self.datafile, mode=mode)
697 697
698 698 @contextlib.contextmanager
699 699 def _datareadfp(self, existingfp=None):
700 700 """file object suitable to read data"""
701 701 # Use explicit file handle, if given.
702 702 if existingfp is not None:
703 703 yield existingfp
704 704
705 705 # Use a file handle being actively used for writes, if available.
706 706 # There is some danger to doing this because reads will seek the
707 707 # file. However, _writeentry() performs a SEEK_END before all writes,
708 708 # so we should be safe.
709 709 elif self._writinghandles:
710 710 if self._inline:
711 711 yield self._writinghandles[0]
712 712 else:
713 713 yield self._writinghandles[1]
714 714
715 715 # Otherwise open a new file handle.
716 716 else:
717 717 if self._inline:
718 718 func = self._indexfp
719 719 else:
720 720 func = self._datafp
721 721 with func() as fp:
722 722 yield fp
723 723
724 724 def tiprev(self):
725 725 return len(self.index) - 1
726 726
727 727 def tip(self):
728 728 return self.node(self.tiprev())
729 729
730 730 def __contains__(self, rev):
731 731 return 0 <= rev < len(self)
732 732
733 733 def __len__(self):
734 734 return len(self.index)
735 735
736 736 def __iter__(self):
737 737 return iter(pycompat.xrange(len(self)))
738 738
739 739 def revs(self, start=0, stop=None):
740 740 """iterate over all rev in this revlog (from start to stop)"""
741 741 return storageutil.iterrevs(len(self), start=start, stop=stop)
742 742
743 743 @property
744 744 def nodemap(self):
745 745 msg = (
746 746 b"revlog.nodemap is deprecated, "
747 747 b"use revlog.index.[has_node|rev|get_rev]"
748 748 )
749 749 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
750 750 return self.index.nodemap
751 751
752 752 @property
753 753 def _nodecache(self):
754 754 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
755 755 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
756 756 return self.index.nodemap
757 757
758 758 def hasnode(self, node):
759 759 try:
760 760 self.rev(node)
761 761 return True
762 762 except KeyError:
763 763 return False
764 764
765 765 def candelta(self, baserev, rev):
766 766 """whether two revisions (baserev, rev) can be delta-ed or not"""
767 767 # Disable delta if either rev requires a content-changing flag
768 768 # processor (ex. LFS). This is because such flag processor can alter
769 769 # the rawtext content that the delta will be based on, and two clients
770 770 # could have the same revlog node with different flags (i.e. different
771 771 # rawtext contents) and the delta could be incompatible.
772 772 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
773 773 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
774 774 ):
775 775 return False
776 776 return True
777 777
778 778 def update_caches(self, transaction):
779 779 if self.nodemap_file is not None:
780 780 if transaction is None:
781 781 nodemaputil.update_persistent_nodemap(self)
782 782 else:
783 783 nodemaputil.setup_persistent_nodemap(transaction, self)
784 784
785 785 def clearcaches(self):
786 786 self._revisioncache = None
787 787 self._chainbasecache.clear()
788 788 self._chunkcache = (0, b'')
789 789 self._pcache = {}
790 790 self._nodemap_docket = None
791 791 self.index.clearcaches()
792 792 # The Python code is the one responsible for validating the docket, so
793 793 # we end up having to refresh it here.
794 794 use_nodemap = (
795 795 not self._inline
796 796 and self.nodemap_file is not None
797 797 and util.safehasattr(self.index, 'update_nodemap_data')
798 798 )
799 799 if use_nodemap:
800 800 nodemap_data = nodemaputil.persisted_data(self)
801 801 if nodemap_data is not None:
802 802 self._nodemap_docket = nodemap_data[0]
803 803 self.index.update_nodemap_data(*nodemap_data)
804 804
805 805 def rev(self, node):
806 806 try:
807 807 return self.index.rev(node)
808 808 except TypeError:
809 809 raise
810 810 except error.RevlogError:
811 811 # parsers.c radix tree lookup failed
812 812 if node == wdirid or node in wdirfilenodeids:
813 813 raise error.WdirUnsupported
814 814 raise error.LookupError(node, self.indexfile, _(b'no node'))
815 815
816 816 # Accessors for index entries.
817 817
818 818 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
819 819 # are flags.
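# For example, an entry whose data starts at byte 300 with flag value 0x0001
# stores (300 << 16) | 0x0001; start() recovers 300 and flags() recovers
# 0x0001. (Editorial example, not in the original file.)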
820 820 def start(self, rev):
821 821 return int(self.index[rev][0] >> 16)
822 822
823 823 def flags(self, rev):
824 824 return self.index[rev][0] & 0xFFFF
825 825
826 826 def length(self, rev):
827 827 return self.index[rev][1]
828 828
829 829 def rawsize(self, rev):
830 830 """return the length of the uncompressed text for a given revision"""
831 831 l = self.index[rev][2]
832 832 if l >= 0:
833 833 return l
834 834
835 835 t = self.rawdata(rev)
836 836 return len(t)
837 837
838 838 def size(self, rev):
839 839 """length of non-raw text (processed by a "read" flag processor)"""
840 840 # fast path: if no "read" flag processor could change the content,
841 841 # size is rawsize. note: ELLIPSIS is known to not change the content.
842 842 flags = self.flags(rev)
843 843 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
844 844 return self.rawsize(rev)
845 845
846 846 return len(self.revision(rev, raw=False))
847 847
848 848 def chainbase(self, rev):
849 849 base = self._chainbasecache.get(rev)
850 850 if base is not None:
851 851 return base
852 852
853 853 index = self.index
854 854 iterrev = rev
855 855 base = index[iterrev][3]
856 856 while base != iterrev:
857 857 iterrev = base
858 858 base = index[iterrev][3]
859 859
860 860 self._chainbasecache[rev] = base
861 861 return base
862 862
863 863 def linkrev(self, rev):
864 864 return self.index[rev][4]
865 865
866 866 def parentrevs(self, rev):
867 867 try:
868 868 entry = self.index[rev]
869 869 except IndexError:
870 870 if rev == wdirrev:
871 871 raise error.WdirUnsupported
872 872 raise
873 873
874 874 return entry[5], entry[6]
875 875
876 876 # fast parentrevs(rev) where rev isn't filtered
877 877 _uncheckedparentrevs = parentrevs
878 878
879 879 def node(self, rev):
880 880 try:
881 881 return self.index[rev][7]
882 882 except IndexError:
883 883 if rev == wdirrev:
884 884 raise error.WdirUnsupported
885 885 raise
886 886
887 887 # Derived from index values.
888 888
889 889 def end(self, rev):
890 890 return self.start(rev) + self.length(rev)
891 891
892 892 def parents(self, node):
893 893 i = self.index
894 894 d = i[self.rev(node)]
895 895 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
896 896
897 897 def chainlen(self, rev):
898 898 return self._chaininfo(rev)[0]
899 899
900 900 def _chaininfo(self, rev):
901 901 chaininfocache = self._chaininfocache
902 902 if rev in chaininfocache:
903 903 return chaininfocache[rev]
904 904 index = self.index
905 905 generaldelta = self._generaldelta
906 906 iterrev = rev
907 907 e = index[iterrev]
908 908 clen = 0
909 909 compresseddeltalen = 0
910 910 while iterrev != e[3]:
911 911 clen += 1
912 912 compresseddeltalen += e[1]
913 913 if generaldelta:
914 914 iterrev = e[3]
915 915 else:
916 916 iterrev -= 1
917 917 if iterrev in chaininfocache:
918 918 t = chaininfocache[iterrev]
919 919 clen += t[0]
920 920 compresseddeltalen += t[1]
921 921 break
922 922 e = index[iterrev]
923 923 else:
924 924 # Add text length of base since decompressing that also takes
925 925 # work. For cache hits the length is already included.
926 926 compresseddeltalen += e[1]
927 927 r = (clen, compresseddeltalen)
928 928 chaininfocache[rev] = r
929 929 return r
930 930
931 931 def _deltachain(self, rev, stoprev=None):
932 932 """Obtain the delta chain for a revision.
933 933
934 934 ``stoprev`` specifies a revision to stop at. If not specified, we
935 935 stop at the base of the chain.
936 936
937 937 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
938 938 revs in ascending order and ``stopped`` is a bool indicating whether
939 939 ``stoprev`` was hit.
940 940 """
941 941 # Try C implementation.
942 942 try:
943 943 return self.index.deltachain(rev, stoprev, self._generaldelta)
944 944 except AttributeError:
945 945 pass
946 946
947 947 chain = []
948 948
949 949 # Alias to prevent attribute lookup in tight loop.
950 950 index = self.index
951 951 generaldelta = self._generaldelta
952 952
953 953 iterrev = rev
954 954 e = index[iterrev]
955 955 while iterrev != e[3] and iterrev != stoprev:
956 956 chain.append(iterrev)
957 957 if generaldelta:
958 958 iterrev = e[3]
959 959 else:
960 960 iterrev -= 1
961 961 e = index[iterrev]
962 962
963 963 if iterrev == stoprev:
964 964 stopped = True
965 965 else:
966 966 chain.append(iterrev)
967 967 stopped = False
968 968
969 969 chain.reverse()
970 970 return chain, stopped
971 971
972 972 def ancestors(self, revs, stoprev=0, inclusive=False):
973 973 """Generate the ancestors of 'revs' in reverse revision order.
974 974 Does not generate revs lower than stoprev.
975 975
976 976 See the documentation for ancestor.lazyancestors for more details."""
977 977
978 978 # first, make sure start revisions aren't filtered
979 979 revs = list(revs)
980 980 checkrev = self.node
981 981 for r in revs:
982 982 checkrev(r)
983 983 # and we're sure ancestors aren't filtered as well
984 984
985 985 if rustancestor is not None:
986 986 lazyancestors = rustancestor.LazyAncestors
987 987 arg = self.index
988 988 else:
989 989 lazyancestors = ancestor.lazyancestors
990 990 arg = self._uncheckedparentrevs
991 991 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
992 992
993 993 def descendants(self, revs):
994 994 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
995 995
996 996 def findcommonmissing(self, common=None, heads=None):
997 997 """Return a tuple of the ancestors of common and the ancestors of heads
998 998 that are not ancestors of common. In revset terminology, we return the
999 999 tuple:
1000 1000
1001 1001 ::common, (::heads) - (::common)
1002 1002
1003 1003 The list is sorted by revision number, meaning it is
1004 1004 topologically sorted.
1005 1005
1006 1006 'heads' and 'common' are both lists of node IDs. If heads is
1007 1007 not supplied, uses all of the revlog's heads. If common is not
1008 1008 supplied, uses nullid."""
1009 1009 if common is None:
1010 1010 common = [nullid]
1011 1011 if heads is None:
1012 1012 heads = self.heads()
1013 1013
1014 1014 common = [self.rev(n) for n in common]
1015 1015 heads = [self.rev(n) for n in heads]
1016 1016
1017 1017 # we want the ancestors, but inclusive
1018 1018 class lazyset(object):
1019 1019 def __init__(self, lazyvalues):
1020 1020 self.addedvalues = set()
1021 1021 self.lazyvalues = lazyvalues
1022 1022
1023 1023 def __contains__(self, value):
1024 1024 return value in self.addedvalues or value in self.lazyvalues
1025 1025
1026 1026 def __iter__(self):
1027 1027 added = self.addedvalues
1028 1028 for r in added:
1029 1029 yield r
1030 1030 for r in self.lazyvalues:
1031 1031 if r not in added:
1032 1032 yield r
1033 1033
1034 1034 def add(self, value):
1035 1035 self.addedvalues.add(value)
1036 1036
1037 1037 def update(self, values):
1038 1038 self.addedvalues.update(values)
1039 1039
1040 1040 has = lazyset(self.ancestors(common))
1041 1041 has.add(nullrev)
1042 1042 has.update(common)
1043 1043
1044 1044 # take all ancestors from heads that aren't in has
1045 1045 missing = set()
1046 1046 visit = collections.deque(r for r in heads if r not in has)
1047 1047 while visit:
1048 1048 r = visit.popleft()
1049 1049 if r in missing:
1050 1050 continue
1051 1051 else:
1052 1052 missing.add(r)
1053 1053 for p in self.parentrevs(r):
1054 1054 if p not in has:
1055 1055 visit.append(p)
1056 1056 missing = list(missing)
1057 1057 missing.sort()
1058 1058 return has, [self.node(miss) for miss in missing]
1059 1059
1060 1060 def incrementalmissingrevs(self, common=None):
1061 1061 """Return an object that can be used to incrementally compute the
1062 1062 revision numbers of the ancestors of arbitrary sets that are not
1063 1063 ancestors of common. This is an ancestor.incrementalmissingancestors
1064 1064 object.
1065 1065
1066 1066 'common' is a list of revision numbers. If common is not supplied, uses
1067 1067 nullrev.
1068 1068 """
1069 1069 if common is None:
1070 1070 common = [nullrev]
1071 1071
1072 1072 if rustancestor is not None:
1073 1073 return rustancestor.MissingAncestors(self.index, common)
1074 1074 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1075 1075
1076 1076 def findmissingrevs(self, common=None, heads=None):
1077 1077 """Return the revision numbers of the ancestors of heads that
1078 1078 are not ancestors of common.
1079 1079
1080 1080 More specifically, return a list of revision numbers corresponding to
1081 1081 nodes N such that every N satisfies the following constraints:
1082 1082
1083 1083 1. N is an ancestor of some node in 'heads'
1084 1084 2. N is not an ancestor of any node in 'common'
1085 1085
1086 1086 The list is sorted by revision number, meaning it is
1087 1087 topologically sorted.
1088 1088
1089 1089 'heads' and 'common' are both lists of revision numbers. If heads is
1090 1090 not supplied, uses all of the revlog's heads. If common is not
1091 1091 supplied, uses nullid."""
1092 1092 if common is None:
1093 1093 common = [nullrev]
1094 1094 if heads is None:
1095 1095 heads = self.headrevs()
1096 1096
1097 1097 inc = self.incrementalmissingrevs(common=common)
1098 1098 return inc.missingancestors(heads)
1099 1099
1100 1100 def findmissing(self, common=None, heads=None):
1101 1101 """Return the ancestors of heads that are not ancestors of common.
1102 1102
1103 1103 More specifically, return a list of nodes N such that every N
1104 1104 satisfies the following constraints:
1105 1105
1106 1106 1. N is an ancestor of some node in 'heads'
1107 1107 2. N is not an ancestor of any node in 'common'
1108 1108
1109 1109 The list is sorted by revision number, meaning it is
1110 1110 topologically sorted.
1111 1111
1112 1112 'heads' and 'common' are both lists of node IDs. If heads is
1113 1113 not supplied, uses all of the revlog's heads. If common is not
1114 1114 supplied, uses nullid."""
1115 1115 if common is None:
1116 1116 common = [nullid]
1117 1117 if heads is None:
1118 1118 heads = self.heads()
1119 1119
1120 1120 common = [self.rev(n) for n in common]
1121 1121 heads = [self.rev(n) for n in heads]
1122 1122
1123 1123 inc = self.incrementalmissingrevs(common=common)
1124 1124 return [self.node(r) for r in inc.missingancestors(heads)]
1125 1125
1126 1126 def nodesbetween(self, roots=None, heads=None):
1127 1127 """Return a topological path from 'roots' to 'heads'.
1128 1128
1129 1129 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1130 1130 topologically sorted list of all nodes N that satisfy both of
1131 1131 these constraints:
1132 1132
1133 1133 1. N is a descendant of some node in 'roots'
1134 1134 2. N is an ancestor of some node in 'heads'
1135 1135
1136 1136 Every node is considered to be both a descendant and an ancestor
1137 1137 of itself, so every reachable node in 'roots' and 'heads' will be
1138 1138 included in 'nodes'.
1139 1139
1140 1140 'outroots' is the list of reachable nodes in 'roots', i.e., the
1141 1141 subset of 'roots' that is returned in 'nodes'. Likewise,
1142 1142 'outheads' is the subset of 'heads' that is also in 'nodes'.
1143 1143
1144 1144 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1145 1145 unspecified, uses nullid as the only root. If 'heads' is
1146 1146 unspecified, uses list of all of the revlog's heads."""
1147 1147 nonodes = ([], [], [])
1148 1148 if roots is not None:
1149 1149 roots = list(roots)
1150 1150 if not roots:
1151 1151 return nonodes
1152 1152 lowestrev = min([self.rev(n) for n in roots])
1153 1153 else:
1154 1154 roots = [nullid] # Everybody's a descendant of nullid
1155 1155 lowestrev = nullrev
1156 1156 if (lowestrev == nullrev) and (heads is None):
1157 1157 # We want _all_ the nodes!
1158 1158 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1159 1159 if heads is None:
1160 1160 # All nodes are ancestors, so the latest ancestor is the last
1161 1161 # node.
1162 1162 highestrev = len(self) - 1
1163 1163 # Set ancestors to None to signal that every node is an ancestor.
1164 1164 ancestors = None
1165 1165 # Set heads to an empty dictionary for later discovery of heads
1166 1166 heads = {}
1167 1167 else:
1168 1168 heads = list(heads)
1169 1169 if not heads:
1170 1170 return nonodes
1171 1171 ancestors = set()
1172 1172 # Turn heads into a dictionary so we can remove 'fake' heads.
1173 1173 # Also, later we will be using it to filter out the heads we can't
1174 1174 # find from roots.
1175 1175 heads = dict.fromkeys(heads, False)
1176 1176 # Start at the top and keep marking parents until we're done.
1177 1177 nodestotag = set(heads)
1178 1178 # Remember where the top was so we can use it as a limit later.
1179 1179 highestrev = max([self.rev(n) for n in nodestotag])
1180 1180 while nodestotag:
1181 1181 # grab a node to tag
1182 1182 n = nodestotag.pop()
1183 1183 # Never tag nullid
1184 1184 if n == nullid:
1185 1185 continue
1186 1186 # A node's revision number represents its place in a
1187 1187 # topologically sorted list of nodes.
1188 1188 r = self.rev(n)
1189 1189 if r >= lowestrev:
1190 1190 if n not in ancestors:
1191 1191 # If we are possibly a descendant of one of the roots
1192 1192 # and we haven't already been marked as an ancestor
1193 1193 ancestors.add(n) # Mark as ancestor
1194 1194 # Add non-nullid parents to list of nodes to tag.
1195 1195 nodestotag.update(
1196 1196 [p for p in self.parents(n) if p != nullid]
1197 1197 )
1198 1198 elif n in heads: # We've seen it before, is it a fake head?
1199 1199 # So it is; real heads should not be the ancestors of
1200 1200 # any other heads.
1201 1201 heads.pop(n)
1202 1202 if not ancestors:
1203 1203 return nonodes
1204 1204 # Now that we have our set of ancestors, we want to remove any
1205 1205 # roots that are not ancestors.
1206 1206
1207 1207 # If one of the roots was nullid, everything is included anyway.
1208 1208 if lowestrev > nullrev:
1209 1209 # But, since we weren't, let's recompute the lowest rev to not
1210 1210 # include roots that aren't ancestors.
1211 1211
1212 1212 # Filter out roots that aren't ancestors of heads
1213 1213 roots = [root for root in roots if root in ancestors]
1214 1214 # Recompute the lowest revision
1215 1215 if roots:
1216 1216 lowestrev = min([self.rev(root) for root in roots])
1217 1217 else:
1218 1218 # No more roots? Return empty list
1219 1219 return nonodes
1220 1220 else:
1221 1221 # We are descending from nullid, and don't need to care about
1222 1222 # any other roots.
1223 1223 lowestrev = nullrev
1224 1224 roots = [nullid]
1225 1225 # Transform our roots list into a set.
1226 1226 descendants = set(roots)
1227 1227 # Also, keep the original roots so we can filter out roots that aren't
1228 1228 # 'real' roots (i.e. are descended from other roots).
1229 1229 roots = descendants.copy()
1230 1230 # Our topologically sorted list of output nodes.
1231 1231 orderedout = []
1232 1232 # Don't start at nullid since we don't want nullid in our output list,
1233 1233 # and if nullid shows up in descendants, empty parents will look like
1234 1234 # they're descendants.
1235 1235 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1236 1236 n = self.node(r)
1237 1237 isdescendant = False
1238 1238 if lowestrev == nullrev: # Everybody is a descendant of nullid
1239 1239 isdescendant = True
1240 1240 elif n in descendants:
1241 1241 # n is already a descendant
1242 1242 isdescendant = True
1243 1243 # This check only needs to be done here because all the roots
1244 1244 # will start being marked as descendants before the loop.
1245 1245 if n in roots:
1246 1246 # If n was a root, check if it's a 'real' root.
1247 1247 p = tuple(self.parents(n))
1248 1248 # If any of its parents are descendants, it's not a root.
1249 1249 if (p[0] in descendants) or (p[1] in descendants):
1250 1250 roots.remove(n)
1251 1251 else:
1252 1252 p = tuple(self.parents(n))
1253 1253 # A node is a descendant if either of its parents are
1254 1254 # descendants. (We seeded the descendants set with the roots
1255 1255 # up there, remember?)
1256 1256 if (p[0] in descendants) or (p[1] in descendants):
1257 1257 descendants.add(n)
1258 1258 isdescendant = True
1259 1259 if isdescendant and ((ancestors is None) or (n in ancestors)):
1260 1260 # Only include nodes that are both descendants and ancestors.
1261 1261 orderedout.append(n)
1262 1262 if (ancestors is not None) and (n in heads):
1263 1263 # We're trying to figure out which heads are reachable
1264 1264 # from roots.
1265 1265 # Mark this head as having been reached
1266 1266 heads[n] = True
1267 1267 elif ancestors is None:
1268 1268 # Otherwise, we're trying to discover the heads.
1269 1269 # Assume this is a head because if it isn't, the next step
1270 1270 # will eventually remove it.
1271 1271 heads[n] = True
1272 1272 # But, obviously its parents aren't.
1273 1273 for p in self.parents(n):
1274 1274 heads.pop(p, None)
1275 1275 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1276 1276 roots = list(roots)
1277 1277 assert orderedout
1278 1278 assert roots
1279 1279 assert heads
1280 1280 return (orderedout, roots, heads)
1281 1281
1282 1282 def headrevs(self, revs=None):
1283 1283 if revs is None:
1284 1284 try:
1285 1285 return self.index.headrevs()
1286 1286 except AttributeError:
1287 1287 return self._headrevs()
1288 1288 if rustdagop is not None:
1289 1289 return rustdagop.headrevs(self.index, revs)
1290 1290 return dagop.headrevs(revs, self._uncheckedparentrevs)
1291 1291
1292 1292 def computephases(self, roots):
1293 1293 return self.index.computephasesmapsets(roots)
1294 1294
1295 1295 def _headrevs(self):
1296 1296 count = len(self)
1297 1297 if not count:
1298 1298 return [nullrev]
1299 1299 # we won't iterate over filtered revs, so nobody is a head at the start
1300 1300 ishead = [0] * (count + 1)
1301 1301 index = self.index
1302 1302 for r in self:
1303 1303 ishead[r] = 1 # I may be a head
1304 1304 e = index[r]
1305 1305 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1306 1306 return [r for r, val in enumerate(ishead) if val]
1307 1307
1308 1308 def heads(self, start=None, stop=None):
1309 1309 """return the list of all nodes that have no children
1310 1310
1311 1311 if start is specified, only heads that are descendants of
1312 1312 start will be returned.
1313 1313 if stop is specified, it will consider all the revs from stop
1314 1314 as if they had no children
1315 1315 """
1316 1316 if start is None and stop is None:
1317 1317 if not len(self):
1318 1318 return [nullid]
1319 1319 return [self.node(r) for r in self.headrevs()]
1320 1320
1321 1321 if start is None:
1322 1322 start = nullrev
1323 1323 else:
1324 1324 start = self.rev(start)
1325 1325
1326 1326 stoprevs = {self.rev(n) for n in stop or []}
1327 1327
1328 1328 revs = dagop.headrevssubset(
1329 1329 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1330 1330 )
1331 1331
1332 1332 return [self.node(rev) for rev in revs]
1333 1333
1334 1334 def children(self, node):
1335 1335 """find the children of a given node"""
1336 1336 c = []
1337 1337 p = self.rev(node)
1338 1338 for r in self.revs(start=p + 1):
1339 1339 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1340 1340 if prevs:
1341 1341 for pr in prevs:
1342 1342 if pr == p:
1343 1343 c.append(self.node(r))
1344 1344 elif p == nullrev:
1345 1345 c.append(self.node(r))
1346 1346 return c
1347 1347
1348 1348 def commonancestorsheads(self, a, b):
1349 1349 """calculate all the heads of the common ancestors of nodes a and b"""
1350 1350 a, b = self.rev(a), self.rev(b)
1351 1351 ancs = self._commonancestorsheads(a, b)
1352 1352 return pycompat.maplist(self.node, ancs)
1353 1353
1354 1354 def _commonancestorsheads(self, *revs):
1355 1355 """calculate all the heads of the common ancestors of revs"""
1356 1356 try:
1357 1357 ancs = self.index.commonancestorsheads(*revs)
1358 1358 except (AttributeError, OverflowError): # C implementation failed
1359 1359 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1360 1360 return ancs
1361 1361
1362 1362 def isancestor(self, a, b):
1363 1363 """return True if node a is an ancestor of node b
1364 1364
1365 1365 A revision is considered an ancestor of itself."""
1366 1366 a, b = self.rev(a), self.rev(b)
1367 1367 return self.isancestorrev(a, b)
1368 1368
1369 1369 def isancestorrev(self, a, b):
1370 1370 """return True if revision a is an ancestor of revision b
1371 1371
1372 1372 A revision is considered an ancestor of itself.
1373 1373
1374 1374 The implementation of this is trivial but the use of
1375 1375 reachableroots is not."""
1376 1376 if a == nullrev:
1377 1377 return True
1378 1378 elif a == b:
1379 1379 return True
1380 1380 elif a > b:
1381 1381 return False
1382 1382 return bool(self.reachableroots(a, [b], [a], includepath=False))
1383 1383
1384 1384 def reachableroots(self, minroot, heads, roots, includepath=False):
1385 1385 """return (heads(::(<roots> and <roots>::<heads>)))
1386 1386
1387 1387 If includepath is True, return (<roots>::<heads>)."""
1388 1388 try:
1389 1389 return self.index.reachableroots2(
1390 1390 minroot, heads, roots, includepath
1391 1391 )
1392 1392 except AttributeError:
1393 1393 return dagop._reachablerootspure(
1394 1394 self.parentrevs, minroot, roots, heads, includepath
1395 1395 )
1396 1396
1397 1397 def ancestor(self, a, b):
1398 1398 """calculate the "best" common ancestor of nodes a and b"""
1399 1399
1400 1400 a, b = self.rev(a), self.rev(b)
1401 1401 try:
1402 1402 ancs = self.index.ancestors(a, b)
1403 1403 except (AttributeError, OverflowError):
1404 1404 ancs = ancestor.ancestors(self.parentrevs, a, b)
1405 1405 if ancs:
1406 1406 # choose a consistent winner when there's a tie
1407 1407 return min(map(self.node, ancs))
1408 1408 return nullid
1409 1409
1410 1410 def _match(self, id):
1411 1411 if isinstance(id, int):
1412 1412 # rev
1413 1413 return self.node(id)
1414 1414 if len(id) == 20:
1415 1415 # possibly a binary node
1416 1416 # odds of a binary node being all hex in ASCII are 1 in 10**25
1417 1417 try:
1418 1418 node = id
1419 1419 self.rev(node) # quick search the index
1420 1420 return node
1421 1421 except error.LookupError:
1422 1422 pass # may be partial hex id
1423 1423 try:
1424 1424 # str(rev)
1425 1425 rev = int(id)
1426 1426 if b"%d" % rev != id:
1427 1427 raise ValueError
1428 1428 if rev < 0:
1429 1429 rev = len(self) + rev
1430 1430 if rev < 0 or rev >= len(self):
1431 1431 raise ValueError
1432 1432 return self.node(rev)
1433 1433 except (ValueError, OverflowError):
1434 1434 pass
1435 1435 if len(id) == 40:
1436 1436 try:
1437 1437 # a full hex nodeid?
1438 1438 node = bin(id)
1439 1439 self.rev(node)
1440 1440 return node
1441 1441 except (TypeError, error.LookupError):
1442 1442 pass
1443 1443
1444 1444 def _partialmatch(self, id):
1445 1445 # we don't care about wdirfilenodeids as they should always be full hashes
1446 1446 maybewdir = wdirhex.startswith(id)
1447 1447 try:
1448 1448 partial = self.index.partialmatch(id)
1449 1449 if partial and self.hasnode(partial):
1450 1450 if maybewdir:
1451 1451 # single 'ff...' match in radix tree, ambiguous with wdir
1452 1452 raise error.RevlogError
1453 1453 return partial
1454 1454 if maybewdir:
1455 1455 # no 'ff...' match in radix tree, wdir identified
1456 1456 raise error.WdirUnsupported
1457 1457 return None
1458 1458 except error.RevlogError:
1459 1459 # parsers.c radix tree lookup gave multiple matches
1460 1460 # fast path: for unfiltered changelog, radix tree is accurate
1461 1461 if not getattr(self, 'filteredrevs', None):
1462 1462 raise error.AmbiguousPrefixLookupError(
1463 1463 id, self.indexfile, _(b'ambiguous identifier')
1464 1464 )
1465 1465 # fall through to slow path that filters hidden revisions
1466 1466 except (AttributeError, ValueError):
1467 1467 # we are pure python, or key was too short to search radix tree
1468 1468 pass
1469 1469
1470 1470 if id in self._pcache:
1471 1471 return self._pcache[id]
1472 1472
1473 1473 if len(id) <= 40:
1474 1474 try:
1475 1475 # hex(node)[:...]
1476 1476 l = len(id) // 2 # grab an even number of digits
1477 1477 prefix = bin(id[: l * 2])
1478 1478 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1479 1479 nl = [
1480 1480 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1481 1481 ]
1482 1482 if nullhex.startswith(id):
1483 1483 nl.append(nullid)
1484 1484 if len(nl) > 0:
1485 1485 if len(nl) == 1 and not maybewdir:
1486 1486 self._pcache[id] = nl[0]
1487 1487 return nl[0]
1488 1488 raise error.AmbiguousPrefixLookupError(
1489 1489 id, self.indexfile, _(b'ambiguous identifier')
1490 1490 )
1491 1491 if maybewdir:
1492 1492 raise error.WdirUnsupported
1493 1493 return None
1494 1494 except TypeError:
1495 1495 pass
1496 1496
1497 1497 def lookup(self, id):
1498 1498 """locate a node based on:
1499 1499 - revision number or str(revision number)
1500 1500 - nodeid or subset of hex nodeid
1501 1501 """
1502 1502 n = self._match(id)
1503 1503 if n is not None:
1504 1504 return n
1505 1505 n = self._partialmatch(id)
1506 1506 if n:
1507 1507 return n
1508 1508
1509 1509 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1510 1510
1511 1511 def shortest(self, node, minlength=1):
1512 1512 """Find the shortest unambiguous prefix that matches node."""
1513 1513
1514 1514 def isvalid(prefix):
1515 1515 try:
1516 1516 matchednode = self._partialmatch(prefix)
1517 1517 except error.AmbiguousPrefixLookupError:
1518 1518 return False
1519 1519 except error.WdirUnsupported:
1520 1520 # single 'ff...' match
1521 1521 return True
1522 1522 if matchednode is None:
1523 1523 raise error.LookupError(node, self.indexfile, _(b'no node'))
1524 1524 return True
1525 1525
1526 1526 def maybewdir(prefix):
1527 1527 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1528 1528
1529 1529 hexnode = hex(node)
1530 1530
1531 1531 def disambiguate(hexnode, minlength):
1532 1532 """Disambiguate against wdirid."""
1533 1533 for length in range(minlength, len(hexnode) + 1):
1534 1534 prefix = hexnode[:length]
1535 1535 if not maybewdir(prefix):
1536 1536 return prefix
1537 1537
1538 1538 if not getattr(self, 'filteredrevs', None):
1539 1539 try:
1540 1540 length = max(self.index.shortest(node), minlength)
1541 1541 return disambiguate(hexnode, length)
1542 1542 except error.RevlogError:
1543 1543 if node != wdirid:
1544 1544 raise error.LookupError(node, self.indexfile, _(b'no node'))
1545 1545 except AttributeError:
1546 1546 # Fall through to pure code
1547 1547 pass
1548 1548
1549 1549 if node == wdirid:
1550 1550 for length in range(minlength, len(hexnode) + 1):
1551 1551 prefix = hexnode[:length]
1552 1552 if isvalid(prefix):
1553 1553 return prefix
1554 1554
1555 1555 for length in range(minlength, len(hexnode) + 1):
1556 1556 prefix = hexnode[:length]
1557 1557 if isvalid(prefix):
1558 1558 return disambiguate(hexnode, length)
1559 1559
1560 1560 def cmp(self, node, text):
1561 1561 """compare text with a given file revision
1562 1562
1563 1563 returns True if text is different than what is stored.
1564 1564 """
1565 1565 p1, p2 = self.parents(node)
1566 1566 return storageutil.hashrevisionsha1(text, p1, p2) != node
1567 1567
1568 1568 def _cachesegment(self, offset, data):
1569 1569 """Add a segment to the revlog cache.
1570 1570
1571 1571 Accepts an absolute offset and the data that is at that location.
1572 1572 """
1573 1573 o, d = self._chunkcache
1574 1574 # try to add to existing cache
1575 1575 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1576 1576 self._chunkcache = o, d + data
1577 1577 else:
1578 1578 self._chunkcache = offset, data
1579 1579
1580 1580 def _readsegment(self, offset, length, df=None):
1581 1581 """Load a segment of raw data from the revlog.
1582 1582
1583 1583 Accepts an absolute offset, length to read, and an optional existing
1584 1584 file handle to read from.
1585 1585
1586 1586 If an existing file handle is passed, it will be seeked and the
1587 1587 original seek position will NOT be restored.
1588 1588
1589 1589 Returns a str or buffer of raw byte data.
1590 1590
1591 1591 Raises if the requested number of bytes could not be read.
1592 1592 """
1593 1593 # Cache data both forward and backward around the requested
1594 1594 # data, in a fixed size window. This helps speed up operations
1595 1595 # involving reading the revlog backwards.
1596 1596 cachesize = self._chunkcachesize
1597 1597 realoffset = offset & ~(cachesize - 1)
1598 1598 reallength = (
1599 1599 (offset + length + cachesize) & ~(cachesize - 1)
1600 1600 ) - realoffset
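# Worked example (illustrative values): with the default cachesize of
# 65536, a request for offset=70000 and length=100 yields realoffset=65536
# and reallength=65536, i.e. one aligned 64 KiB window that contains the
# requested bytes.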
1601 1601 with self._datareadfp(df) as df:
1602 1602 df.seek(realoffset)
1603 1603 d = df.read(reallength)
1604 1604
1605 1605 self._cachesegment(realoffset, d)
1606 1606 if offset != realoffset or reallength != length:
1607 1607 startoffset = offset - realoffset
1608 1608 if len(d) - startoffset < length:
1609 1609 raise error.RevlogError(
1610 1610 _(
1611 1611 b'partial read of revlog %s; expected %d bytes from '
1612 1612 b'offset %d, got %d'
1613 1613 )
1614 1614 % (
1615 1615 self.indexfile if self._inline else self.datafile,
1616 1616 length,
1617 1617 realoffset,
1618 1618 len(d) - startoffset,
1619 1619 )
1620 1620 )
1621 1621
1622 1622 return util.buffer(d, startoffset, length)
1623 1623
1624 1624 if len(d) < length:
1625 1625 raise error.RevlogError(
1626 1626 _(
1627 1627 b'partial read of revlog %s; expected %d bytes from offset '
1628 1628 b'%d, got %d'
1629 1629 )
1630 1630 % (
1631 1631 self.indexfile if self._inline else self.datafile,
1632 1632 length,
1633 1633 offset,
1634 1634 len(d),
1635 1635 )
1636 1636 )
1637 1637
1638 1638 return d
1639 1639
1640 1640 def _getsegment(self, offset, length, df=None):
1641 1641 """Obtain a segment of raw data from the revlog.
1642 1642
1643 1643 Accepts an absolute offset, length of bytes to obtain, and an
1644 1644 optional file handle to the already-opened revlog. If the file
1645 1645 handle is used, its original seek position will not be preserved.
1646 1646
1647 1647 Requests for data may be returned from a cache.
1648 1648
1649 1649 Returns a str or a buffer instance of raw byte data.
1650 1650 """
1651 1651 o, d = self._chunkcache
1652 1652 l = len(d)
1653 1653
1654 1654 # is it in the cache?
1655 1655 cachestart = offset - o
1656 1656 cacheend = cachestart + length
1657 1657 if cachestart >= 0 and cacheend <= l:
1658 1658 if cachestart == 0 and cacheend == l:
1659 1659 return d # avoid a copy
1660 1660 return util.buffer(d, cachestart, cacheend - cachestart)
1661 1661
1662 1662 return self._readsegment(offset, length, df=df)
1663 1663
1664 1664 def _getsegmentforrevs(self, startrev, endrev, df=None):
1665 1665 """Obtain a segment of raw data corresponding to a range of revisions.
1666 1666
1667 1667 Accepts the start and end revisions and an optional already-open
1668 1668 file handle to be used for reading. If the file handle is read, its
1669 1669 seek position will not be preserved.
1670 1670
1671 1671 Requests for data may be satisfied by a cache.
1672 1672
1673 1673 Returns a 2-tuple of (offset, data) for the requested range of
1674 1674 revisions. Offset is the integer offset from the beginning of the
1675 1675 revlog and data is a str or buffer of the raw byte data.
1676 1676
1677 1677 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1678 1678 to determine where each revision's data begins and ends.
1679 1679 """
1680 1680 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1681 1681 # (functions are expensive).
1682 1682 index = self.index
1683 1683 istart = index[startrev]
1684 1684 start = int(istart[0] >> 16)
1685 1685 if startrev == endrev:
1686 1686 end = start + istart[1]
1687 1687 else:
1688 1688 iend = index[endrev]
1689 1689 end = int(iend[0] >> 16) + iend[1]
1690 1690
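# In an inline revlog the index entries and the data chunks are interleaved
# in a single file, so the data of revision r is shifted by (r + 1) index
# entries; e.g. with 64-byte v1 entries the data of revision 0 starts 64
# bytes into the file. (Explanatory note, not in the original file.)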
1691 1691 if self._inline:
1692 1692 start += (startrev + 1) * self._io.size
1693 1693 end += (endrev + 1) * self._io.size
1694 1694 length = end - start
1695 1695
1696 1696 return start, self._getsegment(start, length, df=df)
1697 1697
1698 1698 def _chunk(self, rev, df=None):
1699 1699 """Obtain a single decompressed chunk for a revision.
1700 1700
1701 1701 Accepts an integer revision and an optional already-open file handle
1702 1702 to be used for reading. If used, the seek position of the file will not
1703 1703 be preserved.
1704 1704
1705 1705 Returns a str holding uncompressed data for the requested revision.
1706 1706 """
1707 1707 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1708 1708
1709 1709 def _chunks(self, revs, df=None, targetsize=None):
1710 1710 """Obtain decompressed chunks for the specified revisions.
1711 1711
1712 1712 Accepts an iterable of numeric revisions that are assumed to be in
1713 1713 ascending order. Also accepts an optional already-open file handle
1714 1714 to be used for reading. If used, the seek position of the file will
1715 1715 not be preserved.
1716 1716
1717 1717 This function is similar to calling ``self._chunk()`` multiple times,
1718 1718 but is faster.
1719 1719
1720 1720 Returns a list with decompressed data for each requested revision.
1721 1721 """
1722 1722 if not revs:
1723 1723 return []
1724 1724 start = self.start
1725 1725 length = self.length
1726 1726 inline = self._inline
1727 1727 iosize = self._io.size
1728 1728 buffer = util.buffer
1729 1729
1730 1730 l = []
1731 1731 ladd = l.append
1732 1732
1733 1733 if not self._withsparseread:
1734 1734 slicedchunks = (revs,)
1735 1735 else:
1736 1736 slicedchunks = deltautil.slicechunk(
1737 1737 self, revs, targetsize=targetsize
1738 1738 )
1739 1739
1740 1740 for revschunk in slicedchunks:
1741 1741 firstrev = revschunk[0]
1742 1742 # Skip trailing revisions with empty diff
1743 1743 for lastrev in revschunk[::-1]:
1744 1744 if length(lastrev) != 0:
1745 1745 break
1746 1746
1747 1747 try:
1748 1748 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1749 1749 except OverflowError:
1750 1750 # issue4215 - we can't cache a run of chunks greater than
1751 1751 # 2G on Windows
1752 1752 return [self._chunk(rev, df=df) for rev in revschunk]
1753 1753
1754 1754 decomp = self.decompress
1755 1755 for rev in revschunk:
1756 1756 chunkstart = start(rev)
1757 1757 if inline:
1758 1758 chunkstart += (rev + 1) * iosize
1759 1759 chunklength = length(rev)
1760 1760 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1761 1761
1762 1762 return l
1763 1763
1764 1764 def _chunkclear(self):
1765 1765 """Clear the raw chunk cache."""
1766 1766 self._chunkcache = (0, b'')
1767 1767
1768 1768 def deltaparent(self, rev):
1769 1769 """return deltaparent of the given revision"""
1770 1770 base = self.index[rev][3]
1771 1771 if base == rev:
1772 1772 return nullrev
1773 1773 elif self._generaldelta:
1774 1774 return base
1775 1775 else:
1776 1776 return rev - 1
1777 1777
1778 1778 def issnapshot(self, rev):
1779 1779 """tells whether rev is a snapshot"""
1780 1780 if not self._sparserevlog:
1781 1781 return self.deltaparent(rev) == nullrev
1782 1782 elif util.safehasattr(self.index, b'issnapshot'):
1783 1783 # directly assign the method to cache the testing and access
1784 1784 self.issnapshot = self.index.issnapshot
1785 1785 return self.issnapshot(rev)
1786 1786 if rev == nullrev:
1787 1787 return True
1788 1788 entry = self.index[rev]
1789 1789 base = entry[3]
1790 1790 if base == rev:
1791 1791 return True
1792 1792 if base == nullrev:
1793 1793 return True
1794 1794 p1 = entry[5]
1795 1795 p2 = entry[6]
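# A delta against one of the revision's own parents is a regular delta;
# any other base makes this an intermediate snapshot, provided the base
# itself is (recursively) a snapshot. (Explanatory note, not in the
# original file.)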
1796 1796 if base == p1 or base == p2:
1797 1797 return False
1798 1798 return self.issnapshot(base)
1799 1799
1800 1800 def snapshotdepth(self, rev):
1801 1801 """number of snapshot in the chain before this one"""
1802 1802 if not self.issnapshot(rev):
1803 1803 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1804 1804 return len(self._deltachain(rev)[0]) - 1
1805 1805
1806 1806 def revdiff(self, rev1, rev2):
1807 1807 """return or calculate a delta between two revisions
1808 1808
1809 1809 The delta calculated is in binary form and is intended to be written to
1810 1810 revlog data directly. So this function needs raw revision data.
1811 1811 """
1812 1812 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1813 1813 return bytes(self._chunk(rev2))
1814 1814
1815 1815 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1816 1816
1817 1817 def _processflags(self, text, flags, operation, raw=False):
1818 1818 """deprecated entry point to access flag processors"""
1819 1819 msg = b'_processflag(...) use the specialized variant'
1820 1820 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1821 1821 if raw:
1822 1822 return text, flagutil.processflagsraw(self, text, flags)
1823 1823 elif operation == b'read':
1824 1824 return flagutil.processflagsread(self, text, flags)
1825 1825 else: # write operation
1826 1826 return flagutil.processflagswrite(self, text, flags, None)
1827 1827
1828 1828 def revision(self, nodeorrev, _df=None, raw=False):
1829 1829 """return an uncompressed revision of a given node or revision
1830 1830 number.
1831 1831
1832 1832 _df - an existing file handle to read from. (internal-only)
1833 1833 raw - an optional argument specifying if the revision data is to be
1834 1834 treated as raw data when applying flag transforms. 'raw' should be set
1835 1835 to True when generating changegroups or in debug commands.
1836 1836 """
1837 1837 if raw:
1838 1838 msg = (
1839 1839 b'revlog.revision(..., raw=True) is deprecated, '
1840 1840 b'use revlog.rawdata(...)'
1841 1841 )
1842 1842 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1843 1843 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1844 1844
1845 1845 def sidedata(self, nodeorrev, _df=None):
1846 1846 """a map of extra data related to the changeset but not part of the hash
1847 1847
1848 1848 This function currently returns a dictionary. However, a more advanced
1849 1849 mapping object will likely be used in the future for more
1850 1850 efficient/lazy code.
1851 1851 """
1852 1852 return self._revisiondata(nodeorrev, _df)[1]
1853 1853
1854 1854 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1855 1855 # deal with <nodeorrev> argument type
1856 1856 if isinstance(nodeorrev, int):
1857 1857 rev = nodeorrev
1858 1858 node = self.node(rev)
1859 1859 else:
1860 1860 node = nodeorrev
1861 1861 rev = None
1862 1862
1863 1863 # fast path the special `nullid` rev
1864 1864 if node == nullid:
1865 1865 return b"", {}
1866 1866
1867 1867 # ``rawtext`` is the text as stored inside the revlog. Might be the
1868 1868 # revision or might need to be processed to retrieve the revision.
1869 1869 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1870 1870
1871 1871 if raw and validated:
1872 1872 # if we don't want to process the raw text and that raw
1873 1873 # text is cached, we can exit early.
1874 1874 return rawtext, {}
1875 1875 if rev is None:
1876 1876 rev = self.rev(node)
1877 1877 # the revlog's flags for this revision
1878 1878 # (they usually alter its state or content)
1879 1879 flags = self.flags(rev)
1880 1880
1881 1881 if validated and flags == REVIDX_DEFAULT_FLAGS:
1882 1882 # no extra flags set, no flag processor runs, text = rawtext
1883 1883 return rawtext, {}
1884 1884
1885 1885 sidedata = {}
1886 1886 if raw:
1887 1887 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1888 1888 text = rawtext
1889 1889 else:
1890 1890 try:
1891 1891 r = flagutil.processflagsread(self, rawtext, flags)
1892 1892 except error.SidedataHashError as exc:
1893 1893 msg = _(b"integrity check failed on %s:%s sidedata key %d")
1894 1894 msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
1895 1895 raise error.RevlogError(msg)
1896 1896 text, validatehash, sidedata = r
1897 1897 if validatehash:
1898 1898 self.checkhash(text, node, rev=rev)
1899 1899 if not validated:
1900 1900 self._revisioncache = (node, rev, rawtext)
1901 1901
1902 1902 return text, sidedata
1903 1903
1904 1904 def _rawtext(self, node, rev, _df=None):
1905 1905 """return the possibly unvalidated rawtext for a revision
1906 1906
1907 1907 returns (rev, rawtext, validated)
1908 1908 """
1909 1909
1910 1910 # revision in the cache (could be useful to apply delta)
1911 1911 cachedrev = None
1912 1912 # An intermediate text to apply deltas to
1913 1913 basetext = None
1914 1914
1915 1915 # Check if we have the entry in cache
1916 1916 # The cache entry looks like (node, rev, rawtext)
1917 1917 if self._revisioncache:
1918 1918 if self._revisioncache[0] == node:
1919 1919 return (rev, self._revisioncache[2], True)
1920 1920 cachedrev = self._revisioncache[1]
1921 1921
1922 1922 if rev is None:
1923 1923 rev = self.rev(node)
1924 1924
1925 1925 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1926 1926 if stopped:
1927 1927 basetext = self._revisioncache[2]
1928 1928
1929 1929 # drop the cache to save memory; the caller is expected to
1930 1930 # update self._revisioncache after validating the text
1931 1931 self._revisioncache = None
1932 1932
1933 1933 targetsize = None
1934 1934 rawsize = self.index[rev][2]
1935 1935 if 0 <= rawsize:
1936 1936 targetsize = 4 * rawsize
1937 1937
1938 1938 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1939 1939 if basetext is None:
1940 1940 basetext = bytes(bins[0])
1941 1941 bins = bins[1:]
1942 1942
1943 1943 rawtext = mdiff.patches(basetext, bins)
1944 1944 del basetext # let us have a chance to free memory early
1945 1945 return (rev, rawtext, False)
1946 1946
1947 1947 def rawdata(self, nodeorrev, _df=None):
1948 1948 """return an uncompressed raw data of a given node or revision number.
1949 1949
1950 1950 _df - an existing file handle to read from. (internal-only)
1951 1951 """
1952 1952 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1953 1953
1954 1954 def hash(self, text, p1, p2):
1955 1955 """Compute a node hash.
1956 1956
1957 1957 Available as a function so that subclasses can replace the hash
1958 1958 as needed.
1959 1959 """
1960 1960 return storageutil.hashrevisionsha1(text, p1, p2)
1961 1961
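The default node hash covers the two parents (in sorted order) followed by the revision text. A minimal sketch of that computation, intended only to illustrate what ``storageutil.hashrevisionsha1`` does, not to replace it:

    import hashlib

    def sketch_hashrevision(text, p1, p2):
        # Parents are sorted so the node does not depend on parent order.
        a, b = sorted([p1, p2])
        s = hashlib.sha1(a)
        s.update(b)
        s.update(text)
        return s.digest()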
1962 1962 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1963 1963 """Check node hash integrity.
1964 1964
1965 1965 Available as a function so that subclasses can extend hash mismatch
1966 1966 behaviors as needed.
1967 1967 """
1968 1968 try:
1969 1969 if p1 is None and p2 is None:
1970 1970 p1, p2 = self.parents(node)
1971 1971 if node != self.hash(text, p1, p2):
1972 1972 # Clear the revision cache on hash failure. The revision cache
1973 1973 # only stores the raw revision and clearing the cache does have
1974 1974 # the side-effect that we won't have a cache hit when the raw
1975 1975 # revision data is accessed. But this case should be rare and
1976 1976 # it is extra work to teach the cache about the hash
1977 1977 # verification state.
1978 1978 if self._revisioncache and self._revisioncache[0] == node:
1979 1979 self._revisioncache = None
1980 1980
1981 1981 revornode = rev
1982 1982 if revornode is None:
1983 1983 revornode = templatefilters.short(hex(node))
1984 1984 raise error.RevlogError(
1985 1985 _(b"integrity check failed on %s:%s")
1986 1986 % (self.indexfile, pycompat.bytestr(revornode))
1987 1987 )
1988 1988 except error.RevlogError:
1989 1989 if self._censorable and storageutil.iscensoredtext(text):
1990 1990 raise error.CensoredNodeError(self.indexfile, node, text)
1991 1991 raise
1992 1992
1993 1993 def _enforceinlinesize(self, tr, fp=None):
1994 1994 """Check if the revlog is too big for inline and convert if so.
1995 1995
1996 1996 This should be called after revisions are added to the revlog. If the
1997 1997 revlog has grown too large to be an inline revlog, this function
1998 1998 converts it to use separate index and data files.
1999 1999 """
2000 2000 tiprev = len(self) - 1
2001 2001 if (
2002 2002 not self._inline
2003 2003 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
2004 2004 ):
2005 2005 return
2006 2006
2007 2007 troffset = tr.findoffset(self.indexfile)
2008 2008 if troffset is None:
2009 2009 raise error.RevlogError(
2010 2010 _(b"%s not found in the transaction") % self.indexfile
2011 2011 )
2012 2012 trindex = 0
2013 2013 tr.add(self.datafile, 0)
2014 2014
2015 2015 if fp:
2016 2016 fp.flush()
2017 2017 fp.close()
2018 2018 # We can't use the cached file handle after close(). So prevent
2019 2019 # its usage.
2020 2020 self._writinghandles = None
2021 2021
2022 2022 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
2023 2023 for r in self:
2024 2024 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
2025 2025 if troffset <= self.start(r):
2026 2026 trindex = r
2027 2027
2028 2028 with self._indexfp(b'w') as fp:
2029 2029 self.version &= ~FLAG_INLINE_DATA
2030 2030 self._inline = False
2031 2031 io = self._io
2032 2032 for i in self:
2033 2033 e = io.packentry(self.index[i], self.node, self.version, i)
2034 2034 fp.write(e)
2035 2035
2036 2036 # the temp file replaces the real index when we exit the context
2037 2037 # manager
2038 2038
2039 2039 tr.replace(self.indexfile, trindex * self._io.size)
2040 2040 nodemaputil.setup_persistent_nodemap(tr, self)
2041 2041 self._chunkclear()
2042 2042
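The trigger for this conversion is the early-return check at the top of the method: a revlog stays inline until the byte offset just past the tip revision's data crosses the inline threshold. A small illustrative sketch of that condition; the default threshold value is an assumption (``_maxinline``, roughly 128 KiB in this version):

    def sketch_needs_split(rl, maxinline=131072):
        # Illustrative only: mirrors the early-return in _enforceinlinesize().
        if not rl._inline:
            return False
        tiprev = len(rl) - 1
        return rl.start(tiprev) + rl.length(tiprev) >= maxinline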
2043 2043 def _nodeduplicatecallback(self, transaction, node):
2044 2044 """called when trying to add a node already stored."""
2045 2045
2046 2046 def addrevision(
2047 2047 self,
2048 2048 text,
2049 2049 transaction,
2050 2050 link,
2051 2051 p1,
2052 2052 p2,
2053 2053 cachedelta=None,
2054 2054 node=None,
2055 2055 flags=REVIDX_DEFAULT_FLAGS,
2056 2056 deltacomputer=None,
2057 2057 sidedata=None,
2058 2058 ):
2059 2059 """add a revision to the log
2060 2060
2061 2061 text - the revision data to add
2062 2062 transaction - the transaction object used for rollback
2063 2063 link - the linkrev data to add
2064 2064 p1, p2 - the parent nodeids of the revision
2065 2065 cachedelta - an optional precomputed delta
2066 2066 node - nodeid of revision; typically node is not specified, and it is
2067 2067 computed by default as hash(text, p1, p2); however, subclasses might
2068 2068 use a different hashing method (and override checkhash() in such cases)
2069 2069 flags - the known flags to set on the revision
2070 2070 deltacomputer - an optional deltacomputer instance shared between
2071 2071 multiple calls
2072 2072 """
2073 2073 if link == nullrev:
2074 2074 raise error.RevlogError(
2075 2075 _(b"attempted to add linkrev -1 to %s") % self.indexfile
2076 2076 )
2077 2077
2078 2078 if sidedata is None:
2079 2079 sidedata = {}
2080 2080 flags = flags & ~REVIDX_SIDEDATA
2081 2081 elif not self.hassidedata:
2082 2082 raise error.ProgrammingError(
2083 2083 _(b"trying to add sidedata to a revlog who don't support them")
2084 2084 )
2085 2085 else:
2086 2086 flags |= REVIDX_SIDEDATA
2087 2087
2088 2088 if flags:
2089 2089 node = node or self.hash(text, p1, p2)
2090 2090
2091 2091 rawtext, validatehash = flagutil.processflagswrite(
2092 2092 self, text, flags, sidedata=sidedata
2093 2093 )
2094 2094
2095 2095 # If the flag processor modifies the revision data, ignore any provided
2096 2096 # cachedelta.
2097 2097 if rawtext != text:
2098 2098 cachedelta = None
2099 2099
2100 2100 if len(rawtext) > _maxentrysize:
2101 2101 raise error.RevlogError(
2102 2102 _(
2103 2103 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2104 2104 )
2105 2105 % (self.indexfile, len(rawtext))
2106 2106 )
2107 2107
2108 2108 node = node or self.hash(rawtext, p1, p2)
2109 2109 rev = self.index.get_rev(node)
2110 2110 if rev is not None:
2111 2111 return rev
2112 2112
2113 2113 if validatehash:
2114 2114 self.checkhash(rawtext, node, p1=p1, p2=p2)
2115 2115
2116 2116 return self.addrawrevision(
2117 2117 rawtext,
2118 2118 transaction,
2119 2119 link,
2120 2120 p1,
2121 2121 p2,
2122 2122 node,
2123 2123 flags,
2124 2124 cachedelta=cachedelta,
2125 2125 deltacomputer=deltacomputer,
2126 2126 )
2127 2127
2128 2128 def addrawrevision(
2129 2129 self,
2130 2130 rawtext,
2131 2131 transaction,
2132 2132 link,
2133 2133 p1,
2134 2134 p2,
2135 2135 node,
2136 2136 flags,
2137 2137 cachedelta=None,
2138 2138 deltacomputer=None,
2139 2139 ):
2140 2140 """add a raw revision with known flags, node and parents
2141 2141 useful when reusing a revision not stored in this revlog (ex: received
2142 2142 over wire, or read from an external bundle).
2143 2143 """
2144 2144 dfh = None
2145 2145 if not self._inline:
2146 2146 dfh = self._datafp(b"a+")
2147 2147 ifh = self._indexfp(b"a+")
2148 2148 try:
2149 2149 return self._addrevision(
2150 2150 node,
2151 2151 rawtext,
2152 2152 transaction,
2153 2153 link,
2154 2154 p1,
2155 2155 p2,
2156 2156 flags,
2157 2157 cachedelta,
2158 2158 ifh,
2159 2159 dfh,
2160 2160 deltacomputer=deltacomputer,
2161 2161 )
2162 2162 finally:
2163 2163 if dfh:
2164 2164 dfh.close()
2165 2165 ifh.close()
2166 2166
2167 2167 def compress(self, data):
2168 2168 """Generate a possibly-compressed representation of data."""
2169 2169 if not data:
2170 2170 return b'', data
2171 2171
2172 2172 compressed = self._compressor.compress(data)
2173 2173
2174 2174 if compressed:
2175 2175 # The revlog compressor added the header in the returned data.
2176 2176 return b'', compressed
2177 2177
2178 2178 if data[0:1] == b'\0':
2179 2179 return b'', data
2180 2180 return b'u', data
2181 2181
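The return value is a ``(header, payload)`` pair: ``b'u'`` marks literal data, while an empty header means the payload either carries the compressor's own header or already starts with a NUL byte. Callers simply concatenate the two before writing, which is what lets ``decompress()`` dispatch on the first byte. A hedged round-trip sketch (``rl`` is an assumed revlog instance):

    header, payload = rl.compress(b'some revision text')
    chunk = header + payload      # what ends up in the data file
    assert bytes(rl.decompress(chunk)) == b'some revision text'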
2182 2182 def decompress(self, data):
2183 2183 """Decompress a revlog chunk.
2184 2184
2185 2185 The chunk is expected to begin with a header identifying the
2186 2186 format type so it can be routed to an appropriate decompressor.
2187 2187 """
2188 2188 if not data:
2189 2189 return data
2190 2190
2191 2191 # Revlogs are read much more frequently than they are written and many
2192 2192 # chunks only take microseconds to decompress, so performance is
2193 2193 # important here.
2194 2194 #
2195 2195 # We can make a few assumptions about revlogs:
2196 2196 #
2197 2197 # 1) the majority of chunks will be compressed (as opposed to inline
2198 2198 # raw data).
2199 2199 # 2) decompressing *any* data will likely be at least 10x slower than
2200 2200 # returning raw inline data.
2201 2201 # 3) we want to prioritize common and officially supported compression
2202 2202 # engines
2203 2203 #
2204 2204 # It follows that we want to optimize for "decompress compressed data
2205 2205 # when encoded with common and officially supported compression engines"
2206 2206 # case over "raw data" and "data encoded by less common or non-official
2207 2207 # compression engines." That is why we have the inline lookup first
2208 2208 # followed by the compengines lookup.
2209 2209 #
2210 2210 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2211 2211 # compressed chunks. And this matters for changelog and manifest reads.
2212 2212 t = data[0:1]
2213 2213
2214 2214 if t == b'x':
2215 2215 try:
2216 2216 return _zlibdecompress(data)
2217 2217 except zlib.error as e:
2218 2218 raise error.RevlogError(
2219 2219 _(b'revlog decompress error: %s')
2220 2220 % stringutil.forcebytestr(e)
2221 2221 )
2222 2222 # '\0' is more common than 'u' so it goes first.
2223 2223 elif t == b'\0':
2224 2224 return data
2225 2225 elif t == b'u':
2226 2226 return util.buffer(data, 1)
2227 2227
2228 2228 try:
2229 2229 compressor = self._decompressors[t]
2230 2230 except KeyError:
2231 2231 try:
2232 2232 engine = util.compengines.forrevlogheader(t)
2233 2233 compressor = engine.revlogcompressor(self._compengineopts)
2234 2234 self._decompressors[t] = compressor
2235 2235 except KeyError:
2236 2236 raise error.RevlogError(_(b'unknown compression type %r') % t)
2237 2237
2238 2238 return compressor.decompress(data)
2239 2239
2240 2240 def _addrevision(
2241 2241 self,
2242 2242 node,
2243 2243 rawtext,
2244 2244 transaction,
2245 2245 link,
2246 2246 p1,
2247 2247 p2,
2248 2248 flags,
2249 2249 cachedelta,
2250 2250 ifh,
2251 2251 dfh,
2252 2252 alwayscache=False,
2253 2253 deltacomputer=None,
2254 2254 ):
2255 2255 """internal function to add revisions to the log
2256 2256
2257 2257 see addrevision for argument descriptions.
2258 2258
2259 2259 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2260 2260
2261 2261 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2262 2262 be used.
2263 2263
2264 2264 invariants:
2265 2265 - rawtext is optional (can be None); if not set, cachedelta must be set.
2266 2266 if both are set, they must correspond to each other.
2267 2267 """
2268 2268 if node == nullid:
2269 2269 raise error.RevlogError(
2270 2270 _(b"%s: attempt to add null revision") % self.indexfile
2271 2271 )
2272 2272 if node == wdirid or node in wdirfilenodeids:
2273 2273 raise error.RevlogError(
2274 2274 _(b"%s: attempt to add wdir revision") % self.indexfile
2275 2275 )
2276 2276
2277 2277 if self._inline:
2278 2278 fh = ifh
2279 2279 else:
2280 2280 fh = dfh
2281 2281
2282 2282 btext = [rawtext]
2283 2283
2284 2284 curr = len(self)
2285 2285 prev = curr - 1
2286 2286 offset = self.end(prev)
2287 2287 p1r, p2r = self.rev(p1), self.rev(p2)
2288 2288
2289 2289 # full versions are inserted when the needed deltas
2290 2290 # become comparable to the uncompressed text
2291 2291 if rawtext is None:
2292 2292 # need rawtext size, before changed by flag processors, which is
2293 2293 # the non-raw size. use revlog explicitly to avoid filelog's extra
2294 2294 # logic that might remove metadata size.
2295 2295 textlen = mdiff.patchedsize(
2296 2296 revlog.size(self, cachedelta[0]), cachedelta[1]
2297 2297 )
2298 2298 else:
2299 2299 textlen = len(rawtext)
2300 2300
2301 2301 if deltacomputer is None:
2302 2302 deltacomputer = deltautil.deltacomputer(self)
2303 2303
2304 2304 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2305 2305
2306 2306 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2307 2307
2308 2308 e = (
2309 2309 offset_type(offset, flags),
2310 2310 deltainfo.deltalen,
2311 2311 textlen,
2312 2312 deltainfo.base,
2313 2313 link,
2314 2314 p1r,
2315 2315 p2r,
2316 2316 node,
2317 2317 )
2318 2318 self.index.append(e)
2319 2319
2320 2320 entry = self._io.packentry(e, self.node, self.version, curr)
2321 2321 self._writeentry(
2322 2322 transaction, ifh, dfh, entry, deltainfo.data, link, offset
2323 2323 )
2324 2324
2325 2325 rawtext = btext[0]
2326 2326
2327 2327 if alwayscache and rawtext is None:
2328 2328 rawtext = deltacomputer.buildtext(revinfo, fh)
2329 2329
2330 2330 if type(rawtext) == bytes: # only accept immutable objects
2331 2331 self._revisioncache = (node, curr, rawtext)
2332 2332 self._chainbasecache[curr] = deltainfo.chainbase
2333 2333 return curr
2334 2334
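For reference, the 8-tuple ``e`` appended to the index above packs, in order: offset+flags, stored chunk length, full-text length, delta base, linkrev, the two parent revisions, and the node. A small sketch of the packing helper and a plausible entry; the concrete values are made up for illustration:

    def sketch_offset_type(offset, flags):
        # high bits carry the data-file offset, the low 16 bits the flag field
        return (offset << 16) | flags

    entry = (
        sketch_offset_type(1234, 0),  # packed offset + flags
        90,                           # length of the stored (delta) chunk
        2048,                         # uncompressed full-text length
        7,                            # delta base revision
        42,                           # linkrev
        11, -1,                       # p1 rev, p2 rev (nullrev == -1)
        b'\x01' * 20,                 # node id
    )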
2335 2335 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2336 2336 # Files opened in a+ mode have inconsistent behavior on various
2337 2337 # platforms. Windows requires that a file positioning call be made
2338 2338 # when the file handle transitions between reads and writes. See
2339 2339 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2340 2340 # platforms, Python or the platform itself can be buggy. Some versions
2341 2341 # of Solaris have been observed to not append at the end of the file
2342 2342 # if the file was seeked to before the end. See issue4943 for more.
2343 2343 #
2344 2344 # We work around this issue by inserting a seek() before writing.
2345 2345 # Note: This is likely not necessary on Python 3. However, because
2346 2346 # the file handle is reused for reads and may be seeked there, we need
2347 2347 # to be careful before changing this.
2348 2348 ifh.seek(0, os.SEEK_END)
2349 2349 if dfh:
2350 2350 dfh.seek(0, os.SEEK_END)
2351 2351
2352 2352 curr = len(self) - 1
2353 2353 if not self._inline:
2354 2354 transaction.add(self.datafile, offset)
2355 2355 transaction.add(self.indexfile, curr * len(entry))
2356 2356 if data[0]:
2357 2357 dfh.write(data[0])
2358 2358 dfh.write(data[1])
2359 2359 ifh.write(entry)
2360 2360 else:
2361 2361 offset += curr * self._io.size
2362 2362 transaction.add(self.indexfile, offset)
2363 2363 ifh.write(entry)
2364 2364 ifh.write(data[0])
2365 2365 ifh.write(data[1])
2366 2366 self._enforceinlinesize(transaction, ifh)
2367 2367 nodemaputil.setup_persistent_nodemap(transaction, self)
2368 2368
2369 2369 def addgroup(
2370 2370 self,
2371 2371 deltas,
2372 2372 linkmapper,
2373 2373 transaction,
2374 2374 alwayscache=False,
2375 2375 addrevisioncb=None,
2376 2376 duplicaterevisioncb=None,
2377 2377 ):
2378 2378 """
2379 2379 add a delta group
2380 2380
2381 2381 given a set of deltas, add them to the revision log. the
2382 2382 first delta is against its parent, which should be in our
2383 2383 log, the rest are against the previous delta.
2384 2384
2385 2385 If ``addrevisioncb`` is defined, it will be called with arguments of
2386 2386 this revlog and the revision number that was added.
2387 2387 """
2388 2388
2389 2389 if self._writinghandles:
2390 2390 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2391 2391
2392 2392 r = len(self)
2393 2393 end = 0
2394 2394 if r:
2395 2395 end = self.end(r - 1)
2396 2396 ifh = self._indexfp(b"a+")
2397 2397 isize = r * self._io.size
2398 2398 if self._inline:
2399 2399 transaction.add(self.indexfile, end + isize)
2400 2400 dfh = None
2401 2401 else:
2402 2402 transaction.add(self.indexfile, isize)
2403 2403 transaction.add(self.datafile, end)
2404 2404 dfh = self._datafp(b"a+")
2405 2405
2406 2406 def flush():
2407 2407 if dfh:
2408 2408 dfh.flush()
2409 2409 ifh.flush()
2410 2410
2411 2411 self._writinghandles = (ifh, dfh)
2412 2412 empty = True
2413 2413
2414 2414 try:
2415 2415 deltacomputer = deltautil.deltacomputer(self)
2416 2416 # loop through our set of deltas
2417 2417 for data in deltas:
2418 2418 node, p1, p2, linknode, deltabase, delta, flags = data
2419 2419 link = linkmapper(linknode)
2420 2420 flags = flags or REVIDX_DEFAULT_FLAGS
2421 2421
2422 if self.index.has_node(node):
2422 rev = self.index.get_rev(node)
2423 if rev is not None:
2423 2424 # this can happen if two branches make the same change
2424 self._nodeduplicatecallback(transaction, node)
2425 self._nodeduplicatecallback(transaction, rev)
2425 2426 if duplicaterevisioncb:
2426 duplicaterevisioncb(self, node)
2427 duplicaterevisioncb(self, rev)
2427 2428 empty = False
2428 2429 continue
2429 2430
2430 2431 for p in (p1, p2):
2431 2432 if not self.index.has_node(p):
2432 2433 raise error.LookupError(
2433 2434 p, self.indexfile, _(b'unknown parent')
2434 2435 )
2435 2436
2436 2437 if not self.index.has_node(deltabase):
2437 2438 raise error.LookupError(
2438 2439 deltabase, self.indexfile, _(b'unknown delta base')
2439 2440 )
2440 2441
2441 2442 baserev = self.rev(deltabase)
2442 2443
2443 2444 if baserev != nullrev and self.iscensored(baserev):
2444 2445 # if base is censored, delta must be full replacement in a
2445 2446 # single patch operation
2446 2447 hlen = struct.calcsize(b">lll")
2447 2448 oldlen = self.rawsize(baserev)
2448 2449 newlen = len(delta) - hlen
2449 2450 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2450 2451 raise error.CensoredBaseError(
2451 2452 self.indexfile, self.node(baserev)
2452 2453 )
2453 2454
2454 2455 if not flags and self._peek_iscensored(baserev, delta, flush):
2455 2456 flags |= REVIDX_ISCENSORED
2456 2457
2457 2458 # We assume consumers of addrevisioncb will want to retrieve
2458 2459 # the added revision, which will require a call to
2459 2460 # revision(). revision() will fast path if there is a cache
2460 2461 # hit. So, we tell _addrevision() to always cache in this case.
2461 2462 # We're only using addgroup() in the context of changegroup
2462 2463 # generation so the revision data can always be handled as raw
2463 2464 # by the flagprocessor.
2464 self._addrevision(
2465 rev = self._addrevision(
2465 2466 node,
2466 2467 None,
2467 2468 transaction,
2468 2469 link,
2469 2470 p1,
2470 2471 p2,
2471 2472 flags,
2472 2473 (baserev, delta),
2473 2474 ifh,
2474 2475 dfh,
2475 2476 alwayscache=alwayscache,
2476 2477 deltacomputer=deltacomputer,
2477 2478 )
2478 2479
2479 2480 if addrevisioncb:
2480 addrevisioncb(self, node)
2481 addrevisioncb(self, rev)
2481 2482 empty = False
2482 2483
2483 2484 if not dfh and not self._inline:
2484 2485 # addrevision switched from inline to conventional
2485 2486 # reopen the index
2486 2487 ifh.close()
2487 2488 dfh = self._datafp(b"a+")
2488 2489 ifh = self._indexfp(b"a+")
2489 2490 self._writinghandles = (ifh, dfh)
2490 2491 finally:
2491 2492 self._writinghandles = None
2492 2493
2493 2494 if dfh:
2494 2495 dfh.close()
2495 2496 ifh.close()
2496 2497 return not empty
2497 2498
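With this change, both ``addrevisioncb`` and ``duplicaterevisioncb`` receive the revision number of the affected entry rather than its node; the node can still be recovered via ``store.node(rev)``. A hedged usage sketch, where ``deltas``, ``linkmapper`` and ``tr`` are assumed to come from the surrounding changegroup machinery:

    added = []

    def on_added(store, rev):
        added.append(rev)             # new revision number in this store

    def on_duplicate(store, rev):
        pass                          # revision was already present locally

    rl.addgroup(
        deltas,                       # iterable of changegroup delta tuples
        linkmapper,                   # maps a linknode to its linkrev
        tr,
        addrevisioncb=on_added,
        duplicaterevisioncb=on_duplicate,
    )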
2498 2499 def iscensored(self, rev):
2499 2500 """Check if a file revision is censored."""
2500 2501 if not self._censorable:
2501 2502 return False
2502 2503
2503 2504 return self.flags(rev) & REVIDX_ISCENSORED
2504 2505
2505 2506 def _peek_iscensored(self, baserev, delta, flush):
2506 2507 """Quickly check if a delta produces a censored revision."""
2507 2508 if not self._censorable:
2508 2509 return False
2509 2510
2510 2511 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2511 2512
2512 2513 def getstrippoint(self, minlink):
2513 2514 """find the minimum rev that must be stripped to strip the linkrev
2514 2515
2515 2516 Returns a tuple containing the minimum rev and a set of all revs that
2516 2517 have linkrevs that will be broken by this strip.
2517 2518 """
2518 2519 return storageutil.resolvestripinfo(
2519 2520 minlink,
2520 2521 len(self) - 1,
2521 2522 self.headrevs(),
2522 2523 self.linkrev,
2523 2524 self.parentrevs,
2524 2525 )
2525 2526
2526 2527 def strip(self, minlink, transaction):
2527 2528 """truncate the revlog on the first revision with a linkrev >= minlink
2528 2529
2529 2530 This function is called when we're stripping revision minlink and
2530 2531 its descendants from the repository.
2531 2532
2532 2533 We have to remove all revisions with linkrev >= minlink, because
2533 2534 the equivalent changelog revisions will be renumbered after the
2534 2535 strip.
2535 2536
2536 2537 So we truncate the revlog on the first of these revisions, and
2537 2538 trust that the caller has saved the revisions that shouldn't be
2538 2539 removed and that it'll re-add them after this truncation.
2539 2540 """
2540 2541 if len(self) == 0:
2541 2542 return
2542 2543
2543 2544 rev, _ = self.getstrippoint(minlink)
2544 2545 if rev == len(self):
2545 2546 return
2546 2547
2547 2548 # first truncate the files on disk
2548 2549 end = self.start(rev)
2549 2550 if not self._inline:
2550 2551 transaction.add(self.datafile, end)
2551 2552 end = rev * self._io.size
2552 2553 else:
2553 2554 end += rev * self._io.size
2554 2555
2555 2556 transaction.add(self.indexfile, end)
2556 2557
2557 2558 # then reset internal state in memory to forget those revisions
2558 2559 self._revisioncache = None
2559 2560 self._chaininfocache = util.lrucachedict(500)
2560 2561 self._chunkclear()
2561 2562
2562 2563 del self.index[rev:-1]
2563 2564
2564 2565 def checksize(self):
2565 2566 """Check size of index and data files
2566 2567
2567 2568 return a (dd, di) tuple.
2568 2569 - dd: extra bytes for the "data" file
2569 2570 - di: extra bytes for the "index" file
2570 2571
2571 2572 A healthy revlog will return (0, 0).
2572 2573 """
2573 2574 expected = 0
2574 2575 if len(self):
2575 2576 expected = max(0, self.end(len(self) - 1))
2576 2577
2577 2578 try:
2578 2579 with self._datafp() as f:
2579 2580 f.seek(0, io.SEEK_END)
2580 2581 actual = f.tell()
2581 2582 dd = actual - expected
2582 2583 except IOError as inst:
2583 2584 if inst.errno != errno.ENOENT:
2584 2585 raise
2585 2586 dd = 0
2586 2587
2587 2588 try:
2588 2589 f = self.opener(self.indexfile)
2589 2590 f.seek(0, io.SEEK_END)
2590 2591 actual = f.tell()
2591 2592 f.close()
2592 2593 s = self._io.size
2593 2594 i = max(0, actual // s)
2594 2595 di = actual - (i * s)
2595 2596 if self._inline:
2596 2597 databytes = 0
2597 2598 for r in self:
2598 2599 databytes += max(0, self.length(r))
2599 2600 dd = 0
2600 2601 di = actual - len(self) * s - databytes
2601 2602 except IOError as inst:
2602 2603 if inst.errno != errno.ENOENT:
2603 2604 raise
2604 2605 di = 0
2605 2606
2606 2607 return (dd, di)
2607 2608
2608 2609 def files(self):
2609 2610 res = [self.indexfile]
2610 2611 if not self._inline:
2611 2612 res.append(self.datafile)
2612 2613 return res
2613 2614
2614 2615 def emitrevisions(
2615 2616 self,
2616 2617 nodes,
2617 2618 nodesorder=None,
2618 2619 revisiondata=False,
2619 2620 assumehaveparentrevisions=False,
2620 2621 deltamode=repository.CG_DELTAMODE_STD,
2621 2622 ):
2622 2623 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2623 2624 raise error.ProgrammingError(
2624 2625 b'unhandled value for nodesorder: %s' % nodesorder
2625 2626 )
2626 2627
2627 2628 if nodesorder is None and not self._generaldelta:
2628 2629 nodesorder = b'storage'
2629 2630
2630 2631 if (
2631 2632 not self._storedeltachains
2632 2633 and deltamode != repository.CG_DELTAMODE_PREV
2633 2634 ):
2634 2635 deltamode = repository.CG_DELTAMODE_FULL
2635 2636
2636 2637 return storageutil.emitrevisions(
2637 2638 self,
2638 2639 nodes,
2639 2640 nodesorder,
2640 2641 revlogrevisiondelta,
2641 2642 deltaparentfn=self.deltaparent,
2642 2643 candeltafn=self.candelta,
2643 2644 rawsizefn=self.rawsize,
2644 2645 revdifffn=self.revdiff,
2645 2646 flagsfn=self.flags,
2646 2647 deltamode=deltamode,
2647 2648 revisiondata=revisiondata,
2648 2649 assumehaveparentrevisions=assumehaveparentrevisions,
2649 2650 )
2650 2651
2651 2652 DELTAREUSEALWAYS = b'always'
2652 2653 DELTAREUSESAMEREVS = b'samerevs'
2653 2654 DELTAREUSENEVER = b'never'
2654 2655
2655 2656 DELTAREUSEFULLADD = b'fulladd'
2656 2657
2657 2658 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2658 2659
2659 2660 def clone(
2660 2661 self,
2661 2662 tr,
2662 2663 destrevlog,
2663 2664 addrevisioncb=None,
2664 2665 deltareuse=DELTAREUSESAMEREVS,
2665 2666 forcedeltabothparents=None,
2666 2667 sidedatacompanion=None,
2667 2668 ):
2668 2669 """Copy this revlog to another, possibly with format changes.
2669 2670
2670 2671 The destination revlog will contain the same revisions and nodes.
2671 2672 However, it may not be bit-for-bit identical due to e.g. delta encoding
2672 2673 differences.
2673 2674
2674 2675 The ``deltareuse`` argument controls how deltas from the existing revlog
2675 2676 are preserved in the destination revlog. The argument can have the
2676 2677 following values:
2677 2678
2678 2679 DELTAREUSEALWAYS
2679 2680 Deltas will always be reused (if possible), even if the destination
2680 2681 revlog would not select the same revisions for the delta. This is the
2681 2682 fastest mode of operation.
2682 2683 DELTAREUSESAMEREVS
2683 2684 Deltas will be reused if the destination revlog would pick the same
2684 2685 revisions for the delta. This mode strikes a balance between speed
2685 2686 and optimization.
2686 2687 DELTAREUSENEVER
2687 2688 Deltas will never be reused. This is the slowest mode of execution.
2688 2689 This mode can be used to recompute deltas (e.g. if the diff/delta
2689 2690 algorithm changes).
2690 2691 DELTAREUSEFULLADD
2691 2692 Revisions will be re-added as if they were new content. This is
2692 2693 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2693 2694 e.g. large file detection and handling.
2694 2695
2695 2696 Delta computation can be slow, so the choice of delta reuse policy can
2696 2697 significantly affect run time.
2697 2698
2698 2699 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2699 2700 two extremes. Deltas will be reused if they are appropriate. But if the
2700 2701 delta could choose a better revision, it will do so. This means if you
2701 2702 are converting a non-generaldelta revlog to a generaldelta revlog,
2702 2703 deltas will be recomputed if the delta's parent isn't a parent of the
2703 2704 revision.
2704 2705
2705 2706 In addition to the delta policy, the ``forcedeltabothparents``
2706 2707 argument controls whether to force computing deltas against both parents
2707 2708 for merges. If None, the destination revlog's current setting is kept.
2708 2709
2709 2710 If not None, the `sidedatacompanion` is a callable that accepts two
2710 2711 arguments:
2711 2712
2712 2713 (srcrevlog, rev)
2713 2714
2714 2715 and returns a quintet that controls changes to sidedata content from the
2715 2716 old revision to the new clone result:
2716 2717
2717 2718 (dropall, filterout, update, new_flags, dropped_flags)
2718 2719
2719 2720 * if `dropall` is True, all sidedata should be dropped
2720 2721 * `filterout` is a set of sidedata keys that should be dropped
2721 2722 * `update` is a mapping of additional/new key -> value
2722 2723 * new_flags is a bitfield of new flags that the revision should get
2723 2724 * dropped_flags is a bitfield of flags that the revision should no longer have
2724 2725 """
2725 2726 if deltareuse not in self.DELTAREUSEALL:
2726 2727 raise ValueError(
2727 2728 _(b'value for deltareuse invalid: %s') % deltareuse
2728 2729 )
2729 2730
2730 2731 if len(destrevlog):
2731 2732 raise ValueError(_(b'destination revlog is not empty'))
2732 2733
2733 2734 if getattr(self, 'filteredrevs', None):
2734 2735 raise ValueError(_(b'source revlog has filtered revisions'))
2735 2736 if getattr(destrevlog, 'filteredrevs', None):
2736 2737 raise ValueError(_(b'destination revlog has filtered revisions'))
2737 2738
2738 2739 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2739 2740 # if possible.
2740 2741 oldlazydelta = destrevlog._lazydelta
2741 2742 oldlazydeltabase = destrevlog._lazydeltabase
2742 2743 oldamd = destrevlog._deltabothparents
2743 2744
2744 2745 try:
2745 2746 if deltareuse == self.DELTAREUSEALWAYS:
2746 2747 destrevlog._lazydeltabase = True
2747 2748 destrevlog._lazydelta = True
2748 2749 elif deltareuse == self.DELTAREUSESAMEREVS:
2749 2750 destrevlog._lazydeltabase = False
2750 2751 destrevlog._lazydelta = True
2751 2752 elif deltareuse == self.DELTAREUSENEVER:
2752 2753 destrevlog._lazydeltabase = False
2753 2754 destrevlog._lazydelta = False
2754 2755
2755 2756 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2756 2757
2757 2758 self._clone(
2758 2759 tr,
2759 2760 destrevlog,
2760 2761 addrevisioncb,
2761 2762 deltareuse,
2762 2763 forcedeltabothparents,
2763 2764 sidedatacompanion,
2764 2765 )
2765 2766
2766 2767 finally:
2767 2768 destrevlog._lazydelta = oldlazydelta
2768 2769 destrevlog._lazydeltabase = oldlazydeltabase
2769 2770 destrevlog._deltabothparents = oldamd
2770 2771
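A hedged usage sketch of the cloning API, e.g. to recompute every delta while copying into an empty destination revlog. The names ``src``, ``dest`` and ``tr`` are assumptions; the callback signature follows the ``addrevisioncb(self, rev, node)`` call in ``_clone()`` below:

    src.clone(
        tr,
        dest,                                         # must be an empty revlog
        deltareuse=src.DELTAREUSENEVER,               # force deltas to be recomputed
        addrevisioncb=lambda store, rev, node: None,  # invoked per copied revision
    )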
2771 2772 def _clone(
2772 2773 self,
2773 2774 tr,
2774 2775 destrevlog,
2775 2776 addrevisioncb,
2776 2777 deltareuse,
2777 2778 forcedeltabothparents,
2778 2779 sidedatacompanion,
2779 2780 ):
2780 2781 """perform the core duty of `revlog.clone` after parameter processing"""
2781 2782 deltacomputer = deltautil.deltacomputer(destrevlog)
2782 2783 index = self.index
2783 2784 for rev in self:
2784 2785 entry = index[rev]
2785 2786
2786 2787 # Some classes override linkrev to take filtered revs into
2787 2788 # account. Use raw entry from index.
2788 2789 flags = entry[0] & 0xFFFF
2789 2790 linkrev = entry[4]
2790 2791 p1 = index[entry[5]][7]
2791 2792 p2 = index[entry[6]][7]
2792 2793 node = entry[7]
2793 2794
2794 2795 sidedataactions = (False, [], {}, 0, 0)
2795 2796 if sidedatacompanion is not None:
2796 2797 sidedataactions = sidedatacompanion(self, rev)
2797 2798
2798 2799 # (Possibly) reuse the delta from the revlog if allowed and
2799 2800 # the revlog chunk is a delta.
2800 2801 cachedelta = None
2801 2802 rawtext = None
2802 2803 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2803 2804 dropall = sidedataactions[0]
2804 2805 filterout = sidedataactions[1]
2805 2806 update = sidedataactions[2]
2806 2807 new_flags = sidedataactions[3]
2807 2808 dropped_flags = sidedataactions[4]
2808 2809 text, sidedata = self._revisiondata(rev)
2809 2810 if dropall:
2810 2811 sidedata = {}
2811 2812 for key in filterout:
2812 2813 sidedata.pop(key, None)
2813 2814 sidedata.update(update)
2814 2815 if not sidedata:
2815 2816 sidedata = None
2816 2817
2817 2818 flags |= new_flags
2818 2819 flags &= ~dropped_flags
2819 2820
2820 2821 destrevlog.addrevision(
2821 2822 text,
2822 2823 tr,
2823 2824 linkrev,
2824 2825 p1,
2825 2826 p2,
2826 2827 cachedelta=cachedelta,
2827 2828 node=node,
2828 2829 flags=flags,
2829 2830 deltacomputer=deltacomputer,
2830 2831 sidedata=sidedata,
2831 2832 )
2832 2833 else:
2833 2834 if destrevlog._lazydelta:
2834 2835 dp = self.deltaparent(rev)
2835 2836 if dp != nullrev:
2836 2837 cachedelta = (dp, bytes(self._chunk(rev)))
2837 2838
2838 2839 if not cachedelta:
2839 2840 rawtext = self.rawdata(rev)
2840 2841
2841 2842 ifh = destrevlog.opener(
2842 2843 destrevlog.indexfile, b'a+', checkambig=False
2843 2844 )
2844 2845 dfh = None
2845 2846 if not destrevlog._inline:
2846 2847 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2847 2848 try:
2848 2849 destrevlog._addrevision(
2849 2850 node,
2850 2851 rawtext,
2851 2852 tr,
2852 2853 linkrev,
2853 2854 p1,
2854 2855 p2,
2855 2856 flags,
2856 2857 cachedelta,
2857 2858 ifh,
2858 2859 dfh,
2859 2860 deltacomputer=deltacomputer,
2860 2861 )
2861 2862 finally:
2862 2863 if dfh:
2863 2864 dfh.close()
2864 2865 ifh.close()
2865 2866
2866 2867 if addrevisioncb:
2867 2868 addrevisioncb(self, rev, node)
2868 2869
2869 2870 def censorrevision(self, tr, censornode, tombstone=b''):
2870 2871 if (self.version & 0xFFFF) == REVLOGV0:
2871 2872 raise error.RevlogError(
2872 2873 _(b'cannot censor with version %d revlogs') % self.version
2873 2874 )
2874 2875
2875 2876 censorrev = self.rev(censornode)
2876 2877 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2877 2878
2878 2879 if len(tombstone) > self.rawsize(censorrev):
2879 2880 raise error.Abort(
2880 2881 _(b'censor tombstone must be no longer than censored data')
2881 2882 )
2882 2883
2883 2884 # Rewriting the revlog in place is hard. Our strategy for censoring is
2884 2885 # to create a new revlog, copy all revisions to it, then replace the
2885 2886 # revlogs on transaction close.
2886 2887
2887 2888 newindexfile = self.indexfile + b'.tmpcensored'
2888 2889 newdatafile = self.datafile + b'.tmpcensored'
2889 2890
2890 2891 # This is a bit dangerous. We could easily have a mismatch of state.
2891 2892 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
2892 2893 newrl.version = self.version
2893 2894 newrl._generaldelta = self._generaldelta
2894 2895 newrl._io = self._io
2895 2896
2896 2897 for rev in self.revs():
2897 2898 node = self.node(rev)
2898 2899 p1, p2 = self.parents(node)
2899 2900
2900 2901 if rev == censorrev:
2901 2902 newrl.addrawrevision(
2902 2903 tombstone,
2903 2904 tr,
2904 2905 self.linkrev(censorrev),
2905 2906 p1,
2906 2907 p2,
2907 2908 censornode,
2908 2909 REVIDX_ISCENSORED,
2909 2910 )
2910 2911
2911 2912 if newrl.deltaparent(rev) != nullrev:
2912 2913 raise error.Abort(
2913 2914 _(
2914 2915 b'censored revision stored as delta; '
2915 2916 b'cannot censor'
2916 2917 ),
2917 2918 hint=_(
2918 2919 b'censoring of revlogs is not '
2919 2920 b'fully implemented; please report '
2920 2921 b'this bug'
2921 2922 ),
2922 2923 )
2923 2924 continue
2924 2925
2925 2926 if self.iscensored(rev):
2926 2927 if self.deltaparent(rev) != nullrev:
2927 2928 raise error.Abort(
2928 2929 _(
2929 2930 b'cannot censor due to censored '
2930 2931 b'revision having delta stored'
2931 2932 )
2932 2933 )
2933 2934 rawtext = self._chunk(rev)
2934 2935 else:
2935 2936 rawtext = self.rawdata(rev)
2936 2937
2937 2938 newrl.addrawrevision(
2938 2939 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2939 2940 )
2940 2941
2941 2942 tr.addbackup(self.indexfile, location=b'store')
2942 2943 if not self._inline:
2943 2944 tr.addbackup(self.datafile, location=b'store')
2944 2945
2945 2946 self.opener.rename(newrl.indexfile, self.indexfile)
2946 2947 if not self._inline:
2947 2948 self.opener.rename(newrl.datafile, self.datafile)
2948 2949
2949 2950 self.clearcaches()
2950 2951 self._loadindex()
2951 2952
2952 2953 def verifyintegrity(self, state):
2953 2954 """Verifies the integrity of the revlog.
2954 2955
2955 2956 Yields ``revlogproblem`` instances describing problems that are
2956 2957 found.
2957 2958 """
2958 2959 dd, di = self.checksize()
2959 2960 if dd:
2960 2961 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2961 2962 if di:
2962 2963 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2963 2964
2964 2965 version = self.version & 0xFFFF
2965 2966
2966 2967 # The verifier tells us what version revlog we should be.
2967 2968 if version != state[b'expectedversion']:
2968 2969 yield revlogproblem(
2969 2970 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2970 2971 % (self.indexfile, version, state[b'expectedversion'])
2971 2972 )
2972 2973
2973 2974 state[b'skipread'] = set()
2974 2975 state[b'safe_renamed'] = set()
2975 2976
2976 2977 for rev in self:
2977 2978 node = self.node(rev)
2978 2979
2979 2980 # Verify contents. 4 cases to care about:
2980 2981 #
2981 2982 # common: the most common case
2982 2983 # rename: with a rename
2983 2984 # meta: file content starts with b'\1\n', the metadata
2984 2985 # header defined in filelog.py, but without a rename
2985 2986 # ext: content stored externally
2986 2987 #
2987 2988 # More formally, their differences are shown below:
2988 2989 #
2989 2990 # | common | rename | meta | ext
2990 2991 # -------------------------------------------------------
2991 2992 # flags() | 0 | 0 | 0 | not 0
2992 2993 # renamed() | False | True | False | ?
2993 2994 # rawtext[0:2]=='\1\n'| False | True | True | ?
2994 2995 #
2995 2996 # "rawtext" means the raw text stored in revlog data, which
2996 2997 # could be retrieved by "rawdata(rev)". "text"
2997 2998 # mentioned below is "revision(rev)".
2998 2999 #
2999 3000 # There are 3 different lengths stored physically:
3000 3001 # 1. L1: rawsize, stored in revlog index
3001 3002 # 2. L2: len(rawtext), stored in revlog data
3002 3003 # 3. L3: len(text), stored in revlog data if flags==0, or
3003 3004 # possibly somewhere else if flags!=0
3004 3005 #
3005 3006 # L1 should be equal to L2. L3 could be different from them.
3006 3007 # "text" may or may not affect commit hash depending on flag
3007 3008 # processors (see flagutil.addflagprocessor).
3008 3009 #
3009 3010 # | common | rename | meta | ext
3010 3011 # -------------------------------------------------
3011 3012 # rawsize() | L1 | L1 | L1 | L1
3012 3013 # size() | L1 | L2-LM | L1(*) | L1 (?)
3013 3014 # len(rawtext) | L2 | L2 | L2 | L2
3014 3015 # len(text) | L2 | L2 | L2 | L3
3015 3016 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3016 3017 #
3017 3018 # LM: length of metadata, depending on rawtext
3018 3019 # (*): not ideal, see comment in filelog.size
3019 3020 # (?): could be "- len(meta)" if the resolved content has
3020 3021 # rename metadata
3021 3022 #
3022 3023 # Checks needed to be done:
3023 3024 # 1. length check: L1 == L2, in all cases.
3024 3025 # 2. hash check: depending on flag processor, we may need to
3025 3026 # use either "text" (external), or "rawtext" (in revlog).
3026 3027
3027 3028 try:
3028 3029 skipflags = state.get(b'skipflags', 0)
3029 3030 if skipflags:
3030 3031 skipflags &= self.flags(rev)
3031 3032
3032 3033 _verify_revision(self, skipflags, state, node)
3033 3034
3034 3035 l1 = self.rawsize(rev)
3035 3036 l2 = len(self.rawdata(node))
3036 3037
3037 3038 if l1 != l2:
3038 3039 yield revlogproblem(
3039 3040 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3040 3041 node=node,
3041 3042 )
3042 3043
3043 3044 except error.CensoredNodeError:
3044 3045 if state[b'erroroncensored']:
3045 3046 yield revlogproblem(
3046 3047 error=_(b'censored file data'), node=node
3047 3048 )
3048 3049 state[b'skipread'].add(node)
3049 3050 except Exception as e:
3050 3051 yield revlogproblem(
3051 3052 error=_(b'unpacking %s: %s')
3052 3053 % (short(node), stringutil.forcebytestr(e)),
3053 3054 node=node,
3054 3055 )
3055 3056 state[b'skipread'].add(node)
3056 3057
3057 3058 def storageinfo(
3058 3059 self,
3059 3060 exclusivefiles=False,
3060 3061 sharedfiles=False,
3061 3062 revisionscount=False,
3062 3063 trackedsize=False,
3063 3064 storedsize=False,
3064 3065 ):
3065 3066 d = {}
3066 3067
3067 3068 if exclusivefiles:
3068 3069 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3069 3070 if not self._inline:
3070 3071 d[b'exclusivefiles'].append((self.opener, self.datafile))
3071 3072
3072 3073 if sharedfiles:
3073 3074 d[b'sharedfiles'] = []
3074 3075
3075 3076 if revisionscount:
3076 3077 d[b'revisionscount'] = len(self)
3077 3078
3078 3079 if trackedsize:
3079 3080 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3080 3081
3081 3082 if storedsize:
3082 3083 d[b'storedsize'] = sum(
3083 3084 self.opener.stat(path).st_size for path in self.files()
3084 3085 )
3085 3086
3086 3087 return d
@@ -1,1448 +1,1450 b''
1 1 # storage.py - Testing of storage primitives.
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import unittest
11 11
12 12 from ..node import (
13 13 hex,
14 14 nullid,
15 15 nullrev,
16 16 )
17 17 from ..pycompat import getattr
18 18 from .. import (
19 19 error,
20 20 mdiff,
21 21 )
22 22 from ..interfaces import repository
23 23 from ..utils import storageutil
24 24
25 25
26 26 class basetestcase(unittest.TestCase):
27 27 if not getattr(unittest.TestCase, 'assertRaisesRegex', False):
28 28 assertRaisesRegex = ( # camelcase-required
29 29 unittest.TestCase.assertRaisesRegexp
30 30 )
31 31
32 32
33 33 class ifileindextests(basetestcase):
34 34 """Generic tests for the ifileindex interface.
35 35
36 36 All file storage backends for index data should conform to the tests in this
37 37 class.
38 38
39 39 Use ``makeifileindextests()`` to create an instance of this type.
40 40 """
41 41
42 42 def testempty(self):
43 43 f = self._makefilefn()
44 44 self.assertEqual(len(f), 0, b'new file store has 0 length by default')
45 45 self.assertEqual(list(f), [], b'iter yields nothing by default')
46 46
47 47 gen = iter(f)
48 48 with self.assertRaises(StopIteration):
49 49 next(gen)
50 50
51 51 self.assertFalse(f.hasnode(None))
52 52 self.assertFalse(f.hasnode(0))
53 53 self.assertFalse(f.hasnode(nullrev))
54 54 self.assertFalse(f.hasnode(nullid))
55 55 self.assertFalse(f.hasnode(b'0'))
56 56 self.assertFalse(f.hasnode(b'a' * 20))
57 57
58 58 # revs() should evaluate to an empty list.
59 59 self.assertEqual(list(f.revs()), [])
60 60
61 61 revs = iter(f.revs())
62 62 with self.assertRaises(StopIteration):
63 63 next(revs)
64 64
65 65 self.assertEqual(list(f.revs(start=20)), [])
66 66
67 67 # parents() and parentrevs() work with nullid/nullrev.
68 68 self.assertEqual(f.parents(nullid), (nullid, nullid))
69 69 self.assertEqual(f.parentrevs(nullrev), (nullrev, nullrev))
70 70
71 71 with self.assertRaises(error.LookupError):
72 72 f.parents(b'\x01' * 20)
73 73
74 74 for i in range(-5, 5):
75 75 if i == nullrev:
76 76 continue
77 77
78 78 with self.assertRaises(IndexError):
79 79 f.parentrevs(i)
80 80
81 81 # nullid/nullrev lookup always works.
82 82 self.assertEqual(f.rev(nullid), nullrev)
83 83 self.assertEqual(f.node(nullrev), nullid)
84 84
85 85 with self.assertRaises(error.LookupError):
86 86 f.rev(b'\x01' * 20)
87 87
88 88 for i in range(-5, 5):
89 89 if i == nullrev:
90 90 continue
91 91
92 92 with self.assertRaises(IndexError):
93 93 f.node(i)
94 94
95 95 self.assertEqual(f.lookup(nullid), nullid)
96 96 self.assertEqual(f.lookup(nullrev), nullid)
97 97 self.assertEqual(f.lookup(hex(nullid)), nullid)
98 98 self.assertEqual(f.lookup(b'%d' % nullrev), nullid)
99 99
100 100 with self.assertRaises(error.LookupError):
101 101 f.lookup(b'badvalue')
102 102
103 103 with self.assertRaises(error.LookupError):
104 104 f.lookup(hex(nullid)[0:12])
105 105
106 106 with self.assertRaises(error.LookupError):
107 107 f.lookup(b'-2')
108 108
109 109 with self.assertRaises(error.LookupError):
110 110 f.lookup(b'0')
111 111
112 112 with self.assertRaises(error.LookupError):
113 113 f.lookup(b'1')
114 114
115 115 with self.assertRaises(error.LookupError):
116 116 f.lookup(b'11111111111111111111111111111111111111')
117 117
118 118 for i in range(-5, 5):
119 119 if i == nullrev:
120 120 continue
121 121
122 122 with self.assertRaises(LookupError):
123 123 f.lookup(i)
124 124
125 125 self.assertEqual(f.linkrev(nullrev), nullrev)
126 126
127 127 for i in range(-5, 5):
128 128 if i == nullrev:
129 129 continue
130 130
131 131 with self.assertRaises(IndexError):
132 132 f.linkrev(i)
133 133
134 134 self.assertFalse(f.iscensored(nullrev))
135 135
136 136 for i in range(-5, 5):
137 137 if i == nullrev:
138 138 continue
139 139
140 140 with self.assertRaises(IndexError):
141 141 f.iscensored(i)
142 142
143 143 self.assertEqual(list(f.commonancestorsheads(nullid, nullid)), [])
144 144
145 145 with self.assertRaises(ValueError):
146 146 self.assertEqual(list(f.descendants([])), [])
147 147
148 148 self.assertEqual(list(f.descendants([nullrev])), [])
149 149
150 150 self.assertEqual(f.heads(), [nullid])
151 151 self.assertEqual(f.heads(nullid), [nullid])
152 152 self.assertEqual(f.heads(None, [nullid]), [nullid])
153 153 self.assertEqual(f.heads(nullid, [nullid]), [nullid])
154 154
155 155 self.assertEqual(f.children(nullid), [])
156 156
157 157 with self.assertRaises(error.LookupError):
158 158 f.children(b'\x01' * 20)
159 159
160 160 def testsinglerevision(self):
161 161 f = self._makefilefn()
162 162 with self._maketransactionfn() as tr:
163 163 node = f.add(b'initial', None, tr, 0, nullid, nullid)
164 164
165 165 self.assertEqual(len(f), 1)
166 166 self.assertEqual(list(f), [0])
167 167
168 168 gen = iter(f)
169 169 self.assertEqual(next(gen), 0)
170 170
171 171 with self.assertRaises(StopIteration):
172 172 next(gen)
173 173
174 174 self.assertTrue(f.hasnode(node))
175 175 self.assertFalse(f.hasnode(hex(node)))
176 176 self.assertFalse(f.hasnode(nullrev))
177 177 self.assertFalse(f.hasnode(nullid))
178 178 self.assertFalse(f.hasnode(node[0:12]))
179 179 self.assertFalse(f.hasnode(hex(node)[0:20]))
180 180
181 181 self.assertEqual(list(f.revs()), [0])
182 182 self.assertEqual(list(f.revs(start=1)), [])
183 183 self.assertEqual(list(f.revs(start=0)), [0])
184 184 self.assertEqual(list(f.revs(stop=0)), [0])
185 185 self.assertEqual(list(f.revs(stop=1)), [0])
186 186 self.assertEqual(list(f.revs(1, 1)), [])
187 187 # TODO buggy
188 188 self.assertEqual(list(f.revs(1, 0)), [1, 0])
189 189 self.assertEqual(list(f.revs(2, 0)), [2, 1, 0])
190 190
191 191 self.assertEqual(f.parents(node), (nullid, nullid))
192 192 self.assertEqual(f.parentrevs(0), (nullrev, nullrev))
193 193
194 194 with self.assertRaises(error.LookupError):
195 195 f.parents(b'\x01' * 20)
196 196
197 197 with self.assertRaises(IndexError):
198 198 f.parentrevs(1)
199 199
200 200 self.assertEqual(f.rev(node), 0)
201 201
202 202 with self.assertRaises(error.LookupError):
203 203 f.rev(b'\x01' * 20)
204 204
205 205 self.assertEqual(f.node(0), node)
206 206
207 207 with self.assertRaises(IndexError):
208 208 f.node(1)
209 209
210 210 self.assertEqual(f.lookup(node), node)
211 211 self.assertEqual(f.lookup(0), node)
212 212 self.assertEqual(f.lookup(-1), nullid)
213 213 self.assertEqual(f.lookup(b'0'), node)
214 214 self.assertEqual(f.lookup(hex(node)), node)
215 215
216 216 with self.assertRaises(error.LookupError):
217 217 f.lookup(hex(node)[0:12])
218 218
219 219 with self.assertRaises(error.LookupError):
220 220 f.lookup(-2)
221 221
222 222 with self.assertRaises(error.LookupError):
223 223 f.lookup(b'-2')
224 224
225 225 with self.assertRaises(error.LookupError):
226 226 f.lookup(1)
227 227
228 228 with self.assertRaises(error.LookupError):
229 229 f.lookup(b'1')
230 230
231 231 self.assertEqual(f.linkrev(0), 0)
232 232
233 233 with self.assertRaises(IndexError):
234 234 f.linkrev(1)
235 235
236 236 self.assertFalse(f.iscensored(0))
237 237
238 238 with self.assertRaises(IndexError):
239 239 f.iscensored(1)
240 240
241 241 self.assertEqual(list(f.descendants([0])), [])
242 242
243 243 self.assertEqual(f.heads(), [node])
244 244 self.assertEqual(f.heads(node), [node])
245 245 self.assertEqual(f.heads(stop=[node]), [node])
246 246
247 247 with self.assertRaises(error.LookupError):
248 248 f.heads(stop=[b'\x01' * 20])
249 249
250 250 self.assertEqual(f.children(node), [])
251 251
252 252 def testmultiplerevisions(self):
253 253 fulltext0 = b'x' * 1024
254 254 fulltext1 = fulltext0 + b'y'
255 255 fulltext2 = b'y' + fulltext0 + b'z'
256 256
257 257 f = self._makefilefn()
258 258 with self._maketransactionfn() as tr:
259 259 node0 = f.add(fulltext0, None, tr, 0, nullid, nullid)
260 260 node1 = f.add(fulltext1, None, tr, 1, node0, nullid)
261 261 node2 = f.add(fulltext2, None, tr, 3, node1, nullid)
262 262
263 263 self.assertEqual(len(f), 3)
264 264 self.assertEqual(list(f), [0, 1, 2])
265 265
266 266 gen = iter(f)
267 267 self.assertEqual(next(gen), 0)
268 268 self.assertEqual(next(gen), 1)
269 269 self.assertEqual(next(gen), 2)
270 270
271 271 with self.assertRaises(StopIteration):
272 272 next(gen)
273 273
274 274 self.assertEqual(list(f.revs()), [0, 1, 2])
275 275 self.assertEqual(list(f.revs(0)), [0, 1, 2])
276 276 self.assertEqual(list(f.revs(1)), [1, 2])
277 277 self.assertEqual(list(f.revs(2)), [2])
278 278 self.assertEqual(list(f.revs(3)), [])
279 279 self.assertEqual(list(f.revs(stop=1)), [0, 1])
280 280 self.assertEqual(list(f.revs(stop=2)), [0, 1, 2])
281 281 self.assertEqual(list(f.revs(stop=3)), [0, 1, 2])
282 282 self.assertEqual(list(f.revs(2, 0)), [2, 1, 0])
283 283 self.assertEqual(list(f.revs(2, 1)), [2, 1])
284 284 # TODO this is wrong
285 285 self.assertEqual(list(f.revs(3, 2)), [3, 2])
286 286
287 287 self.assertEqual(f.parents(node0), (nullid, nullid))
288 288 self.assertEqual(f.parents(node1), (node0, nullid))
289 289 self.assertEqual(f.parents(node2), (node1, nullid))
290 290
291 291 self.assertEqual(f.parentrevs(0), (nullrev, nullrev))
292 292 self.assertEqual(f.parentrevs(1), (0, nullrev))
293 293 self.assertEqual(f.parentrevs(2), (1, nullrev))
294 294
295 295 self.assertEqual(f.rev(node0), 0)
296 296 self.assertEqual(f.rev(node1), 1)
297 297 self.assertEqual(f.rev(node2), 2)
298 298
299 299 with self.assertRaises(error.LookupError):
300 300 f.rev(b'\x01' * 20)
301 301
302 302 self.assertEqual(f.node(0), node0)
303 303 self.assertEqual(f.node(1), node1)
304 304 self.assertEqual(f.node(2), node2)
305 305
306 306 with self.assertRaises(IndexError):
307 307 f.node(3)
308 308
309 309 self.assertEqual(f.lookup(node0), node0)
310 310 self.assertEqual(f.lookup(0), node0)
311 311 self.assertEqual(f.lookup(b'0'), node0)
312 312 self.assertEqual(f.lookup(hex(node0)), node0)
313 313
314 314 self.assertEqual(f.lookup(node1), node1)
315 315 self.assertEqual(f.lookup(1), node1)
316 316 self.assertEqual(f.lookup(b'1'), node1)
317 317 self.assertEqual(f.lookup(hex(node1)), node1)
318 318
319 319 self.assertEqual(f.linkrev(0), 0)
320 320 self.assertEqual(f.linkrev(1), 1)
321 321 self.assertEqual(f.linkrev(2), 3)
322 322
323 323 with self.assertRaises(IndexError):
324 324 f.linkrev(3)
325 325
326 326 self.assertFalse(f.iscensored(0))
327 327 self.assertFalse(f.iscensored(1))
328 328 self.assertFalse(f.iscensored(2))
329 329
330 330 with self.assertRaises(IndexError):
331 331 f.iscensored(3)
332 332
333 333 self.assertEqual(f.commonancestorsheads(node1, nullid), [])
334 334 self.assertEqual(f.commonancestorsheads(node1, node0), [node0])
335 335 self.assertEqual(f.commonancestorsheads(node1, node1), [node1])
336 336 self.assertEqual(f.commonancestorsheads(node0, node1), [node0])
337 337 self.assertEqual(f.commonancestorsheads(node1, node2), [node1])
338 338 self.assertEqual(f.commonancestorsheads(node2, node1), [node1])
339 339
340 340 self.assertEqual(list(f.descendants([0])), [1, 2])
341 341 self.assertEqual(list(f.descendants([1])), [2])
342 342 self.assertEqual(list(f.descendants([0, 1])), [1, 2])
343 343
344 344 self.assertEqual(f.heads(), [node2])
345 345 self.assertEqual(f.heads(node0), [node2])
346 346 self.assertEqual(f.heads(node1), [node2])
347 347 self.assertEqual(f.heads(node2), [node2])
348 348
349 349 # TODO this behavior seems wonky. Is it correct? If so, the
350 350 # docstring for heads() should be updated to reflect desired
351 351 # behavior.
352 352 self.assertEqual(f.heads(stop=[node1]), [node1, node2])
353 353 self.assertEqual(f.heads(stop=[node0]), [node0, node2])
354 354 self.assertEqual(f.heads(stop=[node1, node2]), [node1, node2])
355 355
356 356 with self.assertRaises(error.LookupError):
357 357 f.heads(stop=[b'\x01' * 20])
358 358
359 359 self.assertEqual(f.children(node0), [node1])
360 360 self.assertEqual(f.children(node1), [node2])
361 361 self.assertEqual(f.children(node2), [])
362 362
363 363 def testmultipleheads(self):
364 364 f = self._makefilefn()
365 365
366 366 with self._maketransactionfn() as tr:
367 367 node0 = f.add(b'0', None, tr, 0, nullid, nullid)
368 368 node1 = f.add(b'1', None, tr, 1, node0, nullid)
369 369 node2 = f.add(b'2', None, tr, 2, node1, nullid)
370 370 node3 = f.add(b'3', None, tr, 3, node0, nullid)
371 371 node4 = f.add(b'4', None, tr, 4, node3, nullid)
372 372 node5 = f.add(b'5', None, tr, 5, node0, nullid)
373 373
374 374 self.assertEqual(len(f), 6)
375 375
376 376 self.assertEqual(list(f.descendants([0])), [1, 2, 3, 4, 5])
377 377 self.assertEqual(list(f.descendants([1])), [2])
378 378 self.assertEqual(list(f.descendants([2])), [])
379 379 self.assertEqual(list(f.descendants([3])), [4])
380 380 self.assertEqual(list(f.descendants([0, 1])), [1, 2, 3, 4, 5])
381 381 self.assertEqual(list(f.descendants([1, 3])), [2, 4])
382 382
383 383 self.assertEqual(f.heads(), [node2, node4, node5])
384 384 self.assertEqual(f.heads(node0), [node2, node4, node5])
385 385 self.assertEqual(f.heads(node1), [node2])
386 386 self.assertEqual(f.heads(node2), [node2])
387 387 self.assertEqual(f.heads(node3), [node4])
388 388 self.assertEqual(f.heads(node4), [node4])
389 389 self.assertEqual(f.heads(node5), [node5])
390 390
391 391 # TODO this seems wrong.
392 392 self.assertEqual(f.heads(stop=[node0]), [node0, node2, node4, node5])
393 393 self.assertEqual(f.heads(stop=[node1]), [node1, node2, node4, node5])
394 394
395 395 self.assertEqual(f.children(node0), [node1, node3, node5])
396 396 self.assertEqual(f.children(node1), [node2])
397 397 self.assertEqual(f.children(node2), [])
398 398 self.assertEqual(f.children(node3), [node4])
399 399 self.assertEqual(f.children(node4), [])
400 400 self.assertEqual(f.children(node5), [])
401 401
402 402
403 403 class ifiledatatests(basetestcase):
404 404 """Generic tests for the ifiledata interface.
405 405
406 406 All file storage backends for data should conform to the tests in this
407 407 class.
408 408
409 409 Use ``makeifiledatatests()`` to create an instance of this type.
410 410 """
411 411
412 412 def testempty(self):
413 413 f = self._makefilefn()
414 414
415 415 self.assertEqual(f.storageinfo(), {})
416 416 self.assertEqual(
417 417 f.storageinfo(revisionscount=True, trackedsize=True),
418 418 {b'revisionscount': 0, b'trackedsize': 0},
419 419 )
420 420
421 421 self.assertEqual(f.size(nullrev), 0)
422 422
423 423 for i in range(-5, 5):
424 424 if i == nullrev:
425 425 continue
426 426
427 427 with self.assertRaises(IndexError):
428 428 f.size(i)
429 429
430 430 self.assertEqual(f.revision(nullid), b'')
431 431 self.assertEqual(f.rawdata(nullid), b'')
432 432
433 433 with self.assertRaises(error.LookupError):
434 434 f.revision(b'\x01' * 20)
435 435
436 436 self.assertEqual(f.read(nullid), b'')
437 437
438 438 with self.assertRaises(error.LookupError):
439 439 f.read(b'\x01' * 20)
440 440
441 441 self.assertFalse(f.renamed(nullid))
442 442
443 443 with self.assertRaises(error.LookupError):
444 444 f.read(b'\x01' * 20)
445 445
446 446 self.assertTrue(f.cmp(nullid, b''))
447 447 self.assertTrue(f.cmp(nullid, b'foo'))
448 448
449 449 with self.assertRaises(error.LookupError):
450 450 f.cmp(b'\x01' * 20, b'irrelevant')
451 451
452 452 # Emitting empty list is an empty generator.
453 453 gen = f.emitrevisions([])
454 454 with self.assertRaises(StopIteration):
455 455 next(gen)
456 456
457 457 # Emitting null node yields nothing.
458 458 gen = f.emitrevisions([nullid])
459 459 with self.assertRaises(StopIteration):
460 460 next(gen)
461 461
462 462 # Requesting unknown node fails.
463 463 with self.assertRaises(error.LookupError):
464 464 list(f.emitrevisions([b'\x01' * 20]))
465 465
466 466 def testsinglerevision(self):
467 467 fulltext = b'initial'
468 468
469 469 f = self._makefilefn()
470 470 with self._maketransactionfn() as tr:
471 471 node = f.add(fulltext, None, tr, 0, nullid, nullid)
472 472
473 473 self.assertEqual(f.storageinfo(), {})
474 474 self.assertEqual(
475 475 f.storageinfo(revisionscount=True, trackedsize=True),
476 476 {b'revisionscount': 1, b'trackedsize': len(fulltext)},
477 477 )
478 478
479 479 self.assertEqual(f.size(0), len(fulltext))
480 480
481 481 with self.assertRaises(IndexError):
482 482 f.size(1)
483 483
484 484 self.assertEqual(f.revision(node), fulltext)
485 485 self.assertEqual(f.rawdata(node), fulltext)
486 486
487 487 self.assertEqual(f.read(node), fulltext)
488 488
489 489 self.assertFalse(f.renamed(node))
490 490
491 491 self.assertFalse(f.cmp(node, fulltext))
492 492 self.assertTrue(f.cmp(node, fulltext + b'extra'))
493 493
494 494 # Emitting a single revision works.
495 495 gen = f.emitrevisions([node])
496 496 rev = next(gen)
497 497
498 498 self.assertEqual(rev.node, node)
499 499 self.assertEqual(rev.p1node, nullid)
500 500 self.assertEqual(rev.p2node, nullid)
501 501 self.assertIsNone(rev.linknode)
502 502 self.assertEqual(rev.basenode, nullid)
503 503 self.assertIsNone(rev.baserevisionsize)
504 504 self.assertIsNone(rev.revision)
505 505 self.assertIsNone(rev.delta)
506 506
507 507 with self.assertRaises(StopIteration):
508 508 next(gen)
509 509
510 510 # Requesting revision data works.
511 511 gen = f.emitrevisions([node], revisiondata=True)
512 512 rev = next(gen)
513 513
514 514 self.assertEqual(rev.node, node)
515 515 self.assertEqual(rev.p1node, nullid)
516 516 self.assertEqual(rev.p2node, nullid)
517 517 self.assertIsNone(rev.linknode)
518 518 self.assertEqual(rev.basenode, nullid)
519 519 self.assertIsNone(rev.baserevisionsize)
520 520 self.assertEqual(rev.revision, fulltext)
521 521 self.assertIsNone(rev.delta)
522 522
523 523 with self.assertRaises(StopIteration):
524 524 next(gen)
525 525
526 526 # Emitting an unknown node after a known revision results in error.
527 527 with self.assertRaises(error.LookupError):
528 528 list(f.emitrevisions([node, b'\x01' * 20]))
529 529
530 530 def testmultiplerevisions(self):
531 531 fulltext0 = b'x' * 1024
532 532 fulltext1 = fulltext0 + b'y'
533 533 fulltext2 = b'y' + fulltext0 + b'z'
534 534
535 535 f = self._makefilefn()
536 536 with self._maketransactionfn() as tr:
537 537 node0 = f.add(fulltext0, None, tr, 0, nullid, nullid)
538 538 node1 = f.add(fulltext1, None, tr, 1, node0, nullid)
539 539 node2 = f.add(fulltext2, None, tr, 3, node1, nullid)
540 540
541 541 self.assertEqual(f.storageinfo(), {})
542 542 self.assertEqual(
543 543 f.storageinfo(revisionscount=True, trackedsize=True),
544 544 {
545 545 b'revisionscount': 3,
546 546 b'trackedsize': len(fulltext0)
547 547 + len(fulltext1)
548 548 + len(fulltext2),
549 549 },
550 550 )
551 551
552 552 self.assertEqual(f.size(0), len(fulltext0))
553 553 self.assertEqual(f.size(1), len(fulltext1))
554 554 self.assertEqual(f.size(2), len(fulltext2))
555 555
556 556 with self.assertRaises(IndexError):
557 557 f.size(3)
558 558
559 559 self.assertEqual(f.revision(node0), fulltext0)
560 560 self.assertEqual(f.rawdata(node0), fulltext0)
561 561 self.assertEqual(f.revision(node1), fulltext1)
562 562 self.assertEqual(f.rawdata(node1), fulltext1)
563 563 self.assertEqual(f.revision(node2), fulltext2)
564 564 self.assertEqual(f.rawdata(node2), fulltext2)
565 565
566 566 with self.assertRaises(error.LookupError):
567 567 f.revision(b'\x01' * 20)
568 568
569 569 self.assertEqual(f.read(node0), fulltext0)
570 570 self.assertEqual(f.read(node1), fulltext1)
571 571 self.assertEqual(f.read(node2), fulltext2)
572 572
573 573 with self.assertRaises(error.LookupError):
574 574 f.read(b'\x01' * 20)
575 575
576 576 self.assertFalse(f.renamed(node0))
577 577 self.assertFalse(f.renamed(node1))
578 578 self.assertFalse(f.renamed(node2))
579 579
580 580 with self.assertRaises(error.LookupError):
581 581 f.renamed(b'\x01' * 20)
582 582
583 583 self.assertFalse(f.cmp(node0, fulltext0))
584 584 self.assertFalse(f.cmp(node1, fulltext1))
585 585 self.assertFalse(f.cmp(node2, fulltext2))
586 586
587 587 self.assertTrue(f.cmp(node1, fulltext0))
588 588 self.assertTrue(f.cmp(node2, fulltext1))
589 589
590 590 with self.assertRaises(error.LookupError):
591 591 f.cmp(b'\x01' * 20, b'irrelevant')
592 592
593 593 # Nodes should be emitted in order.
594 594 gen = f.emitrevisions([node0, node1, node2], revisiondata=True)
595 595
596 596 rev = next(gen)
597 597
598 598 self.assertEqual(rev.node, node0)
599 599 self.assertEqual(rev.p1node, nullid)
600 600 self.assertEqual(rev.p2node, nullid)
601 601 self.assertIsNone(rev.linknode)
602 602 self.assertEqual(rev.basenode, nullid)
603 603 self.assertIsNone(rev.baserevisionsize)
604 604 self.assertEqual(rev.revision, fulltext0)
605 605 self.assertIsNone(rev.delta)
606 606
607 607 rev = next(gen)
608 608
609 609 self.assertEqual(rev.node, node1)
610 610 self.assertEqual(rev.p1node, node0)
611 611 self.assertEqual(rev.p2node, nullid)
612 612 self.assertIsNone(rev.linknode)
613 613 self.assertEqual(rev.basenode, node0)
614 614 self.assertIsNone(rev.baserevisionsize)
615 615 self.assertIsNone(rev.revision)
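# A note on the expected delta bytes below: bdiff/mdiff deltas are a sequence
# of hunks, each a 12-byte header of three big-endian 32-bit integers
# (start, end, length of new data) followed by the data. Here (0, 1024, 1025)
# replaces all 1024 bytes of fulltext0 with the 1025 bytes of fulltext1.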
616 616 self.assertEqual(
617 617 rev.delta,
618 618 b'\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x04\x01' + fulltext1,
619 619 )
620 620
621 621 rev = next(gen)
622 622
623 623 self.assertEqual(rev.node, node2)
624 624 self.assertEqual(rev.p1node, node1)
625 625 self.assertEqual(rev.p2node, nullid)
626 626 self.assertIsNone(rev.linknode)
627 627 self.assertEqual(rev.basenode, node1)
628 628 self.assertIsNone(rev.baserevisionsize)
629 629 self.assertIsNone(rev.revision)
630 630 self.assertEqual(
631 631 rev.delta,
632 632 b'\x00\x00\x00\x00\x00\x00\x04\x01\x00\x00\x04\x02' + fulltext2,
633 633 )
634 634
635 635 with self.assertRaises(StopIteration):
636 636 next(gen)
637 637
638 638 # Request not in DAG order is reordered to be in DAG order.
639 639 gen = f.emitrevisions([node2, node1, node0], revisiondata=True)
640 640
641 641 rev = next(gen)
642 642
643 643 self.assertEqual(rev.node, node0)
644 644 self.assertEqual(rev.p1node, nullid)
645 645 self.assertEqual(rev.p2node, nullid)
646 646 self.assertIsNone(rev.linknode)
647 647 self.assertEqual(rev.basenode, nullid)
648 648 self.assertIsNone(rev.baserevisionsize)
649 649 self.assertEqual(rev.revision, fulltext0)
650 650 self.assertIsNone(rev.delta)
651 651
652 652 rev = next(gen)
653 653
654 654 self.assertEqual(rev.node, node1)
655 655 self.assertEqual(rev.p1node, node0)
656 656 self.assertEqual(rev.p2node, nullid)
657 657 self.assertIsNone(rev.linknode)
658 658 self.assertEqual(rev.basenode, node0)
659 659 self.assertIsNone(rev.baserevisionsize)
660 660 self.assertIsNone(rev.revision)
661 661 self.assertEqual(
662 662 rev.delta,
663 663 b'\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x04\x01' + fulltext1,
664 664 )
665 665
666 666 rev = next(gen)
667 667
668 668 self.assertEqual(rev.node, node2)
669 669 self.assertEqual(rev.p1node, node1)
670 670 self.assertEqual(rev.p2node, nullid)
671 671 self.assertIsNone(rev.linknode)
672 672 self.assertEqual(rev.basenode, node1)
673 673 self.assertIsNone(rev.baserevisionsize)
674 674 self.assertIsNone(rev.revision)
675 675 self.assertEqual(
676 676 rev.delta,
677 677 b'\x00\x00\x00\x00\x00\x00\x04\x01\x00\x00\x04\x02' + fulltext2,
678 678 )
679 679
680 680 with self.assertRaises(StopIteration):
681 681 next(gen)
682 682
683 683 # Unrecognized nodesorder value raises ProgrammingError.
684 684 with self.assertRaises(error.ProgrammingError):
685 685 list(f.emitrevisions([], nodesorder=b'bad'))
686 686
687 687 # nodesorder=storage is recognized. But we can't test it thoroughly
688 688 # because behavior is storage-dependent.
689 689 res = list(
690 690 f.emitrevisions([node2, node1, node0], nodesorder=b'storage')
691 691 )
692 692 self.assertEqual(len(res), 3)
693 693 self.assertEqual({o.node for o in res}, {node0, node1, node2})
694 694
695 695 # nodesorder=nodes forces the order.
696 696 gen = f.emitrevisions(
697 697 [node2, node0], nodesorder=b'nodes', revisiondata=True
698 698 )
699 699
700 700 rev = next(gen)
701 701 self.assertEqual(rev.node, node2)
702 702 self.assertEqual(rev.p1node, node1)
703 703 self.assertEqual(rev.p2node, nullid)
704 704 self.assertEqual(rev.basenode, nullid)
705 705 self.assertIsNone(rev.baserevisionsize)
706 706 self.assertEqual(rev.revision, fulltext2)
707 707 self.assertIsNone(rev.delta)
708 708
709 709 rev = next(gen)
710 710 self.assertEqual(rev.node, node0)
711 711 self.assertEqual(rev.p1node, nullid)
712 712 self.assertEqual(rev.p2node, nullid)
713 713 # Delta behavior is storage dependent, so we can't easily test it.
714 714
715 715 with self.assertRaises(StopIteration):
716 716 next(gen)
717 717
718 718 # assumehaveparentrevisions=False (the default) won't send a delta for
719 719 # the first revision.
720 720 gen = f.emitrevisions({node2, node1}, revisiondata=True)
721 721
722 722 rev = next(gen)
723 723 self.assertEqual(rev.node, node1)
724 724 self.assertEqual(rev.p1node, node0)
725 725 self.assertEqual(rev.p2node, nullid)
726 726 self.assertEqual(rev.basenode, nullid)
727 727 self.assertIsNone(rev.baserevisionsize)
728 728 self.assertEqual(rev.revision, fulltext1)
729 729 self.assertIsNone(rev.delta)
730 730
731 731 rev = next(gen)
732 732 self.assertEqual(rev.node, node2)
733 733 self.assertEqual(rev.p1node, node1)
734 734 self.assertEqual(rev.p2node, nullid)
735 735 self.assertEqual(rev.basenode, node1)
736 736 self.assertIsNone(rev.baserevisionsize)
737 737 self.assertIsNone(rev.revision)
738 738 self.assertEqual(
739 739 rev.delta,
740 740 b'\x00\x00\x00\x00\x00\x00\x04\x01\x00\x00\x04\x02' + fulltext2,
741 741 )
742 742
743 743 with self.assertRaises(StopIteration):
744 744 next(gen)
745 745
746 746 # assumehaveparentrevisions=True allows delta against initial revision.
747 747 gen = f.emitrevisions(
748 748 [node2, node1], revisiondata=True, assumehaveparentrevisions=True
749 749 )
750 750
751 751 rev = next(gen)
752 752 self.assertEqual(rev.node, node1)
753 753 self.assertEqual(rev.p1node, node0)
754 754 self.assertEqual(rev.p2node, nullid)
755 755 self.assertEqual(rev.basenode, node0)
756 756 self.assertIsNone(rev.baserevisionsize)
757 757 self.assertIsNone(rev.revision)
758 758 self.assertEqual(
759 759 rev.delta,
760 760 b'\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x04\x01' + fulltext1,
761 761 )
762 762
 763  763         # deltamode=CG_DELTAMODE_PREV forces a delta against the previous
 763  763         # revision. Special case for initial revision.
765 765 gen = f.emitrevisions(
766 766 [node0], revisiondata=True, deltamode=repository.CG_DELTAMODE_PREV
767 767 )
768 768
769 769 rev = next(gen)
770 770 self.assertEqual(rev.node, node0)
771 771 self.assertEqual(rev.p1node, nullid)
772 772 self.assertEqual(rev.p2node, nullid)
773 773 self.assertEqual(rev.basenode, nullid)
774 774 self.assertIsNone(rev.baserevisionsize)
775 775 self.assertIsNone(rev.revision)
776 776 self.assertEqual(
777 777 rev.delta,
778 778 b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00' + fulltext0,
779 779 )
780 780
781 781 with self.assertRaises(StopIteration):
782 782 next(gen)
783 783
784 784 gen = f.emitrevisions(
785 785 [node0, node2],
786 786 revisiondata=True,
787 787 deltamode=repository.CG_DELTAMODE_PREV,
788 788 )
789 789
790 790 rev = next(gen)
791 791 self.assertEqual(rev.node, node0)
792 792 self.assertEqual(rev.p1node, nullid)
793 793 self.assertEqual(rev.p2node, nullid)
794 794 self.assertEqual(rev.basenode, nullid)
795 795 self.assertIsNone(rev.baserevisionsize)
796 796 self.assertIsNone(rev.revision)
797 797 self.assertEqual(
798 798 rev.delta,
799 799 b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00' + fulltext0,
800 800 )
801 801
802 802 rev = next(gen)
803 803 self.assertEqual(rev.node, node2)
804 804 self.assertEqual(rev.p1node, node1)
805 805 self.assertEqual(rev.p2node, nullid)
806 806 self.assertEqual(rev.basenode, node0)
807 807
808 808 with self.assertRaises(StopIteration):
809 809 next(gen)
810 810
811 811 def testrenamed(self):
812 812 fulltext0 = b'foo'
813 813 fulltext1 = b'bar'
814 814 fulltext2 = b'baz'
815 815
816 816 meta1 = {
817 817 b'copy': b'source0',
818 818 b'copyrev': b'a' * 40,
819 819 }
820 820
821 821 meta2 = {
822 822 b'copy': b'source1',
823 823 b'copyrev': b'b' * 40,
824 824 }
825 825
826 826 stored1 = b''.join(
827 827 [
828 828 b'\x01\ncopy: source0\n',
829 829 b'copyrev: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n\x01\n',
830 830 fulltext1,
831 831 ]
832 832 )
833 833
834 834 stored2 = b''.join(
835 835 [
836 836 b'\x01\ncopy: source1\n',
837 837 b'copyrev: bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\n\x01\n',
838 838 fulltext2,
839 839 ]
840 840 )
841 841
842 842 f = self._makefilefn()
843 843 with self._maketransactionfn() as tr:
844 844 node0 = f.add(fulltext0, None, tr, 0, nullid, nullid)
845 845 node1 = f.add(fulltext1, meta1, tr, 1, node0, nullid)
846 846 node2 = f.add(fulltext2, meta2, tr, 2, nullid, nullid)
847 847
848 848 # Metadata header isn't recognized when parent isn't nullid.
849 849 self.assertEqual(f.size(1), len(stored1))
850 850 self.assertEqual(f.size(2), len(fulltext2))
851 851
852 852 self.assertEqual(f.revision(node1), stored1)
853 853 self.assertEqual(f.rawdata(node1), stored1)
854 854 self.assertEqual(f.revision(node2), stored2)
855 855 self.assertEqual(f.rawdata(node2), stored2)
856 856
857 857 self.assertEqual(f.read(node1), fulltext1)
858 858 self.assertEqual(f.read(node2), fulltext2)
859 859
860 860 # Returns False when first parent is set.
861 861 self.assertFalse(f.renamed(node1))
862 862 self.assertEqual(f.renamed(node2), (b'source1', b'\xbb' * 20))
863 863
864 864 self.assertTrue(f.cmp(node1, fulltext1))
865 865 self.assertTrue(f.cmp(node1, stored1))
866 866 self.assertFalse(f.cmp(node2, fulltext2))
867 867 self.assertTrue(f.cmp(node2, stored2))
868 868
869 869 def testmetadataprefix(self):
870 870 # Content with metadata prefix has extra prefix inserted in storage.
871 871 fulltext0 = b'\x01\nfoo'
872 872 stored0 = b'\x01\n\x01\n\x01\nfoo'
873 873
874 874 fulltext1 = b'\x01\nbar'
875 875 meta1 = {
876 876 b'copy': b'source0',
877 877 b'copyrev': b'b' * 40,
878 878 }
879 879 stored1 = b''.join(
880 880 [
881 881 b'\x01\ncopy: source0\n',
882 882 b'copyrev: %s\n' % (b'b' * 40),
883 883 b'\x01\n\x01\nbar',
884 884 ]
885 885 )
886 886
887 887 f = self._makefilefn()
888 888 with self._maketransactionfn() as tr:
889 889 node0 = f.add(fulltext0, {}, tr, 0, nullid, nullid)
890 890 node1 = f.add(fulltext1, meta1, tr, 1, nullid, nullid)
891 891
892 892 # TODO this is buggy.
893 893 self.assertEqual(f.size(0), len(fulltext0) + 4)
894 894
895 895 self.assertEqual(f.size(1), len(fulltext1))
896 896
897 897 self.assertEqual(f.revision(node0), stored0)
898 898 self.assertEqual(f.rawdata(node0), stored0)
899 899
900 900 self.assertEqual(f.revision(node1), stored1)
901 901 self.assertEqual(f.rawdata(node1), stored1)
902 902
903 903 self.assertEqual(f.read(node0), fulltext0)
904 904 self.assertEqual(f.read(node1), fulltext1)
905 905
906 906 self.assertFalse(f.cmp(node0, fulltext0))
907 907 self.assertTrue(f.cmp(node0, stored0))
908 908
909 909 self.assertFalse(f.cmp(node1, fulltext1))
910 910 self.assertTrue(f.cmp(node1, stored0))
911 911
912 912 def testbadnoderead(self):
913 913 f = self._makefilefn()
914 914
915 915 fulltext0 = b'foo\n' * 30
916 916 fulltext1 = fulltext0 + b'bar\n'
917 917
918 918 with self._maketransactionfn() as tr:
919 919 node0 = f.add(fulltext0, None, tr, 0, nullid, nullid)
920 920 node1 = b'\xaa' * 20
921 921
922 922 self._addrawrevisionfn(
923 923 f, tr, node1, node0, nullid, 1, rawtext=fulltext1
924 924 )
925 925
926 926 self.assertEqual(len(f), 2)
927 927 self.assertEqual(f.parents(node1), (node0, nullid))
928 928
929 929 # revision() raises since it performs hash verification.
930 930 with self.assertRaises(error.StorageError):
931 931 f.revision(node1)
932 932
933 933 # rawdata() still verifies because there are no special storage
934 934 # settings.
935 935 with self.assertRaises(error.StorageError):
936 936 f.rawdata(node1)
937 937
938 938 # read() behaves like revision().
939 939 with self.assertRaises(error.StorageError):
940 940 f.read(node1)
941 941
942 942 # We can't test renamed() here because some backends may not require
943 943 # reading/validating the fulltext to return rename metadata.
944 944
945 945 def testbadnoderevisionraw(self):
946 946 # Like above except we test rawdata() first to isolate
947 947 # revision caching behavior.
948 948 f = self._makefilefn()
949 949
950 950 fulltext0 = b'foo\n' * 30
951 951 fulltext1 = fulltext0 + b'bar\n'
952 952
953 953 with self._maketransactionfn() as tr:
954 954 node0 = f.add(fulltext0, None, tr, 0, nullid, nullid)
955 955 node1 = b'\xaa' * 20
956 956
957 957 self._addrawrevisionfn(
958 958 f, tr, node1, node0, nullid, 1, rawtext=fulltext1
959 959 )
960 960
961 961 with self.assertRaises(error.StorageError):
962 962 f.rawdata(node1)
963 963
964 964 with self.assertRaises(error.StorageError):
965 965 f.rawdata(node1)
966 966
967 967 def testbadnoderevision(self):
968 968 # Like above except we test read() first to isolate revision caching
969 969 # behavior.
970 970 f = self._makefilefn()
971 971
972 972 fulltext0 = b'foo\n' * 30
973 973 fulltext1 = fulltext0 + b'bar\n'
974 974
975 975 with self._maketransactionfn() as tr:
976 976 node0 = f.add(fulltext0, None, tr, 0, nullid, nullid)
977 977 node1 = b'\xaa' * 20
978 978
979 979 self._addrawrevisionfn(
980 980 f, tr, node1, node0, nullid, 1, rawtext=fulltext1
981 981 )
982 982
983 983 with self.assertRaises(error.StorageError):
984 984 f.read(node1)
985 985
986 986 with self.assertRaises(error.StorageError):
987 987 f.read(node1)
988 988
989 989 def testbadnodedelta(self):
990 990 f = self._makefilefn()
991 991
992 992 fulltext0 = b'foo\n' * 31
993 993 fulltext1 = fulltext0 + b'bar\n'
994 994 fulltext2 = fulltext1 + b'baz\n'
995 995
996 996 with self._maketransactionfn() as tr:
997 997 node0 = f.add(fulltext0, None, tr, 0, nullid, nullid)
998 998 node1 = b'\xaa' * 20
999 999
1000 1000 self._addrawrevisionfn(
1001 1001 f, tr, node1, node0, nullid, 1, rawtext=fulltext1
1002 1002 )
1003 1003
1004 1004 with self.assertRaises(error.StorageError):
1005 1005 f.read(node1)
1006 1006
1007 1007 node2 = storageutil.hashrevisionsha1(fulltext2, node1, nullid)
1008 1008
1009 1009 with self._maketransactionfn() as tr:
1010 1010 delta = mdiff.textdiff(fulltext1, fulltext2)
1011 1011 self._addrawrevisionfn(
1012 1012 f, tr, node2, node1, nullid, 2, delta=(1, delta)
1013 1013 )
1014 1014
1015 1015 self.assertEqual(len(f), 3)
1016 1016
1017 1017 # Assuming a delta is stored, we shouldn't need to validate node1 in
1018 1018 # order to retrieve node2.
1019 1019 self.assertEqual(f.read(node2), fulltext2)
1020 1020
1021 1021 def testcensored(self):
1022 1022 f = self._makefilefn()
1023 1023
1024 1024 stored1 = storageutil.packmeta(
1025 1025 {
1026 1026 b'censored': b'tombstone',
1027 1027 },
1028 1028 b'',
1029 1029 )
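# packmeta() wraps the metadata in a \x01\n ... \x01\n envelope, so stored1
# here is b'\x01\ncensored: tombstone\n\x01\n' followed by an empty fulltext.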
1030 1030
1031 1031 with self._maketransactionfn() as tr:
1032 1032 node0 = f.add(b'foo', None, tr, 0, nullid, nullid)
1033 1033
1034 1034 # The node value doesn't matter since we can't verify it.
1035 1035 node1 = b'\xbb' * 20
1036 1036
1037 1037 self._addrawrevisionfn(
1038 1038 f, tr, node1, node0, nullid, 1, stored1, censored=True
1039 1039 )
1040 1040
1041 1041 self.assertTrue(f.iscensored(1))
1042 1042
1043 1043 with self.assertRaises(error.CensoredNodeError):
1044 1044 f.revision(1)
1045 1045
1046 1046 with self.assertRaises(error.CensoredNodeError):
1047 1047 f.rawdata(1)
1048 1048
1049 1049 with self.assertRaises(error.CensoredNodeError):
1050 1050 f.read(1)
1051 1051
1052 1052 def testcensoredrawrevision(self):
1053 1053 # Like above, except we do the rawdata() request first to
1054 1054 # isolate revision caching behavior.
1055 1055
1056 1056 f = self._makefilefn()
1057 1057
1058 1058 stored1 = storageutil.packmeta(
1059 1059 {
1060 1060 b'censored': b'tombstone',
1061 1061 },
1062 1062 b'',
1063 1063 )
1064 1064
1065 1065 with self._maketransactionfn() as tr:
1066 1066 node0 = f.add(b'foo', None, tr, 0, nullid, nullid)
1067 1067
1068 1068 # The node value doesn't matter since we can't verify it.
1069 1069 node1 = b'\xbb' * 20
1070 1070
1071 1071 self._addrawrevisionfn(
1072 1072 f, tr, node1, node0, nullid, 1, stored1, censored=True
1073 1073 )
1074 1074
1075 1075 with self.assertRaises(error.CensoredNodeError):
1076 1076 f.rawdata(1)
1077 1077
1078 1078
1079 1079 class ifilemutationtests(basetestcase):
1080 1080 """Generic tests for the ifilemutation interface.
1081 1081
1082 1082 All file storage backends that support writing should conform to this
1083 1083 interface.
1084 1084
1085 1085 Use ``makeifilemutationtests()`` to create an instance of this type.
1086 1086 """
1087 1087
1088 1088 def testaddnoop(self):
1089 1089 f = self._makefilefn()
1090 1090 with self._maketransactionfn() as tr:
1091 1091 node0 = f.add(b'foo', None, tr, 0, nullid, nullid)
1092 1092 node1 = f.add(b'foo', None, tr, 0, nullid, nullid)
1093 1093 # Varying by linkrev shouldn't impact hash.
1094 1094 node2 = f.add(b'foo', None, tr, 1, nullid, nullid)
1095 1095
1096 1096 self.assertEqual(node1, node0)
1097 1097 self.assertEqual(node2, node0)
1098 1098 self.assertEqual(len(f), 1)
1099 1099
1100 1100 def testaddrevisionbadnode(self):
1101 1101 f = self._makefilefn()
1102 1102 with self._maketransactionfn() as tr:
1103 1103 # Adding a revision with bad node value fails.
1104 1104 with self.assertRaises(error.StorageError):
1105 1105 f.addrevision(b'foo', tr, 0, nullid, nullid, node=b'\x01' * 20)
1106 1106
1107 1107 def testaddrevisionunknownflag(self):
1108 1108 f = self._makefilefn()
1109 1109 with self._maketransactionfn() as tr:
1110 1110 for i in range(15, 0, -1):
1111 1111 if (1 << i) & ~repository.REVISION_FLAGS_KNOWN:
1112 1112 flags = 1 << i
1113 1113 break
1114 1114
1115 1115 with self.assertRaises(error.StorageError):
1116 1116 f.addrevision(b'foo', tr, 0, nullid, nullid, flags=flags)
1117 1117
1118 1118 def testaddgroupsimple(self):
1119 1119 f = self._makefilefn()
1120 1120
1121 1121 callbackargs = []
1122 1122
1123 1123 def cb(*args, **kwargs):
1124 1124 callbackargs.append((args, kwargs))
1125 1125
1126 1126 def linkmapper(node):
1127 1127 return 0
1128 1128
1129 1129 with self._maketransactionfn() as tr:
1130 1130 nodes = []
1131 1131
1132 def onchangeset(cl, node):
1132 def onchangeset(cl, rev):
1133 node = cl.node(rev)
1133 1134 nodes.append(node)
1134 1135 cb(cl, node)
1135 1136
1136 def ondupchangeset(cl, node):
1137 nodes.append(node)
1137 def ondupchangeset(cl, rev):
1138 nodes.append(cl.node(rev))
1138 1139
1139 1140 f.addgroup(
1140 1141 [],
1141 1142 None,
1142 1143 tr,
1143 1144 addrevisioncb=onchangeset,
1144 1145 duplicaterevisioncb=ondupchangeset,
1145 1146 )
1146 1147
1147 1148 self.assertEqual(nodes, [])
1148 1149 self.assertEqual(callbackargs, [])
1149 1150 self.assertEqual(len(f), 0)
1150 1151
1151 1152 fulltext0 = b'foo'
1152 1153 delta0 = mdiff.trivialdiffheader(len(fulltext0)) + fulltext0
1153 1154
1154 1155 with self._maketransactionfn() as tr:
1155 1156 node0 = f.add(fulltext0, None, tr, 0, nullid, nullid)
1156 1157
1157 1158 f = self._makefilefn()
1158 1159
1159 1160 deltas = [
1160 1161 (node0, nullid, nullid, nullid, nullid, delta0, 0),
1161 1162 ]
1162 1163
1163 1164 with self._maketransactionfn() as tr:
1164 1165 nodes = []
1165 1166
1166 def onchangeset(cl, node):
1167 def onchangeset(cl, rev):
1168 node = cl.node(rev)
1167 1169 nodes.append(node)
1168 1170 cb(cl, node)
1169 1171
1170 def ondupchangeset(cl, node):
1171 nodes.append(node)
1172 def ondupchangeset(cl, rev):
1173 nodes.append(cl.node(rev))
1172 1174
1173 1175 f.addgroup(
1174 1176 deltas,
1175 1177 linkmapper,
1176 1178 tr,
1177 1179 addrevisioncb=onchangeset,
1178 1180 duplicaterevisioncb=ondupchangeset,
1179 1181 )
1180 1182
1181 1183 self.assertEqual(
1182 1184 nodes,
1183 1185 [
1184 1186 b'\x49\xd8\xcb\xb1\x5c\xe2\x57\x92\x04\x47'
1185 1187 b'\x00\x6b\x46\x97\x8b\x7a\xf9\x80\xa9\x79'
1186 1188 ],
1187 1189 )
1188 1190
1189 1191 self.assertEqual(len(callbackargs), 1)
1190 1192 self.assertEqual(callbackargs[0][0][1], nodes[0])
1191 1193
1192 1194 self.assertEqual(list(f.revs()), [0])
1193 1195 self.assertEqual(f.rev(nodes[0]), 0)
1194 1196 self.assertEqual(f.node(0), nodes[0])
1195 1197
1196 1198 def testaddgroupmultiple(self):
1197 1199 f = self._makefilefn()
1198 1200
1199 1201 fulltexts = [
1200 1202 b'foo',
1201 1203 b'bar',
1202 1204 b'x' * 1024,
1203 1205 ]
1204 1206
1205 1207 nodes = []
1206 1208 with self._maketransactionfn() as tr:
1207 1209 for fulltext in fulltexts:
1208 1210 nodes.append(f.add(fulltext, None, tr, 0, nullid, nullid))
1209 1211
1210 1212 f = self._makefilefn()
1211 1213 deltas = []
1212 1214 for i, fulltext in enumerate(fulltexts):
1213 1215 delta = mdiff.trivialdiffheader(len(fulltext)) + fulltext
1214 1216
1215 1217 deltas.append((nodes[i], nullid, nullid, nullid, nullid, delta, 0))
1216 1218
1217 1219 with self._maketransactionfn() as tr:
1218 1220 newnodes = []
1219 1221
1220 def onchangeset(cl, node):
1221 newnodes.append(node)
1222 def onchangeset(cl, rev):
1223 newnodes.append(cl.node(rev))
1222 1224
1223 1225 f.addgroup(
1224 1226 deltas,
1225 1227 lambda x: 0,
1226 1228 tr,
1227 1229 addrevisioncb=onchangeset,
1228 1230 duplicaterevisioncb=onchangeset,
1229 1231 )
1230 1232 self.assertEqual(newnodes, nodes)
1231 1233
1232 1234 self.assertEqual(len(f), len(deltas))
1233 1235 self.assertEqual(list(f.revs()), [0, 1, 2])
1234 1236 self.assertEqual(f.rev(nodes[0]), 0)
1235 1237 self.assertEqual(f.rev(nodes[1]), 1)
1236 1238 self.assertEqual(f.rev(nodes[2]), 2)
1237 1239 self.assertEqual(f.node(0), nodes[0])
1238 1240 self.assertEqual(f.node(1), nodes[1])
1239 1241 self.assertEqual(f.node(2), nodes[2])
1240 1242
1241 1243 def testdeltaagainstcensored(self):
1242 1244 # Attempt to apply a delta made against a censored revision.
1243 1245 f = self._makefilefn()
1244 1246
1245 1247 stored1 = storageutil.packmeta(
1246 1248 {
1247 1249 b'censored': b'tombstone',
1248 1250 },
1249 1251 b'',
1250 1252 )
1251 1253
1252 1254 with self._maketransactionfn() as tr:
1253 1255 node0 = f.add(b'foo\n' * 30, None, tr, 0, nullid, nullid)
1254 1256
1255 1257 # The node value doesn't matter since we can't verify it.
1256 1258 node1 = b'\xbb' * 20
1257 1259
1258 1260 self._addrawrevisionfn(
1259 1261 f, tr, node1, node0, nullid, 1, stored1, censored=True
1260 1262 )
1261 1263
1262 1264 delta = mdiff.textdiff(b'bar\n' * 30, (b'bar\n' * 30) + b'baz\n')
1263 1265 deltas = [(b'\xcc' * 20, node1, nullid, b'\x01' * 20, node1, delta, 0)]
1264 1266
1265 1267 with self._maketransactionfn() as tr:
1266 1268 with self.assertRaises(error.CensoredBaseError):
1267 1269 f.addgroup(deltas, lambda x: 0, tr)
1268 1270
1269 1271 def testcensorrevisionbasic(self):
1270 1272 f = self._makefilefn()
1271 1273
1272 1274 with self._maketransactionfn() as tr:
1273 1275 node0 = f.add(b'foo\n' * 30, None, tr, 0, nullid, nullid)
1274 1276 node1 = f.add(b'foo\n' * 31, None, tr, 1, node0, nullid)
1275 1277 node2 = f.add(b'foo\n' * 32, None, tr, 2, node1, nullid)
1276 1278
1277 1279 with self._maketransactionfn() as tr:
1278 1280 f.censorrevision(tr, node1)
1279 1281
1280 1282 self.assertEqual(len(f), 3)
1281 1283 self.assertEqual(list(f.revs()), [0, 1, 2])
1282 1284
1283 1285 self.assertEqual(f.read(node0), b'foo\n' * 30)
1284 1286 self.assertEqual(f.read(node2), b'foo\n' * 32)
1285 1287
1286 1288 with self.assertRaises(error.CensoredNodeError):
1287 1289 f.read(node1)
1288 1290
1289 1291 def testgetstrippointnoparents(self):
1290 1292 # N revisions where none have parents.
1291 1293 f = self._makefilefn()
1292 1294
1293 1295 with self._maketransactionfn() as tr:
1294 1296 for rev in range(10):
1295 1297 f.add(b'%d' % rev, None, tr, rev, nullid, nullid)
1296 1298
1297 1299 for rev in range(10):
1298 1300 self.assertEqual(f.getstrippoint(rev), (rev, set()))
1299 1301
1300 1302 def testgetstrippointlinear(self):
1301 1303 # N revisions in a linear chain.
1302 1304 f = self._makefilefn()
1303 1305
1304 1306 with self._maketransactionfn() as tr:
1305 1307 p1 = nullid
1306 1308
1307 1309 for rev in range(10):
1308 1310 f.add(b'%d' % rev, None, tr, rev, p1, nullid)
1309 1311
1310 1312 for rev in range(10):
1311 1313 self.assertEqual(f.getstrippoint(rev), (rev, set()))
1312 1314
1313 1315 def testgetstrippointmultipleheads(self):
1314 1316 f = self._makefilefn()
1315 1317
1316 1318 with self._maketransactionfn() as tr:
1317 1319 node0 = f.add(b'0', None, tr, 0, nullid, nullid)
1318 1320 node1 = f.add(b'1', None, tr, 1, node0, nullid)
1319 1321 f.add(b'2', None, tr, 2, node1, nullid)
1320 1322 f.add(b'3', None, tr, 3, node0, nullid)
1321 1323 f.add(b'4', None, tr, 4, node0, nullid)
1322 1324
1323 1325 for rev in range(5):
1324 1326 self.assertEqual(f.getstrippoint(rev), (rev, set()))
1325 1327
1326 1328 def testgetstrippointearlierlinkrevs(self):
1327 1329 f = self._makefilefn()
1328 1330
1329 1331 with self._maketransactionfn() as tr:
1330 1332 node0 = f.add(b'0', None, tr, 0, nullid, nullid)
1331 1333 f.add(b'1', None, tr, 10, node0, nullid)
1332 1334 f.add(b'2', None, tr, 5, node0, nullid)
1333 1335
1334 1336 self.assertEqual(f.getstrippoint(0), (0, set()))
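# Linkrevs are 0, 10 and 5 for revs 0, 1 and 2. getstrippoint(minlink) returns
# the first rev to strip plus the set of revs that get stripped even though
# their linkrev is below minlink. For minlink=6, rev 1 (linkrev 10) must go,
# which truncates rev 2 (linkrev 5) as well, hence (1, {2}).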
1335 1337 self.assertEqual(f.getstrippoint(1), (1, set()))
1336 1338 self.assertEqual(f.getstrippoint(2), (1, set()))
1337 1339 self.assertEqual(f.getstrippoint(3), (1, set()))
1338 1340 self.assertEqual(f.getstrippoint(4), (1, set()))
1339 1341 self.assertEqual(f.getstrippoint(5), (1, set()))
1340 1342 self.assertEqual(f.getstrippoint(6), (1, {2}))
1341 1343 self.assertEqual(f.getstrippoint(7), (1, {2}))
1342 1344 self.assertEqual(f.getstrippoint(8), (1, {2}))
1343 1345 self.assertEqual(f.getstrippoint(9), (1, {2}))
1344 1346 self.assertEqual(f.getstrippoint(10), (1, {2}))
1345 1347 self.assertEqual(f.getstrippoint(11), (3, set()))
1346 1348
1347 1349 def teststripempty(self):
1348 1350 f = self._makefilefn()
1349 1351
1350 1352 with self._maketransactionfn() as tr:
1351 1353 f.strip(0, tr)
1352 1354
1353 1355 self.assertEqual(len(f), 0)
1354 1356
1355 1357 def teststripall(self):
1356 1358 f = self._makefilefn()
1357 1359
1358 1360 with self._maketransactionfn() as tr:
1359 1361 p1 = nullid
1360 1362 for rev in range(10):
1361 1363 p1 = f.add(b'%d' % rev, None, tr, rev, p1, nullid)
1362 1364
1363 1365 self.assertEqual(len(f), 10)
1364 1366
1365 1367 with self._maketransactionfn() as tr:
1366 1368 f.strip(0, tr)
1367 1369
1368 1370 self.assertEqual(len(f), 0)
1369 1371
1370 1372 def teststrippartial(self):
1371 1373 f = self._makefilefn()
1372 1374
1373 1375 with self._maketransactionfn() as tr:
1374 1376 f.add(b'0', None, tr, 0, nullid, nullid)
1375 1377 node1 = f.add(b'1', None, tr, 5, nullid, nullid)
1376 1378 node2 = f.add(b'2', None, tr, 10, nullid, nullid)
1377 1379
1378 1380 self.assertEqual(len(f), 3)
1379 1381
1380 1382 with self._maketransactionfn() as tr:
1381 1383 f.strip(11, tr)
1382 1384
1383 1385 self.assertEqual(len(f), 3)
1384 1386
1385 1387 with self._maketransactionfn() as tr:
1386 1388 f.strip(10, tr)
1387 1389
1388 1390 self.assertEqual(len(f), 2)
1389 1391
1390 1392 with self.assertRaises(error.LookupError):
1391 1393 f.rev(node2)
1392 1394
1393 1395 with self._maketransactionfn() as tr:
1394 1396 f.strip(6, tr)
1395 1397
1396 1398 self.assertEqual(len(f), 2)
1397 1399
1398 1400 with self._maketransactionfn() as tr:
1399 1401 f.strip(3, tr)
1400 1402
1401 1403 self.assertEqual(len(f), 1)
1402 1404
1403 1405 with self.assertRaises(error.LookupError):
1404 1406 f.rev(node1)
1405 1407
1406 1408
1407 1409 def makeifileindextests(makefilefn, maketransactionfn, addrawrevisionfn):
1408 1410 """Create a unittest.TestCase class suitable for testing file storage.
1409 1411
1410 1412 ``makefilefn`` is a callable which receives the test case as an
1411 1413 argument and returns an object implementing the ``ifilestorage`` interface.
1412 1414
1413 1415 ``maketransactionfn`` is a callable which receives the test case as an
1414 1416 argument and returns a transaction object.
1415 1417
1416 1418 ``addrawrevisionfn`` is a callable which receives arguments describing a
1417 1419 low-level revision to add. This callable allows the insertion of
1418 1420 potentially bad data into the store in order to facilitate testing.
1419 1421
1420 1422 Returns a type that is a ``unittest.TestCase`` that can be used for
1421 1423 testing the object implementing the file storage interface. Simply
1422 1424 assign the returned value to a module-level attribute and a test loader
1423 1425 should find and run it automatically.
1424 1426 """
1425 1427 d = {
1426 1428 '_makefilefn': makefilefn,
1427 1429 '_maketransactionfn': maketransactionfn,
1428 1430 '_addrawrevisionfn': addrawrevisionfn,
1429 1431 }
1430 1432 return type('ifileindextests', (ifileindextests,), d)
1431 1433
1432 1434
1433 1435 def makeifiledatatests(makefilefn, maketransactionfn, addrawrevisionfn):
1434 1436 d = {
1435 1437 '_makefilefn': makefilefn,
1436 1438 '_maketransactionfn': maketransactionfn,
1437 1439 '_addrawrevisionfn': addrawrevisionfn,
1438 1440 }
1439 1441 return type('ifiledatatests', (ifiledatatests,), d)
1440 1442
1441 1443
1442 1444 def makeifilemutationtests(makefilefn, maketransactionfn, addrawrevisionfn):
1443 1445 d = {
1444 1446 '_makefilefn': makefilefn,
1445 1447 '_maketransactionfn': maketransactionfn,
1446 1448 '_addrawrevisionfn': addrawrevisionfn,
1447 1449 }
1448 1450 return type('ifilemutationtests', (ifilemutationtests,), d)
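As the docstrings above describe, a storage backend's test module is expected to bind the classes returned by these factories to module-level names so the unittest loader discovers them. A minimal sketch of that wiring follows; ``mystore`` and its helpers are hypothetical placeholders, not a real backend API, and the factory functions are assumed to be imported from this module:

    # Hypothetical glue for a custom ifilestorage backend.
    def makefilefn(testcase):
        return mystore.filestorage(b'dummy')  # assumed backend constructor

    def maketransactionfn(testcase):
        return mystore.maketransaction()  # assumed transaction helper

    def addrawrevisionfn(
        testcase, store, tr, node, p1, p2, linkrev,
        rawtext=None, delta=None, censored=False,
    ):
        # Assumed low-level helper that bypasses hash validation.
        store.addrawrevision(node, p1, p2, linkrev, rawtext, delta, censored)

    # Module-level assignment is what lets the test loader find these.
    ifileindextests = makeifileindextests(
        makefilefn, maketransactionfn, addrawrevisionfn
    )
    ifiledatatests = makeifiledatatests(
        makefilefn, maketransactionfn, addrawrevisionfn
    )
    ifilemutationtests = makeifilemutationtests(
        makefilefn, maketransactionfn, addrawrevisionfn
    )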
@@ -1,738 +1,738 b''
1 1 # simplestorerepo.py - Extension that swaps in alternate repository storage.
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 # To use this with the test suite:
9 9 #
10 10 # $ HGREPOFEATURES="simplestore" ./run-tests.py \
11 11 # --extra-config-opt extensions.simplestore=`pwd`/simplestorerepo.py
12 12
13 13 from __future__ import absolute_import
14 14
15 15 import stat
16 16
17 17 from mercurial.i18n import _
18 18 from mercurial.node import (
19 19 bin,
20 20 hex,
21 21 nullid,
22 22 nullrev,
23 23 )
24 24 from mercurial.thirdparty import attr
25 25 from mercurial import (
26 26 ancestor,
27 27 bundlerepo,
28 28 error,
29 29 extensions,
30 30 localrepo,
31 31 mdiff,
32 32 pycompat,
33 33 revlog,
34 34 store,
35 35 verify,
36 36 )
37 37 from mercurial.interfaces import (
38 38 repository,
39 39 util as interfaceutil,
40 40 )
41 41 from mercurial.utils import (
42 42 cborutil,
43 43 storageutil,
44 44 )
45 45 from mercurial.revlogutils import flagutil
46 46
47 47 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
48 48 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
49 49 # be specifying the version(s) of Mercurial they are tested with, or
50 50 # leave the attribute unspecified.
51 51 testedwith = b'ships-with-hg-core'
52 52
53 53 REQUIREMENT = b'testonly-simplestore'
54 54
55 55
56 56 def validatenode(node):
57 57 if isinstance(node, int):
58 58 raise ValueError('expected node; got int')
59 59
60 60 if len(node) != 20:
61 61 raise ValueError('expected 20 byte node')
62 62
63 63
64 64 def validaterev(rev):
65 65 if not isinstance(rev, int):
66 66 raise ValueError('expected int')
67 67
68 68
69 69 class simplestoreerror(error.StorageError):
70 70 pass
71 71
72 72
73 73 @interfaceutil.implementer(repository.irevisiondelta)
74 74 @attr.s(slots=True)
75 75 class simplestorerevisiondelta(object):
76 76 node = attr.ib()
77 77 p1node = attr.ib()
78 78 p2node = attr.ib()
79 79 basenode = attr.ib()
80 80 flags = attr.ib()
81 81 baserevisionsize = attr.ib()
82 82 revision = attr.ib()
83 83 delta = attr.ib()
84 84 linknode = attr.ib(default=None)
85 85
86 86
87 87 @interfaceutil.implementer(repository.iverifyproblem)
88 88 @attr.s(frozen=True)
89 89 class simplefilestoreproblem(object):
90 90 warning = attr.ib(default=None)
91 91 error = attr.ib(default=None)
92 92 node = attr.ib(default=None)
93 93
94 94
95 95 @interfaceutil.implementer(repository.ifilestorage)
96 96 class filestorage(object):
97 97 """Implements storage for a tracked path.
98 98
99 99 Data is stored in the VFS in a directory corresponding to the tracked
100 100 path.
101 101
102 102 Index data is stored in an ``index`` file using CBOR.
103 103
104 104 Fulltext data is stored in files having names of the node.
105 105 """
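# Illustrative on-disk layout (within the store vfs) for a tracked path such
# as foo/bar.txt:
#
#   data/foo/bar.txt/index        - CBOR-encoded index entries
#   data/foo/bar.txt/<hex node>   - raw fulltext of each stored revision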
106 106
107 107 _flagserrorclass = simplestoreerror
108 108
109 109 def __init__(self, svfs, path):
110 110 self._svfs = svfs
111 111 self._path = path
112 112
113 113 self._storepath = b'/'.join([b'data', path])
114 114 self._indexpath = b'/'.join([self._storepath, b'index'])
115 115
116 116 indexdata = self._svfs.tryread(self._indexpath)
117 117 if indexdata:
118 118 indexdata = cborutil.decodeall(indexdata)
119 119
120 120 self._indexdata = indexdata or []
121 121 self._indexbynode = {}
122 122 self._indexbyrev = {}
123 123 self._index = []
124 124 self._refreshindex()
125 125
126 126 self._flagprocessors = dict(flagutil.flagprocessors)
127 127
128 128 def _refreshindex(self):
129 129 self._indexbynode.clear()
130 130 self._indexbyrev.clear()
131 131 self._index = []
132 132
133 133 for i, entry in enumerate(self._indexdata):
134 134 self._indexbynode[entry[b'node']] = entry
135 135 self._indexbyrev[i] = entry
136 136
137 137 self._indexbynode[nullid] = {
138 138 b'node': nullid,
139 139 b'p1': nullid,
140 140 b'p2': nullid,
141 141 b'linkrev': nullrev,
142 142 b'flags': 0,
143 143 }
144 144
145 145 self._indexbyrev[nullrev] = {
146 146 b'node': nullid,
147 147 b'p1': nullid,
148 148 b'p2': nullid,
149 149 b'linkrev': nullrev,
150 150 b'flags': 0,
151 151 }
152 152
153 153 for i, entry in enumerate(self._indexdata):
154 154 p1rev, p2rev = self.parentrevs(self.rev(entry[b'node']))
155 155
156 156 # start, length, rawsize, chainbase, linkrev, p1, p2, node
157 157 self._index.append(
158 158 (0, 0, 0, -1, entry[b'linkrev'], p1rev, p2rev, entry[b'node'])
159 159 )
160 160
161 161 self._index.append((0, 0, 0, -1, -1, -1, -1, nullid))
162 162
163 163 def __len__(self):
164 164 return len(self._indexdata)
165 165
166 166 def __iter__(self):
167 167 return iter(range(len(self)))
168 168
169 169 def revs(self, start=0, stop=None):
170 170 step = 1
171 171 if stop is not None:
172 172 if start > stop:
173 173 step = -1
174 174
175 175 stop += step
176 176 else:
177 177 stop = len(self)
178 178
179 179 return range(start, stop, step)
180 180
181 181 def parents(self, node):
182 182 validatenode(node)
183 183
184 184 if node not in self._indexbynode:
185 185 raise KeyError('unknown node')
186 186
187 187 entry = self._indexbynode[node]
188 188
189 189 return entry[b'p1'], entry[b'p2']
190 190
191 191 def parentrevs(self, rev):
192 192 p1, p2 = self.parents(self._indexbyrev[rev][b'node'])
193 193 return self.rev(p1), self.rev(p2)
194 194
195 195 def rev(self, node):
196 196 validatenode(node)
197 197
198 198 try:
199 199 self._indexbynode[node]
200 200 except KeyError:
201 201 raise error.LookupError(node, self._indexpath, _('no node'))
202 202
203 203 for rev, entry in self._indexbyrev.items():
204 204 if entry[b'node'] == node:
205 205 return rev
206 206
207 207 raise error.ProgrammingError(b'this should not occur')
208 208
209 209 def node(self, rev):
210 210 validaterev(rev)
211 211
212 212 return self._indexbyrev[rev][b'node']
213 213
214 214 def hasnode(self, node):
215 215 validatenode(node)
216 216 return node in self._indexbynode
217 217
218 218 def censorrevision(self, tr, censornode, tombstone=b''):
219 219 raise NotImplementedError('TODO')
220 220
221 221 def lookup(self, node):
222 222 if isinstance(node, int):
223 223 return self.node(node)
224 224
225 225 if len(node) == 20:
226 226 self.rev(node)
227 227 return node
228 228
229 229 try:
230 230 rev = int(node)
231 231 if '%d' % rev != node:
232 232 raise ValueError
233 233
234 234 if rev < 0:
235 235 rev = len(self) + rev
236 236 if rev < 0 or rev >= len(self):
237 237 raise ValueError
238 238
239 239 return self.node(rev)
240 240 except (ValueError, OverflowError):
241 241 pass
242 242
243 243 if len(node) == 40:
244 244 try:
245 245 rawnode = bin(node)
246 246 self.rev(rawnode)
247 247 return rawnode
248 248 except TypeError:
249 249 pass
250 250
251 251 raise error.LookupError(node, self._path, _('invalid lookup input'))
252 252
253 253 def linkrev(self, rev):
254 254 validaterev(rev)
255 255
256 256 return self._indexbyrev[rev][b'linkrev']
257 257
258 258 def _flags(self, rev):
259 259 validaterev(rev)
260 260
261 261 return self._indexbyrev[rev][b'flags']
262 262
263 263 def _candelta(self, baserev, rev):
264 264 validaterev(baserev)
265 265 validaterev(rev)
266 266
267 267 if (self._flags(baserev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS) or (
268 268 self._flags(rev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS
269 269 ):
270 270 return False
271 271
272 272 return True
273 273
274 274 def checkhash(self, text, node, p1=None, p2=None, rev=None):
275 275 if p1 is None and p2 is None:
276 276 p1, p2 = self.parents(node)
277 277 if node != storageutil.hashrevisionsha1(text, p1, p2):
278 278 raise simplestoreerror(
279 279 _("integrity check failed on %s") % self._path
280 280 )
281 281
282 282 def revision(self, nodeorrev, raw=False):
283 283 if isinstance(nodeorrev, int):
284 284 node = self.node(nodeorrev)
285 285 else:
286 286 node = nodeorrev
287 287 validatenode(node)
288 288
289 289 if node == nullid:
290 290 return b''
291 291
292 292 rev = self.rev(node)
293 293 flags = self._flags(rev)
294 294
295 295 path = b'/'.join([self._storepath, hex(node)])
296 296 rawtext = self._svfs.read(path)
297 297
298 298 if raw:
299 299 validatehash = flagutil.processflagsraw(self, rawtext, flags)
300 300 text = rawtext
301 301 else:
302 302 r = flagutil.processflagsread(self, rawtext, flags)
303 303 text, validatehash, sidedata = r
304 304 if validatehash:
305 305 self.checkhash(text, node, rev=rev)
306 306
307 307 return text
308 308
309 309 def rawdata(self, nodeorrev):
 310  310         return self.revision(nodeorrev, raw=True)
311 311
312 312 def read(self, node):
313 313 validatenode(node)
314 314
315 315 revision = self.revision(node)
316 316
317 317 if not revision.startswith(b'\1\n'):
318 318 return revision
319 319
320 320 start = revision.index(b'\1\n', 2)
321 321 return revision[start + 2 :]
322 322
323 323 def renamed(self, node):
324 324 validatenode(node)
325 325
326 326 if self.parents(node)[0] != nullid:
327 327 return False
328 328
329 329 fulltext = self.revision(node)
330 330 m = storageutil.parsemeta(fulltext)[0]
331 331
332 332 if m and 'copy' in m:
333 333 return m['copy'], bin(m['copyrev'])
334 334
335 335 return False
336 336
337 337 def cmp(self, node, text):
338 338 validatenode(node)
339 339
340 340 t = text
341 341
342 342 if text.startswith(b'\1\n'):
343 343 t = b'\1\n\1\n' + text
344 344
345 345 p1, p2 = self.parents(node)
346 346
347 347 if storageutil.hashrevisionsha1(t, p1, p2) == node:
348 348 return False
349 349
350 350 if self.iscensored(self.rev(node)):
351 351 return text != b''
352 352
353 353 if self.renamed(node):
354 354 t2 = self.read(node)
355 355 return t2 != text
356 356
357 357 return True
358 358
359 359 def size(self, rev):
360 360 validaterev(rev)
361 361
362 362 node = self._indexbyrev[rev][b'node']
363 363
364 364 if self.renamed(node):
365 365 return len(self.read(node))
366 366
367 367 if self.iscensored(rev):
368 368 return 0
369 369
370 370 return len(self.revision(node))
371 371
372 372 def iscensored(self, rev):
373 373 validaterev(rev)
374 374
375 375 return self._flags(rev) & repository.REVISION_FLAG_CENSORED
376 376
377 377 def commonancestorsheads(self, a, b):
378 378 validatenode(a)
379 379 validatenode(b)
380 380
381 381 a = self.rev(a)
382 382 b = self.rev(b)
383 383
384 384 ancestors = ancestor.commonancestorsheads(self.parentrevs, a, b)
385 385 return pycompat.maplist(self.node, ancestors)
386 386
387 387 def descendants(self, revs):
388 388 # This is a copy of revlog.descendants()
389 389 first = min(revs)
390 390 if first == nullrev:
391 391 for i in self:
392 392 yield i
393 393 return
394 394
395 395 seen = set(revs)
396 396 for i in self.revs(start=first + 1):
397 397 for x in self.parentrevs(i):
398 398 if x != nullrev and x in seen:
399 399 seen.add(i)
400 400 yield i
401 401 break
402 402
403 403 # Required by verify.
404 404 def files(self):
405 405 entries = self._svfs.listdir(self._storepath)
406 406
407 407 # Strip out undo.backup.* files created as part of transaction
408 408 # recording.
409 409 entries = [f for f in entries if not f.startswith('undo.backup.')]
410 410
411 411 return [b'/'.join((self._storepath, f)) for f in entries]
412 412
413 413 def storageinfo(
414 414 self,
415 415 exclusivefiles=False,
416 416 sharedfiles=False,
417 417 revisionscount=False,
418 418 trackedsize=False,
419 419 storedsize=False,
420 420 ):
421 421 # TODO do a real implementation of this
422 422 return {
423 423 'exclusivefiles': [],
424 424 'sharedfiles': [],
425 425 'revisionscount': len(self),
426 426 'trackedsize': 0,
427 427 'storedsize': None,
428 428 }
429 429
430 430 def verifyintegrity(self, state):
431 431 state['skipread'] = set()
432 432 for rev in self:
433 433 node = self.node(rev)
434 434 try:
435 435 self.revision(node)
436 436 except Exception as e:
437 437 yield simplefilestoreproblem(
438 438 error='unpacking %s: %s' % (node, e), node=node
439 439 )
440 440 state['skipread'].add(node)
441 441
442 442 def emitrevisions(
443 443 self,
444 444 nodes,
445 445 nodesorder=None,
446 446 revisiondata=False,
447 447 assumehaveparentrevisions=False,
448 448 deltamode=repository.CG_DELTAMODE_STD,
449 449 ):
450 450 # TODO this will probably break on some ordering options.
451 451 nodes = [n for n in nodes if n != nullid]
452 452 if not nodes:
453 453 return
454 454 for delta in storageutil.emitrevisions(
455 455 self,
456 456 nodes,
457 457 nodesorder,
458 458 simplestorerevisiondelta,
459 459 revisiondata=revisiondata,
460 460 assumehaveparentrevisions=assumehaveparentrevisions,
461 461 deltamode=deltamode,
462 462 ):
463 463 yield delta
464 464
465 465 def add(self, text, meta, transaction, linkrev, p1, p2):
466 466 if meta or text.startswith(b'\1\n'):
467 467 text = storageutil.packmeta(meta, text)
468 468
469 469 return self.addrevision(text, transaction, linkrev, p1, p2)
470 470
471 471 def addrevision(
472 472 self,
473 473 text,
474 474 transaction,
475 475 linkrev,
476 476 p1,
477 477 p2,
478 478 node=None,
479 479 flags=revlog.REVIDX_DEFAULT_FLAGS,
480 480 cachedelta=None,
481 481 ):
482 482 validatenode(p1)
483 483 validatenode(p2)
484 484
485 485 if flags:
486 486 node = node or storageutil.hashrevisionsha1(text, p1, p2)
487 487
488 488 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
489 489
490 490 node = node or storageutil.hashrevisionsha1(text, p1, p2)
491 491
492 492 if node in self._indexbynode:
493 493 return node
494 494
495 495 if validatehash:
496 496 self.checkhash(rawtext, node, p1=p1, p2=p2)
497 497
498 498 return self._addrawrevision(
499 499 node, rawtext, transaction, linkrev, p1, p2, flags
500 500 )
501 501
502 502 def _addrawrevision(self, node, rawtext, transaction, link, p1, p2, flags):
503 503 transaction.addbackup(self._indexpath)
504 504
505 505 path = b'/'.join([self._storepath, hex(node)])
506 506
507 507 self._svfs.write(path, rawtext)
508 508
509 509 self._indexdata.append(
510 510 {
511 511 b'node': node,
512 512 b'p1': p1,
513 513 b'p2': p2,
514 514 b'linkrev': link,
515 515 b'flags': flags,
516 516 }
517 517 )
518 518
519 519 self._reflectindexupdate()
520 520
521 521 return node
522 522
523 523 def _reflectindexupdate(self):
524 524 self._refreshindex()
525 525 self._svfs.write(
526 526 self._indexpath, ''.join(cborutil.streamencode(self._indexdata))
527 527 )
528 528
529 529 def addgroup(
530 530 self,
531 531 deltas,
532 532 linkmapper,
533 533 transaction,
534 534 addrevisioncb=None,
535 535 duplicaterevisioncb=None,
536 536 maybemissingparents=False,
537 537 ):
538 538 if maybemissingparents:
539 539 raise error.Abort(
540 540 _('simple store does not support missing parents ' 'write mode')
541 541 )
542 542
543 543 empty = True
544 544
545 545 transaction.addbackup(self._indexpath)
546 546
547 547 for node, p1, p2, linknode, deltabase, delta, flags in deltas:
548 548 linkrev = linkmapper(linknode)
549 549 flags = flags or revlog.REVIDX_DEFAULT_FLAGS
550 550
551 551 if node in self._indexbynode:
552 552 if duplicaterevisioncb:
553 duplicaterevisioncb(self, node)
553 duplicaterevisioncb(self, self.rev(node))
554 554 empty = False
555 555 continue
556 556
557 557 # Need to resolve the fulltext from the delta base.
558 558 if deltabase == nullid:
559 559 text = mdiff.patch(b'', delta)
560 560 else:
561 561 text = mdiff.patch(self.revision(deltabase), delta)
562 562
563 self._addrawrevision(
563 rev = self._addrawrevision(
564 564 node, text, transaction, linkrev, p1, p2, flags
565 565 )
566 566
567 567 if addrevisioncb:
568 addrevisioncb(self, node)
568 addrevisioncb(self, rev)
569 569 empty = False
570 570 return not empty
571 571
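# With this change, addrevisioncb and duplicaterevisioncb both receive the
# store and a revision number; callers that still need nodes resolve them
# through the store. An illustrative caller-side sketch (names hypothetical):
#
#     added = []
#
#     def onrev(store, rev):
#         added.append(store.node(rev))
#
#     store.addgroup(deltas, linkmapper, tr,
#                    addrevisioncb=onrev, duplicaterevisioncb=onrev)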
572 572 def _headrevs(self):
573 573 # Assume all revisions are heads by default.
574 574 revishead = {rev: True for rev in self._indexbyrev}
575 575
576 576 for rev, entry in self._indexbyrev.items():
577 577 # Unset head flag for all seen parents.
578 578 revishead[self.rev(entry[b'p1'])] = False
579 579 revishead[self.rev(entry[b'p2'])] = False
580 580
581 581 return [rev for rev, ishead in sorted(revishead.items()) if ishead]
582 582
583 583 def heads(self, start=None, stop=None):
584 584 # This is copied from revlog.py.
585 585 if start is None and stop is None:
586 586 if not len(self):
587 587 return [nullid]
588 588 return [self.node(r) for r in self._headrevs()]
589 589
590 590 if start is None:
591 591 start = nullid
592 592 if stop is None:
593 593 stop = []
594 594 stoprevs = {self.rev(n) for n in stop}
595 595 startrev = self.rev(start)
596 596 reachable = {startrev}
597 597 heads = {startrev}
598 598
599 599 parentrevs = self.parentrevs
600 600 for r in self.revs(start=startrev + 1):
601 601 for p in parentrevs(r):
602 602 if p in reachable:
603 603 if r not in stoprevs:
604 604 reachable.add(r)
605 605 heads.add(r)
606 606 if p in heads and p not in stoprevs:
607 607 heads.remove(p)
608 608
609 609 return [self.node(r) for r in heads]
610 610
611 611 def children(self, node):
612 612 validatenode(node)
613 613
614 614 # This is a copy of revlog.children().
615 615 c = []
616 616 p = self.rev(node)
617 617 for r in self.revs(start=p + 1):
618 618 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
619 619 if prevs:
620 620 for pr in prevs:
621 621 if pr == p:
622 622 c.append(self.node(r))
623 623 elif p == nullrev:
624 624 c.append(self.node(r))
625 625 return c
626 626
627 627 def getstrippoint(self, minlink):
628 628 return storageutil.resolvestripinfo(
629 629 minlink,
630 630 len(self) - 1,
631 631 self._headrevs(),
632 632 self.linkrev,
633 633 self.parentrevs,
634 634 )
635 635
636 636 def strip(self, minlink, transaction):
637 637 if not len(self):
638 638 return
639 639
640 640 rev, _ignored = self.getstrippoint(minlink)
641 641 if rev == len(self):
642 642 return
643 643
644 644 # Purge index data starting at the requested revision.
645 645 self._indexdata[rev:] = []
646 646 self._reflectindexupdate()
647 647
648 648
649 649 def issimplestorefile(f, kind, st):
650 650 if kind != stat.S_IFREG:
651 651 return False
652 652
653 653 if store.isrevlog(f, kind, st):
654 654 return False
655 655
656 656 # Ignore transaction undo files.
657 657 if f.startswith('undo.'):
658 658 return False
659 659
660 660 # Otherwise assume it belongs to the simple store.
661 661 return True
662 662
663 663
664 664 class simplestore(store.encodedstore):
665 665 def datafiles(self):
666 666 for x in super(simplestore, self).datafiles():
667 667 yield x
668 668
669 669 # Supplement with non-revlog files.
670 670 extrafiles = self._walk('data', True, filefilter=issimplestorefile)
671 671
672 672 for unencoded, encoded, size in extrafiles:
673 673 try:
674 674 unencoded = store.decodefilename(unencoded)
675 675 except KeyError:
676 676 unencoded = None
677 677
678 678 yield unencoded, encoded, size
679 679
680 680
681 681 def reposetup(ui, repo):
682 682 if not repo.local():
683 683 return
684 684
685 685 if isinstance(repo, bundlerepo.bundlerepository):
686 686 raise error.Abort(_('cannot use simple store with bundlerepo'))
687 687
688 688 class simplestorerepo(repo.__class__):
689 689 def file(self, f):
690 690 return filestorage(self.svfs, f)
691 691
692 692 repo.__class__ = simplestorerepo
693 693
694 694
695 695 def featuresetup(ui, supported):
696 696 supported.add(REQUIREMENT)
697 697
698 698
699 699 def newreporequirements(orig, ui, createopts):
700 700 """Modifies default requirements for new repos to use the simple store."""
701 701 requirements = orig(ui, createopts)
702 702
703 703 # These requirements are only used to affect creation of the store
704 704 # object. We have our own store. So we can remove them.
705 705 # TODO do this once we feel like taking the test hit.
706 706 # if 'fncache' in requirements:
707 707 # requirements.remove('fncache')
708 708 # if 'dotencode' in requirements:
709 709 # requirements.remove('dotencode')
710 710
711 711 requirements.add(REQUIREMENT)
712 712
713 713 return requirements
714 714
715 715
716 716 def makestore(orig, requirements, path, vfstype):
717 717 if REQUIREMENT not in requirements:
718 718 return orig(requirements, path, vfstype)
719 719
720 720 return simplestore(path, vfstype)
721 721
722 722
723 723 def verifierinit(orig, self, *args, **kwargs):
724 724 orig(self, *args, **kwargs)
725 725
726 726 # We don't care that files in the store don't align with what is
727 727 # advertised. So suppress these warnings.
728 728 self.warnorphanstorefiles = False
729 729
730 730
731 731 def extsetup(ui):
732 732 localrepo.featuresetupfuncs.add(featuresetup)
733 733
734 734 extensions.wrapfunction(
735 735 localrepo, 'newreporequirements', newreporequirements
736 736 )
737 737 extensions.wrapfunction(localrepo, 'makestore', makestore)
738 738 extensions.wrapfunction(verify.verifier, '__init__', verifierinit)