delta: add sidedata field to revision delta...
Raphaël Gomès
r47446:e8c11a2c default
@@ -1,1310 +1,1315 b''
1 1 # sqlitestore.py - Storage backend that uses SQLite
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """store repository data in SQLite (EXPERIMENTAL)
9 9
10 10 The sqlitestore extension enables the storage of repository data in SQLite.
11 11
12 12 This extension is HIGHLY EXPERIMENTAL. There are NO BACKWARDS COMPATIBILITY
13 13 GUARANTEES. This means that repositories created with this extension may
14 14 only be usable with the exact version of this extension/Mercurial that was
15 15 used. The extension attempts to enforce this in order to prevent repository
16 16 corruption.
17 17
18 18 In addition, several features are not yet supported or have known bugs:
19 19
20 20 * Only some data is stored in SQLite. Changeset, manifest, and other repository
21 21 data is not yet stored in SQLite.
22 22 * Transactions are not robust. If the process is aborted at the right time
23 23 during transaction close/rollback, the repository could be in an inconsistent
24 24 state. This problem will diminish once all repository data is tracked by
25 25 SQLite.
26 26 * Bundle repositories do not work (the ability to use e.g.
27 27 `hg -R <bundle-file> log` to automatically overlay a bundle on top of the
28 28 existing repository).
29 29 * Various other features don't work.
30 30
31 31 This extension should work for basic clone/pull, update, and commit workflows.
32 32 Some history rewriting operations may fail due to lack of support for bundle
33 33 repositories.
34 34
35 35 To use, activate the extension and set the ``storage.new-repo-backend`` config
36 36 option to ``sqlite`` to enable new repositories to use SQLite for storage.
37 37 """
38 38
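# As a minimal sketch of the configuration described in the docstring above
# (assuming the standard hgrc section names; the extension and option names
# come from the docstring and the test invocation below), enabling the
# extension and the SQLite backend for new repositories would look like:
#
#   [extensions]
#   sqlitestore =
#
#   [storage]
#   new-repo-backend = sqlite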
39 39 # To run the test suite with repos using SQLite by default, execute the
40 40 # following:
41 41 #
42 42 # HGREPOFEATURES="sqlitestore" run-tests.py \
43 43 # --extra-config-opt extensions.sqlitestore= \
44 44 # --extra-config-opt storage.new-repo-backend=sqlite
45 45
46 46 from __future__ import absolute_import
47 47
48 48 import sqlite3
49 49 import struct
50 50 import threading
51 51 import zlib
52 52
53 53 from mercurial.i18n import _
54 54 from mercurial.node import (
55 55 nullid,
56 56 nullrev,
57 57 short,
58 58 )
59 59 from mercurial.thirdparty import attr
60 60 from mercurial import (
61 61 ancestor,
62 62 dagop,
63 63 encoding,
64 64 error,
65 65 extensions,
66 66 localrepo,
67 67 mdiff,
68 68 pycompat,
69 69 registrar,
70 70 requirements,
71 71 util,
72 72 verify,
73 73 )
74 74 from mercurial.interfaces import (
75 75 repository,
76 76 util as interfaceutil,
77 77 )
78 78 from mercurial.utils import (
79 79 hashutil,
80 80 storageutil,
81 81 )
82 82
83 83 try:
84 84 from mercurial import zstd
85 85
86 86 zstd.__version__
87 87 except ImportError:
88 88 zstd = None
89 89
90 90 configtable = {}
91 91 configitem = registrar.configitem(configtable)
92 92
93 93 # experimental config: storage.sqlite.compression
94 94 configitem(
95 95 b'storage',
96 96 b'sqlite.compression',
97 97 default=b'zstd' if zstd else b'zlib',
98 98 experimental=True,
99 99 )
100 100
101 101 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
102 102 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
103 103 # be specifying the version(s) of Mercurial they are tested with, or
104 104 # leave the attribute unspecified.
105 105 testedwith = b'ships-with-hg-core'
106 106
107 107 REQUIREMENT = b'exp-sqlite-001'
108 108 REQUIREMENT_ZSTD = b'exp-sqlite-comp-001=zstd'
109 109 REQUIREMENT_ZLIB = b'exp-sqlite-comp-001=zlib'
110 110 REQUIREMENT_NONE = b'exp-sqlite-comp-001=none'
111 111 REQUIREMENT_SHALLOW_FILES = b'exp-sqlite-shallow-files'
112 112
113 113 CURRENT_SCHEMA_VERSION = 1
114 114
115 115 COMPRESSION_NONE = 1
116 116 COMPRESSION_ZSTD = 2
117 117 COMPRESSION_ZLIB = 3
118 118
119 119 FLAG_CENSORED = 1
120 120 FLAG_MISSING_P1 = 2
121 121 FLAG_MISSING_P2 = 4
122 122
123 123 CREATE_SCHEMA = [
124 124 # Deltas are stored as content-indexed blobs.
125 125 # compression column holds COMPRESSION_* constant for how the
126 126 # delta is encoded.
127 127 'CREATE TABLE delta ('
128 128 ' id INTEGER PRIMARY KEY, '
129 129 ' compression INTEGER NOT NULL, '
130 130 ' hash BLOB UNIQUE ON CONFLICT ABORT, '
131 131 ' delta BLOB NOT NULL '
132 132 ')',
133 133 # Tracked paths are denormalized to integers to avoid redundant
134 134 # storage of the path name.
135 135 'CREATE TABLE filepath ('
136 136 ' id INTEGER PRIMARY KEY, '
137 137 ' path BLOB NOT NULL '
138 138 ')',
139 139 'CREATE UNIQUE INDEX filepath_path ON filepath (path)',
140 140 # We have a single table for all file revision data.
141 141 # Each file revision is uniquely described by a (path, rev) and
142 142 # (path, node).
143 143 #
144 144 # Revision data is stored as a pointer to the delta producing this
145 145 # revision and the file revision whose delta should be applied before
146 146 # that one. One can reconstruct the delta chain by recursively following
147 147 # the delta base revision pointers until one encounters NULL.
148 148 #
149 149 # flags column holds bitwise integer flags controlling storage options.
150 150 # These flags are defined by the FLAG_* constants.
151 151 'CREATE TABLE fileindex ('
152 152 ' id INTEGER PRIMARY KEY, '
153 153 ' pathid INTEGER REFERENCES filepath(id), '
154 154 ' revnum INTEGER NOT NULL, '
155 155 ' p1rev INTEGER NOT NULL, '
156 156 ' p2rev INTEGER NOT NULL, '
157 157 ' linkrev INTEGER NOT NULL, '
158 158 ' flags INTEGER NOT NULL, '
159 159 ' deltaid INTEGER REFERENCES delta(id), '
160 160 ' deltabaseid INTEGER REFERENCES fileindex(id), '
161 161 ' node BLOB NOT NULL '
162 162 ')',
163 163 'CREATE UNIQUE INDEX fileindex_pathrevnum '
164 164 ' ON fileindex (pathid, revnum)',
165 165 'CREATE UNIQUE INDEX fileindex_pathnode ON fileindex (pathid, node)',
166 166 # Provide a view over all file data for convenience.
167 167 'CREATE VIEW filedata AS '
168 168 'SELECT '
169 169 ' fileindex.id AS id, '
170 170 ' filepath.id AS pathid, '
171 171 ' filepath.path AS path, '
172 172 ' fileindex.revnum AS revnum, '
173 173 ' fileindex.node AS node, '
174 174 ' fileindex.p1rev AS p1rev, '
175 175 ' fileindex.p2rev AS p2rev, '
176 176 ' fileindex.linkrev AS linkrev, '
177 177 ' fileindex.flags AS flags, '
178 178 ' fileindex.deltaid AS deltaid, '
179 179 ' fileindex.deltabaseid AS deltabaseid '
180 180 'FROM filepath, fileindex '
181 181 'WHERE fileindex.pathid=filepath.id',
182 182 'PRAGMA user_version=%d' % CURRENT_SCHEMA_VERSION,
183 183 ]
184 184
185 185
186 186 def resolvedeltachain(db, pathid, node, revisioncache, stoprids, zstddctx=None):
187 187 """Resolve a delta chain for a file node."""
188 188
189 189 # TODO the "not in ({stops})" here is possibly slowing down the query
190 190 # because it needs to perform the lookup on every recursive invocation.
191 191 # This could possibly be faster if we created a temporary query with
192 192 # baseid "poisoned" to null and limited the recursive filter to
193 193 # "is not null".
194 194 res = db.execute(
195 195 'WITH RECURSIVE '
196 196 ' deltachain(deltaid, baseid) AS ('
197 197 ' SELECT deltaid, deltabaseid FROM fileindex '
198 198 ' WHERE pathid=? AND node=? '
199 199 ' UNION ALL '
200 200 ' SELECT fileindex.deltaid, deltabaseid '
201 201 ' FROM fileindex, deltachain '
202 202 ' WHERE '
203 203 ' fileindex.id=deltachain.baseid '
204 204 ' AND deltachain.baseid IS NOT NULL '
205 205 ' AND fileindex.id NOT IN ({stops}) '
206 206 ' ) '
207 207 'SELECT deltachain.baseid, compression, delta '
208 208 'FROM deltachain, delta '
209 209 'WHERE delta.id=deltachain.deltaid'.format(
210 210 stops=','.join(['?'] * len(stoprids))
211 211 ),
212 212 tuple([pathid, node] + list(stoprids.keys())),
213 213 )
214 214
215 215 deltas = []
216 216 lastdeltabaseid = None
217 217
218 218 for deltabaseid, compression, delta in res:
219 219 lastdeltabaseid = deltabaseid
220 220
221 221 if compression == COMPRESSION_ZSTD:
222 222 delta = zstddctx.decompress(delta)
223 223 elif compression == COMPRESSION_NONE:
224 224 delta = delta
225 225 elif compression == COMPRESSION_ZLIB:
226 226 delta = zlib.decompress(delta)
227 227 else:
228 228 raise SQLiteStoreError(
229 229 b'unhandled compression type: %d' % compression
230 230 )
231 231
232 232 deltas.append(delta)
233 233
234 234 if lastdeltabaseid in stoprids:
235 235 basetext = revisioncache[stoprids[lastdeltabaseid]]
236 236 else:
237 237 basetext = deltas.pop()
238 238
239 239 deltas.reverse()
240 240 fulltext = mdiff.patches(basetext, deltas)
241 241
242 242 # SQLite returns buffer instances for blob columns on Python 2. This
243 243 # type can propagate through the delta application layer. Because
244 244 # downstream callers assume revisions are bytes, cast as needed.
245 245 if not isinstance(fulltext, bytes):
246 246 fulltext = bytes(fulltext)
247 247
248 248 return fulltext
249 249
250 250
251 251 def insertdelta(db, compression, hash, delta):
252 252 try:
253 253 return db.execute(
254 254 'INSERT INTO delta (compression, hash, delta) VALUES (?, ?, ?)',
255 255 (compression, hash, delta),
256 256 ).lastrowid
257 257 except sqlite3.IntegrityError:
258 258 return db.execute(
259 259 'SELECT id FROM delta WHERE hash=?', (hash,)
260 260 ).fetchone()[0]
261 261
262 262
263 263 class SQLiteStoreError(error.StorageError):
264 264 pass
265 265
266 266
267 267 @attr.s
268 268 class revisionentry(object):
269 269 rid = attr.ib()
270 270 rev = attr.ib()
271 271 node = attr.ib()
272 272 p1rev = attr.ib()
273 273 p2rev = attr.ib()
274 274 p1node = attr.ib()
275 275 p2node = attr.ib()
276 276 linkrev = attr.ib()
277 277 flags = attr.ib()
278 278
279 279
280 280 @interfaceutil.implementer(repository.irevisiondelta)
281 281 @attr.s(slots=True)
282 282 class sqliterevisiondelta(object):
283 283 node = attr.ib()
284 284 p1node = attr.ib()
285 285 p2node = attr.ib()
286 286 basenode = attr.ib()
287 287 flags = attr.ib()
288 288 baserevisionsize = attr.ib()
289 289 revision = attr.ib()
290 290 delta = attr.ib()
291 sidedata = attr.ib()
291 292 linknode = attr.ib(default=None)
292 293
293 294
294 295 @interfaceutil.implementer(repository.iverifyproblem)
295 296 @attr.s(frozen=True)
296 297 class sqliteproblem(object):
297 298 warning = attr.ib(default=None)
298 299 error = attr.ib(default=None)
299 300 node = attr.ib(default=None)
300 301
301 302
302 303 @interfaceutil.implementer(repository.ifilestorage)
303 304 class sqlitefilestore(object):
304 305 """Implements storage for an individual tracked path."""
305 306
306 307 def __init__(self, db, path, compression):
307 308 self._db = db
308 309 self._path = path
309 310
310 311 self._pathid = None
311 312
312 313 # revnum -> node
313 314 self._revtonode = {}
314 315 # node -> revnum
315 316 self._nodetorev = {}
316 317 # node -> data structure
317 318 self._revisions = {}
318 319
319 320 self._revisioncache = util.lrucachedict(10)
320 321
321 322 self._compengine = compression
322 323
323 324 if compression == b'zstd':
324 325 self._cctx = zstd.ZstdCompressor(level=3)
325 326 self._dctx = zstd.ZstdDecompressor()
326 327 else:
327 328 self._cctx = None
328 329 self._dctx = None
329 330
330 331 self._refreshindex()
331 332
332 333 def _refreshindex(self):
333 334 self._revtonode = {}
334 335 self._nodetorev = {}
335 336 self._revisions = {}
336 337
337 338 res = list(
338 339 self._db.execute(
339 340 'SELECT id FROM filepath WHERE path=?', (self._path,)
340 341 )
341 342 )
342 343
343 344 if not res:
344 345 self._pathid = None
345 346 return
346 347
347 348 self._pathid = res[0][0]
348 349
349 350 res = self._db.execute(
350 351 'SELECT id, revnum, node, p1rev, p2rev, linkrev, flags '
351 352 'FROM fileindex '
352 353 'WHERE pathid=? '
353 354 'ORDER BY revnum ASC',
354 355 (self._pathid,),
355 356 )
356 357
357 358 for i, row in enumerate(res):
358 359 rid, rev, node, p1rev, p2rev, linkrev, flags = row
359 360
360 361 if i != rev:
361 362 raise SQLiteStoreError(
362 363 _(b'sqlite database has inconsistent revision numbers')
363 364 )
364 365
365 366 if p1rev == nullrev:
366 367 p1node = nullid
367 368 else:
368 369 p1node = self._revtonode[p1rev]
369 370
370 371 if p2rev == nullrev:
371 372 p2node = nullid
372 373 else:
373 374 p2node = self._revtonode[p2rev]
374 375
375 376 entry = revisionentry(
376 377 rid=rid,
377 378 rev=rev,
378 379 node=node,
379 380 p1rev=p1rev,
380 381 p2rev=p2rev,
381 382 p1node=p1node,
382 383 p2node=p2node,
383 384 linkrev=linkrev,
384 385 flags=flags,
385 386 )
386 387
387 388 self._revtonode[rev] = node
388 389 self._nodetorev[node] = rev
389 390 self._revisions[node] = entry
390 391
391 392 # Start of ifileindex interface.
392 393
393 394 def __len__(self):
394 395 return len(self._revisions)
395 396
396 397 def __iter__(self):
397 398 return iter(pycompat.xrange(len(self._revisions)))
398 399
399 400 def hasnode(self, node):
400 401 if node == nullid:
401 402 return False
402 403
403 404 return node in self._nodetorev
404 405
405 406 def revs(self, start=0, stop=None):
406 407 return storageutil.iterrevs(
407 408 len(self._revisions), start=start, stop=stop
408 409 )
409 410
410 411 def parents(self, node):
411 412 if node == nullid:
412 413 return nullid, nullid
413 414
414 415 if node not in self._revisions:
415 416 raise error.LookupError(node, self._path, _(b'no node'))
416 417
417 418 entry = self._revisions[node]
418 419 return entry.p1node, entry.p2node
419 420
420 421 def parentrevs(self, rev):
421 422 if rev == nullrev:
422 423 return nullrev, nullrev
423 424
424 425 if rev not in self._revtonode:
425 426 raise IndexError(rev)
426 427
427 428 entry = self._revisions[self._revtonode[rev]]
428 429 return entry.p1rev, entry.p2rev
429 430
430 431 def rev(self, node):
431 432 if node == nullid:
432 433 return nullrev
433 434
434 435 if node not in self._nodetorev:
435 436 raise error.LookupError(node, self._path, _(b'no node'))
436 437
437 438 return self._nodetorev[node]
438 439
439 440 def node(self, rev):
440 441 if rev == nullrev:
441 442 return nullid
442 443
443 444 if rev not in self._revtonode:
444 445 raise IndexError(rev)
445 446
446 447 return self._revtonode[rev]
447 448
448 449 def lookup(self, node):
449 450 return storageutil.fileidlookup(self, node, self._path)
450 451
451 452 def linkrev(self, rev):
452 453 if rev == nullrev:
453 454 return nullrev
454 455
455 456 if rev not in self._revtonode:
456 457 raise IndexError(rev)
457 458
458 459 entry = self._revisions[self._revtonode[rev]]
459 460 return entry.linkrev
460 461
461 462 def iscensored(self, rev):
462 463 if rev == nullrev:
463 464 return False
464 465
465 466 if rev not in self._revtonode:
466 467 raise IndexError(rev)
467 468
468 469 return self._revisions[self._revtonode[rev]].flags & FLAG_CENSORED
469 470
470 471 def commonancestorsheads(self, node1, node2):
471 472 rev1 = self.rev(node1)
472 473 rev2 = self.rev(node2)
473 474
474 475 ancestors = ancestor.commonancestorsheads(self.parentrevs, rev1, rev2)
475 476 return pycompat.maplist(self.node, ancestors)
476 477
477 478 def descendants(self, revs):
478 479 # TODO we could implement this using a recursive SQL query, which
479 480 # might be faster.
480 481 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
481 482
482 483 def heads(self, start=None, stop=None):
483 484 if start is None and stop is None:
484 485 if not len(self):
485 486 return [nullid]
486 487
487 488 startrev = self.rev(start) if start is not None else nullrev
488 489 stoprevs = {self.rev(n) for n in stop or []}
489 490
490 491 revs = dagop.headrevssubset(
491 492 self.revs, self.parentrevs, startrev=startrev, stoprevs=stoprevs
492 493 )
493 494
494 495 return [self.node(rev) for rev in revs]
495 496
496 497 def children(self, node):
497 498 rev = self.rev(node)
498 499
499 500 res = self._db.execute(
500 501 'SELECT'
501 502 ' node '
502 503 ' FROM filedata '
503 504 ' WHERE path=? AND (p1rev=? OR p2rev=?) '
504 505 ' ORDER BY revnum ASC',
505 506 (self._path, rev, rev),
506 507 )
507 508
508 509 return [row[0] for row in res]
509 510
510 511 # End of ifileindex interface.
511 512
512 513 # Start of ifiledata interface.
513 514
514 515 def size(self, rev):
515 516 if rev == nullrev:
516 517 return 0
517 518
518 519 if rev not in self._revtonode:
519 520 raise IndexError(rev)
520 521
521 522 node = self._revtonode[rev]
522 523
523 524 if self.renamed(node):
524 525 return len(self.read(node))
525 526
526 527 return len(self.revision(node))
527 528
528 529 def revision(self, node, raw=False, _verifyhash=True):
529 530 if node in (nullid, nullrev):
530 531 return b''
531 532
532 533 if isinstance(node, int):
533 534 node = self.node(node)
534 535
535 536 if node not in self._nodetorev:
536 537 raise error.LookupError(node, self._path, _(b'no node'))
537 538
538 539 if node in self._revisioncache:
539 540 return self._revisioncache[node]
540 541
541 542 # Because we have a fulltext revision cache, we are able to
542 543 # short-circuit delta chain traversal and decompression as soon as
543 544 # we encounter a revision in the cache.
544 545
545 546 stoprids = {self._revisions[n].rid: n for n in self._revisioncache}
546 547
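# The -1 sentinel below can never match a real fileindex id; presumably it
# just keeps the "NOT IN (...)" placeholder list built in resolvedeltachain()
# non-empty so the generated SQL stays valid while excluding no real rows.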
547 548 if not stoprids:
548 549 stoprids[-1] = None
549 550
550 551 fulltext = resolvedeltachain(
551 552 self._db,
552 553 self._pathid,
553 554 node,
554 555 self._revisioncache,
555 556 stoprids,
556 557 zstddctx=self._dctx,
557 558 )
558 559
559 560 # Don't verify hashes if parent nodes were rewritten, as the hash
560 561 # wouldn't verify.
561 562 if self._revisions[node].flags & (FLAG_MISSING_P1 | FLAG_MISSING_P2):
562 563 _verifyhash = False
563 564
564 565 if _verifyhash:
565 566 self._checkhash(fulltext, node)
566 567 self._revisioncache[node] = fulltext
567 568
568 569 return fulltext
569 570
570 571 def rawdata(self, *args, **kwargs):
571 572 return self.revision(*args, **kwargs)
572 573
573 574 def read(self, node):
574 575 return storageutil.filtermetadata(self.revision(node))
575 576
576 577 def renamed(self, node):
577 578 return storageutil.filerevisioncopied(self, node)
578 579
579 580 def cmp(self, node, fulltext):
580 581 return not storageutil.filedataequivalent(self, node, fulltext)
581 582
582 583 def emitrevisions(
583 584 self,
584 585 nodes,
585 586 nodesorder=None,
586 587 revisiondata=False,
587 588 assumehaveparentrevisions=False,
588 589 deltamode=repository.CG_DELTAMODE_STD,
589 590 ):
590 591 if nodesorder not in (b'nodes', b'storage', b'linear', None):
591 592 raise error.ProgrammingError(
592 593 b'unhandled value for nodesorder: %s' % nodesorder
593 594 )
594 595
595 596 nodes = [n for n in nodes if n != nullid]
596 597
597 598 if not nodes:
598 599 return
599 600
600 601 # TODO perform in a single query.
601 602 res = self._db.execute(
602 603 'SELECT revnum, deltaid FROM fileindex '
603 604 'WHERE pathid=? '
604 605 ' AND node in (%s)' % (','.join(['?'] * len(nodes))),
605 606 tuple([self._pathid] + nodes),
606 607 )
607 608
608 609 deltabases = {}
609 610
610 611 for rev, deltaid in res:
611 612 res = self._db.execute(
612 613 'SELECT revnum from fileindex WHERE pathid=? AND deltaid=?',
613 614 (self._pathid, deltaid),
614 615 )
615 616 deltabases[rev] = res.fetchone()[0]
616 617
617 618 # TODO define revdifffn so we can use delta from storage.
618 619 for delta in storageutil.emitrevisions(
619 620 self,
620 621 nodes,
621 622 nodesorder,
622 623 sqliterevisiondelta,
623 624 deltaparentfn=deltabases.__getitem__,
624 625 revisiondata=revisiondata,
625 626 assumehaveparentrevisions=assumehaveparentrevisions,
626 627 deltamode=deltamode,
627 628 ):
628 629
629 630 yield delta
630 631
631 632 # End of ifiledata interface.
632 633
633 634 # Start of ifilemutation interface.
634 635
635 636 def add(self, filedata, meta, transaction, linkrev, p1, p2):
636 637 if meta or filedata.startswith(b'\x01\n'):
637 638 filedata = storageutil.packmeta(meta, filedata)
638 639
639 640 rev = self.addrevision(filedata, transaction, linkrev, p1, p2)
640 641 return self.node(rev)
641 642
642 643 def addrevision(
643 644 self,
644 645 revisiondata,
645 646 transaction,
646 647 linkrev,
647 648 p1,
648 649 p2,
649 650 node=None,
650 651 flags=0,
651 652 cachedelta=None,
652 653 ):
653 654 if flags:
654 655 raise SQLiteStoreError(_(b'flags not supported on revisions'))
655 656
656 657 validatehash = node is not None
657 658 node = node or storageutil.hashrevisionsha1(revisiondata, p1, p2)
658 659
659 660 if validatehash:
660 661 self._checkhash(revisiondata, node, p1, p2)
661 662
662 663 rev = self._nodetorev.get(node)
663 664 if rev is not None:
664 665 return rev
665 666
666 667 rev = self._addrawrevision(
667 668 node, revisiondata, transaction, linkrev, p1, p2
668 669 )
669 670
670 671 self._revisioncache[node] = revisiondata
671 672 return rev
672 673
673 674 def addgroup(
674 675 self,
675 676 deltas,
676 677 linkmapper,
677 678 transaction,
678 679 addrevisioncb=None,
679 680 duplicaterevisioncb=None,
680 681 maybemissingparents=False,
681 682 ):
682 683 empty = True
683 684
684 685 for (
685 686 node,
686 687 p1,
687 688 p2,
688 689 linknode,
689 690 deltabase,
690 691 delta,
691 692 wireflags,
692 693 sidedata,
693 694 ) in deltas:
694 695 storeflags = 0
695 696
696 697 if wireflags & repository.REVISION_FLAG_CENSORED:
697 698 storeflags |= FLAG_CENSORED
698 699
699 700 if wireflags & ~repository.REVISION_FLAG_CENSORED:
700 701 raise SQLiteStoreError(b'unhandled revision flag')
701 702
702 703 if maybemissingparents:
703 704 if p1 != nullid and not self.hasnode(p1):
704 705 p1 = nullid
705 706 storeflags |= FLAG_MISSING_P1
706 707
707 708 if p2 != nullid and not self.hasnode(p2):
708 709 p2 = nullid
709 710 storeflags |= FLAG_MISSING_P2
710 711
711 712 baserev = self.rev(deltabase)
712 713
713 714 # If base is censored, delta must be full replacement in a single
714 715 # patch operation.
715 716 if baserev != nullrev and self.iscensored(baserev):
716 717 hlen = struct.calcsize(b'>lll')
717 718 oldlen = len(self.rawdata(deltabase, _verifyhash=False))
718 719 newlen = len(delta) - hlen
719 720
720 721 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
721 722 raise error.CensoredBaseError(self._path, deltabase)
722 723
723 724 if not (storeflags & FLAG_CENSORED) and storageutil.deltaiscensored(
724 725 delta, baserev, lambda x: len(self.rawdata(x))
725 726 ):
726 727 storeflags |= FLAG_CENSORED
727 728
728 729 linkrev = linkmapper(linknode)
729 730
730 731 if node in self._revisions:
731 732 # Possibly reset parents to make them proper.
732 733 entry = self._revisions[node]
733 734
734 735 if entry.flags & FLAG_MISSING_P1 and p1 != nullid:
735 736 entry.p1node = p1
736 737 entry.p1rev = self._nodetorev[p1]
737 738 entry.flags &= ~FLAG_MISSING_P1
738 739
739 740 self._db.execute(
740 741 'UPDATE fileindex SET p1rev=?, flags=? WHERE id=?',
741 742 (self._nodetorev[p1], entry.flags, entry.rid),
742 743 )
743 744
744 745 if entry.flags & FLAG_MISSING_P2 and p2 != nullid:
745 746 entry.p2node = p2
746 747 entry.p2rev = self._nodetorev[p2]
747 748 entry.flags &= ~FLAG_MISSING_P2
748 749
749 750 self._db.execute(
750 751 'UPDATE fileindex SET p2rev=?, flags=? WHERE id=?',
751 752 (self._nodetorev[p2], entry.flags, entry.rid),
752 753 )
753 754
754 755 if duplicaterevisioncb:
755 756 duplicaterevisioncb(self, self.rev(node))
756 757 empty = False
757 758 continue
758 759
759 760 if deltabase == nullid:
760 761 text = mdiff.patch(b'', delta)
761 762 storedelta = None
762 763 else:
763 764 text = None
764 765 storedelta = (deltabase, delta)
765 766
766 767 rev = self._addrawrevision(
767 768 node,
768 769 text,
769 770 transaction,
770 771 linkrev,
771 772 p1,
772 773 p2,
773 774 storedelta=storedelta,
774 775 flags=storeflags,
775 776 )
776 777
777 778 if addrevisioncb:
778 779 addrevisioncb(self, rev)
779 780 empty = False
780 781
781 782 return not empty
782 783
783 784 def censorrevision(self, tr, censornode, tombstone=b''):
784 785 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
785 786
786 787 # This restriction is cargo culted from revlogs and makes no sense for
787 788 # SQLite, since columns can be resized at will.
788 789 if len(tombstone) > len(self.rawdata(censornode)):
789 790 raise error.Abort(
790 791 _(b'censor tombstone must be no longer than censored data')
791 792 )
792 793
793 794 # We need to replace the censored revision's data with the tombstone.
794 795 # But replacing that data will have implications for delta chains that
795 796 # reference it.
796 797 #
797 798 # While "better," more complex strategies are possible, we do something
798 799 # simple: we find delta chain children of the censored revision and we
799 800 # replace those incremental deltas with fulltexts of their corresponding
800 801 # revision. Then we delete the now-unreferenced delta and original
801 802 # revision and insert a replacement.
802 803
803 804 # Find the delta to be censored.
804 805 censoreddeltaid = self._db.execute(
805 806 'SELECT deltaid FROM fileindex WHERE id=?',
806 807 (self._revisions[censornode].rid,),
807 808 ).fetchone()[0]
808 809
809 810 # Find all its delta chain children.
810 811 # TODO once we support storing deltas for !files, we'll need to look
811 812 # for those delta chains too.
812 813 rows = list(
813 814 self._db.execute(
814 815 'SELECT id, pathid, node FROM fileindex '
815 816 'WHERE deltabaseid=? OR deltaid=?',
816 817 (censoreddeltaid, censoreddeltaid),
817 818 )
818 819 )
819 820
820 821 for row in rows:
821 822 rid, pathid, node = row
822 823
823 824 fulltext = resolvedeltachain(
824 825 self._db, pathid, node, {}, {-1: None}, zstddctx=self._dctx
825 826 )
826 827
827 828 deltahash = hashutil.sha1(fulltext).digest()
828 829
829 830 if self._compengine == b'zstd':
830 831 deltablob = self._cctx.compress(fulltext)
831 832 compression = COMPRESSION_ZSTD
832 833 elif self._compengine == b'zlib':
833 834 deltablob = zlib.compress(fulltext)
834 835 compression = COMPRESSION_ZLIB
835 836 elif self._compengine == b'none':
836 837 deltablob = fulltext
837 838 compression = COMPRESSION_NONE
838 839 else:
839 840 raise error.ProgrammingError(
840 841 b'unhandled compression engine: %s' % self._compengine
841 842 )
842 843
843 844 if len(deltablob) >= len(fulltext):
844 845 deltablob = fulltext
845 846 compression = COMPRESSION_NONE
846 847
847 848 deltaid = insertdelta(self._db, compression, deltahash, deltablob)
848 849
849 850 self._db.execute(
850 851 'UPDATE fileindex SET deltaid=?, deltabaseid=NULL '
851 852 'WHERE id=?',
852 853 (deltaid, rid),
853 854 )
854 855
855 856 # Now create the tombstone delta and replace the delta on the censored
856 857 # node.
857 858 deltahash = hashutil.sha1(tombstone).digest()
858 859 tombstonedeltaid = insertdelta(
859 860 self._db, COMPRESSION_NONE, deltahash, tombstone
860 861 )
861 862
862 863 flags = self._revisions[censornode].flags
863 864 flags |= FLAG_CENSORED
864 865
865 866 self._db.execute(
866 867 'UPDATE fileindex SET flags=?, deltaid=?, deltabaseid=NULL '
867 868 'WHERE pathid=? AND node=?',
868 869 (flags, tombstonedeltaid, self._pathid, censornode),
869 870 )
870 871
871 872 self._db.execute('DELETE FROM delta WHERE id=?', (censoreddeltaid,))
872 873
873 874 self._refreshindex()
874 875 self._revisioncache.clear()
875 876
876 877 def getstrippoint(self, minlink):
877 878 return storageutil.resolvestripinfo(
878 879 minlink,
879 880 len(self) - 1,
880 881 [self.rev(n) for n in self.heads()],
881 882 self.linkrev,
882 883 self.parentrevs,
883 884 )
884 885
885 886 def strip(self, minlink, transaction):
886 887 if not len(self):
887 888 return
888 889
889 890 rev, _ignored = self.getstrippoint(minlink)
890 891
891 892 if rev == len(self):
892 893 return
893 894
894 895 for rev in self.revs(rev):
895 896 self._db.execute(
896 897 'DELETE FROM fileindex WHERE pathid=? AND node=?',
897 898 (self._pathid, self.node(rev)),
898 899 )
899 900
900 901 # TODO how should we garbage collect data in delta table?
901 902
902 903 self._refreshindex()
903 904
904 905 # End of ifilemutation interface.
905 906
906 907 # Start of ifilestorage interface.
907 908
908 909 def files(self):
909 910 return []
910 911
912 def sidedata(self, nodeorrev, _df=None):
913 # Not supported for now
914 return {}
915
911 916 def storageinfo(
912 917 self,
913 918 exclusivefiles=False,
914 919 sharedfiles=False,
915 920 revisionscount=False,
916 921 trackedsize=False,
917 922 storedsize=False,
918 923 ):
919 924 d = {}
920 925
921 926 if exclusivefiles:
922 927 d[b'exclusivefiles'] = []
923 928
924 929 if sharedfiles:
925 930 # TODO list sqlite file(s) here.
926 931 d[b'sharedfiles'] = []
927 932
928 933 if revisionscount:
929 934 d[b'revisionscount'] = len(self)
930 935
931 936 if trackedsize:
932 937 d[b'trackedsize'] = sum(
933 938 len(self.revision(node)) for node in self._nodetorev
934 939 )
935 940
936 941 if storedsize:
937 942 # TODO implement this?
938 943 d[b'storedsize'] = None
939 944
940 945 return d
941 946
942 947 def verifyintegrity(self, state):
943 948 state[b'skipread'] = set()
944 949
945 950 for rev in self:
946 951 node = self.node(rev)
947 952
948 953 try:
949 954 self.revision(node)
950 955 except Exception as e:
951 956 yield sqliteproblem(
952 957 error=_(b'unpacking %s: %s') % (short(node), e), node=node
953 958 )
954 959
955 960 state[b'skipread'].add(node)
956 961
957 962 # End of ifilestorage interface.
958 963
959 964 def _checkhash(self, fulltext, node, p1=None, p2=None):
960 965 if p1 is None and p2 is None:
961 966 p1, p2 = self.parents(node)
962 967
963 968 if node == storageutil.hashrevisionsha1(fulltext, p1, p2):
964 969 return
965 970
966 971 try:
967 972 del self._revisioncache[node]
968 973 except KeyError:
969 974 pass
970 975
971 976 if storageutil.iscensoredtext(fulltext):
972 977 raise error.CensoredNodeError(self._path, node, fulltext)
973 978
974 979 raise SQLiteStoreError(_(b'integrity check failed on %s') % self._path)
975 980
976 981 def _addrawrevision(
977 982 self,
978 983 node,
979 984 revisiondata,
980 985 transaction,
981 986 linkrev,
982 987 p1,
983 988 p2,
984 989 storedelta=None,
985 990 flags=0,
986 991 ):
987 992 if self._pathid is None:
988 993 res = self._db.execute(
989 994 'INSERT INTO filepath (path) VALUES (?)', (self._path,)
990 995 )
991 996 self._pathid = res.lastrowid
992 997
993 998 # For simplicity, always store a delta against p1.
994 999 # TODO we need a lot more logic here to make behavior reasonable.
995 1000
996 1001 if storedelta:
997 1002 deltabase, delta = storedelta
998 1003
999 1004 if isinstance(deltabase, int):
1000 1005 deltabase = self.node(deltabase)
1001 1006
1002 1007 else:
1003 1008 assert revisiondata is not None
1004 1009 deltabase = p1
1005 1010
1006 1011 if deltabase == nullid:
1007 1012 delta = revisiondata
1008 1013 else:
1009 1014 delta = mdiff.textdiff(
1010 1015 self.revision(self.rev(deltabase)), revisiondata
1011 1016 )
1012 1017
1013 1018 # File index stores a pointer to its delta and the parent delta.
1014 1019 # The parent delta is stored via a pointer to the fileindex PK.
1015 1020 if deltabase == nullid:
1016 1021 baseid = None
1017 1022 else:
1018 1023 baseid = self._revisions[deltabase].rid
1019 1024
1020 1025 # Deltas are stored with a hash of their content. This allows
1021 1026 # us to de-duplicate. The table is configured to ignore conflicts
1022 1027 # and it is faster to just insert and silently noop than to look
1023 1028 # first.
1024 1029 deltahash = hashutil.sha1(delta).digest()
1025 1030
1026 1031 if self._compengine == b'zstd':
1027 1032 deltablob = self._cctx.compress(delta)
1028 1033 compression = COMPRESSION_ZSTD
1029 1034 elif self._compengine == b'zlib':
1030 1035 deltablob = zlib.compress(delta)
1031 1036 compression = COMPRESSION_ZLIB
1032 1037 elif self._compengine == b'none':
1033 1038 deltablob = delta
1034 1039 compression = COMPRESSION_NONE
1035 1040 else:
1036 1041 raise error.ProgrammingError(
1037 1042 b'unhandled compression engine: %s' % self._compengine
1038 1043 )
1039 1044
1040 1045 # Don't store compressed data if it isn't practical.
1041 1046 if len(deltablob) >= len(delta):
1042 1047 deltablob = delta
1043 1048 compression = COMPRESSION_NONE
1044 1049
1045 1050 deltaid = insertdelta(self._db, compression, deltahash, deltablob)
1046 1051
1047 1052 rev = len(self)
1048 1053
1049 1054 if p1 == nullid:
1050 1055 p1rev = nullrev
1051 1056 else:
1052 1057 p1rev = self._nodetorev[p1]
1053 1058
1054 1059 if p2 == nullid:
1055 1060 p2rev = nullrev
1056 1061 else:
1057 1062 p2rev = self._nodetorev[p2]
1058 1063
1059 1064 rid = self._db.execute(
1060 1065 'INSERT INTO fileindex ('
1061 1066 ' pathid, revnum, node, p1rev, p2rev, linkrev, flags, '
1062 1067 ' deltaid, deltabaseid) '
1063 1068 ' VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)',
1064 1069 (
1065 1070 self._pathid,
1066 1071 rev,
1067 1072 node,
1068 1073 p1rev,
1069 1074 p2rev,
1070 1075 linkrev,
1071 1076 flags,
1072 1077 deltaid,
1073 1078 baseid,
1074 1079 ),
1075 1080 ).lastrowid
1076 1081
1077 1082 entry = revisionentry(
1078 1083 rid=rid,
1079 1084 rev=rev,
1080 1085 node=node,
1081 1086 p1rev=p1rev,
1082 1087 p2rev=p2rev,
1083 1088 p1node=p1,
1084 1089 p2node=p2,
1085 1090 linkrev=linkrev,
1086 1091 flags=flags,
1087 1092 )
1088 1093
1089 1094 self._nodetorev[node] = rev
1090 1095 self._revtonode[rev] = node
1091 1096 self._revisions[node] = entry
1092 1097
1093 1098 return rev
1094 1099
1095 1100
1096 1101 class sqliterepository(localrepo.localrepository):
1097 1102 def cancopy(self):
1098 1103 return False
1099 1104
1100 1105 def transaction(self, *args, **kwargs):
1101 1106 current = self.currenttransaction()
1102 1107
1103 1108 tr = super(sqliterepository, self).transaction(*args, **kwargs)
1104 1109
1105 1110 if current:
1106 1111 return tr
1107 1112
1108 1113 self._dbconn.execute('BEGIN TRANSACTION')
1109 1114
1110 1115 def committransaction(_):
1111 1116 self._dbconn.commit()
1112 1117
1113 1118 tr.addfinalize(b'sqlitestore', committransaction)
1114 1119
1115 1120 return tr
1116 1121
1117 1122 @property
1118 1123 def _dbconn(self):
1119 1124 # SQLite connections can only be used on the thread that created
1120 1125 # them. In most cases, this "just works." However, hgweb uses
1121 1126 # multiple threads.
1122 1127 tid = threading.current_thread().ident
1123 1128
1124 1129 if self._db:
1125 1130 if self._db[0] == tid:
1126 1131 return self._db[1]
1127 1132
1128 1133 db = makedb(self.svfs.join(b'db.sqlite'))
1129 1134 self._db = (tid, db)
1130 1135
1131 1136 return db
1132 1137
1133 1138
1134 1139 def makedb(path):
1135 1140 """Construct a database handle for a database at path."""
1136 1141
1137 1142 db = sqlite3.connect(encoding.strfromlocal(path))
1138 1143 db.text_factory = bytes
1139 1144
1140 1145 res = db.execute('PRAGMA user_version').fetchone()[0]
1141 1146
1142 1147 # New database.
1143 1148 if res == 0:
1144 1149 for statement in CREATE_SCHEMA:
1145 1150 db.execute(statement)
1146 1151
1147 1152 db.commit()
1148 1153
1149 1154 elif res == CURRENT_SCHEMA_VERSION:
1150 1155 pass
1151 1156
1152 1157 else:
1153 1158 raise error.Abort(_(b'sqlite database has unrecognized version'))
1154 1159
1155 1160 db.execute('PRAGMA journal_mode=WAL')
1156 1161
1157 1162 return db
1158 1163
1159 1164
1160 1165 def featuresetup(ui, supported):
1161 1166 supported.add(REQUIREMENT)
1162 1167
1163 1168 if zstd:
1164 1169 supported.add(REQUIREMENT_ZSTD)
1165 1170
1166 1171 supported.add(REQUIREMENT_ZLIB)
1167 1172 supported.add(REQUIREMENT_NONE)
1168 1173 supported.add(REQUIREMENT_SHALLOW_FILES)
1169 1174 supported.add(requirements.NARROW_REQUIREMENT)
1170 1175
1171 1176
1172 1177 def newreporequirements(orig, ui, createopts):
1173 1178 if createopts[b'backend'] != b'sqlite':
1174 1179 return orig(ui, createopts)
1175 1180
1176 1181 # This restriction can be lifted once we have more confidence.
1177 1182 if b'sharedrepo' in createopts:
1178 1183 raise error.Abort(
1179 1184 _(b'shared repositories not supported with SQLite store')
1180 1185 )
1181 1186
1182 1187 # This filtering is out of an abundance of caution: we want to ensure
1183 1188 # we honor creation options and we do that by annotating exactly the
1184 1189 # creation options we recognize.
1185 1190 known = {
1186 1191 b'narrowfiles',
1187 1192 b'backend',
1188 1193 b'shallowfilestore',
1189 1194 }
1190 1195
1191 1196 unsupported = set(createopts) - known
1192 1197 if unsupported:
1193 1198 raise error.Abort(
1194 1199 _(b'SQLite store does not support repo creation option: %s')
1195 1200 % b', '.join(sorted(unsupported))
1196 1201 )
1197 1202
1198 1203 # Since we're a hybrid store that still relies on revlogs, we fall back
1199 1204 # to using the revlogv1 backend's storage requirements then adding our
1200 1205 # own requirement.
1201 1206 createopts[b'backend'] = b'revlogv1'
1202 1207 requirements = orig(ui, createopts)
1203 1208 requirements.add(REQUIREMENT)
1204 1209
1205 1210 compression = ui.config(b'storage', b'sqlite.compression')
1206 1211
1207 1212 if compression == b'zstd' and not zstd:
1208 1213 raise error.Abort(
1209 1214 _(
1210 1215 b'storage.sqlite.compression set to "zstd" but '
1211 1216 b'zstandard compression not available to this '
1212 1217 b'Mercurial install'
1213 1218 )
1214 1219 )
1215 1220
1216 1221 if compression == b'zstd':
1217 1222 requirements.add(REQUIREMENT_ZSTD)
1218 1223 elif compression == b'zlib':
1219 1224 requirements.add(REQUIREMENT_ZLIB)
1220 1225 elif compression == b'none':
1221 1226 requirements.add(REQUIREMENT_NONE)
1222 1227 else:
1223 1228 raise error.Abort(
1224 1229 _(
1225 1230 b'unknown compression engine defined in '
1226 1231 b'storage.sqlite.compression: %s'
1227 1232 )
1228 1233 % compression
1229 1234 )
1230 1235
1231 1236 if createopts.get(b'shallowfilestore'):
1232 1237 requirements.add(REQUIREMENT_SHALLOW_FILES)
1233 1238
1234 1239 return requirements
1235 1240
1236 1241
1237 1242 @interfaceutil.implementer(repository.ilocalrepositoryfilestorage)
1238 1243 class sqlitefilestorage(object):
1239 1244 """Repository file storage backed by SQLite."""
1240 1245
1241 1246 def file(self, path):
1242 1247 if path[0] == b'/':
1243 1248 path = path[1:]
1244 1249
1245 1250 if REQUIREMENT_ZSTD in self.requirements:
1246 1251 compression = b'zstd'
1247 1252 elif REQUIREMENT_ZLIB in self.requirements:
1248 1253 compression = b'zlib'
1249 1254 elif REQUIREMENT_NONE in self.requirements:
1250 1255 compression = b'none'
1251 1256 else:
1252 1257 raise error.Abort(
1253 1258 _(
1254 1259 b'unable to determine what compression engine '
1255 1260 b'to use for SQLite storage'
1256 1261 )
1257 1262 )
1258 1263
1259 1264 return sqlitefilestore(self._dbconn, path, compression)
1260 1265
1261 1266
1262 1267 def makefilestorage(orig, requirements, features, **kwargs):
1263 1268 """Produce a type conforming to ``ilocalrepositoryfilestorage``."""
1264 1269 if REQUIREMENT in requirements:
1265 1270 if REQUIREMENT_SHALLOW_FILES in requirements:
1266 1271 features.add(repository.REPO_FEATURE_SHALLOW_FILE_STORAGE)
1267 1272
1268 1273 return sqlitefilestorage
1269 1274 else:
1270 1275 return orig(requirements=requirements, features=features, **kwargs)
1271 1276
1272 1277
1273 1278 def makemain(orig, ui, requirements, **kwargs):
1274 1279 if REQUIREMENT in requirements:
1275 1280 if REQUIREMENT_ZSTD in requirements and not zstd:
1276 1281 raise error.Abort(
1277 1282 _(
1278 1283 b'repository uses zstandard compression, which '
1279 1284 b'is not available to this Mercurial install'
1280 1285 )
1281 1286 )
1282 1287
1283 1288 return sqliterepository
1284 1289
1285 1290 return orig(requirements=requirements, **kwargs)
1286 1291
1287 1292
1288 1293 def verifierinit(orig, self, *args, **kwargs):
1289 1294 orig(self, *args, **kwargs)
1290 1295
1291 1296 # We don't care that files in the store don't align with what is
1292 1297 # advertised. So suppress these warnings.
1293 1298 self.warnorphanstorefiles = False
1294 1299
1295 1300
1296 1301 def extsetup(ui):
1297 1302 localrepo.featuresetupfuncs.add(featuresetup)
1298 1303 extensions.wrapfunction(
1299 1304 localrepo, b'newreporequirements', newreporequirements
1300 1305 )
1301 1306 extensions.wrapfunction(localrepo, b'makefilestorage', makefilestorage)
1302 1307 extensions.wrapfunction(localrepo, b'makemain', makemain)
1303 1308 extensions.wrapfunction(verify.verifier, b'__init__', verifierinit)
1304 1309
1305 1310
1306 1311 def reposetup(ui, repo):
1307 1312 if isinstance(repo, sqliterepository):
1308 1313 repo._db = None
1309 1314
1310 1315 # TODO check for bundlerepository?
@@ -1,1784 +1,1791 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21 from .pycompat import open
22 22
23 23 from . import (
24 24 error,
25 25 match as matchmod,
26 26 mdiff,
27 27 phases,
28 28 pycompat,
29 29 requirements,
30 30 scmutil,
31 31 util,
32 32 )
33 33
34 34 from .interfaces import repository
35 35 from .revlogutils import sidedata as sidedatamod
36 36
37 37 _CHANGEGROUPV1_DELTA_HEADER = struct.Struct(b"20s20s20s20s")
38 38 _CHANGEGROUPV2_DELTA_HEADER = struct.Struct(b"20s20s20s20s20s")
39 39 _CHANGEGROUPV3_DELTA_HEADER = struct.Struct(b">20s20s20s20s20sH")
40 40
41 41 LFS_REQUIREMENT = b'lfs'
42 42
43 43 readexactly = util.readexactly
44 44
45 45
46 46 def getchunk(stream):
47 47 """return the next chunk from stream as a string"""
48 48 d = readexactly(stream, 4)
49 49 l = struct.unpack(b">l", d)[0]
50 50 if l <= 4:
51 51 if l:
52 52 raise error.Abort(_(b"invalid chunk length %d") % l)
53 53 return b""
54 54 return readexactly(stream, l - 4)
55 55
56 56
57 57 def chunkheader(length):
58 58 """return a changegroup chunk header (string)"""
59 59 return struct.pack(b">l", length + 4)
60 60
61 61
62 62 def closechunk():
63 63 """return a changegroup chunk header (string) for a zero-length chunk"""
64 64 return struct.pack(b">l", 0)
65 65
66 66
67 67 def _fileheader(path):
68 68 """Obtain a changegroup chunk header for a named path."""
69 69 return chunkheader(len(path)) + path
70 70
71 71
72 72 def writechunks(ui, chunks, filename, vfs=None):
73 73 """Write chunks to a file and return its filename.
74 74
75 75 The stream is assumed to be a bundle file.
76 76 Existing files will not be overwritten.
77 77 If no filename is specified, a temporary file is created.
78 78 """
79 79 fh = None
80 80 cleanup = None
81 81 try:
82 82 if filename:
83 83 if vfs:
84 84 fh = vfs.open(filename, b"wb")
85 85 else:
86 86 # Increase default buffer size because default is usually
87 87 # small (4k is common on Linux).
88 88 fh = open(filename, b"wb", 131072)
89 89 else:
90 90 fd, filename = pycompat.mkstemp(prefix=b"hg-bundle-", suffix=b".hg")
91 91 fh = os.fdopen(fd, "wb")
92 92 cleanup = filename
93 93 for c in chunks:
94 94 fh.write(c)
95 95 cleanup = None
96 96 return filename
97 97 finally:
98 98 if fh is not None:
99 99 fh.close()
100 100 if cleanup is not None:
101 101 if filename and vfs:
102 102 vfs.unlink(cleanup)
103 103 else:
104 104 os.unlink(cleanup)
105 105
106 106
107 107 class cg1unpacker(object):
108 108 """Unpacker for cg1 changegroup streams.
109 109
110 110 A changegroup unpacker handles the framing of the revision data in
111 111 the wire format. Most consumers will want to use the apply()
112 112 method to add the changes from the changegroup to a repository.
113 113
114 114 If you're forwarding a changegroup unmodified to another consumer,
115 115 use getchunks(), which returns an iterator of changegroup
116 116 chunks. This is mostly useful for cases where you need to know the
117 117 data stream has ended by observing the end of the changegroup.
118 118
119 119 deltachunk() is useful only if you're applying delta data. Most
120 120 consumers should prefer apply() instead.
121 121
122 122 A few other public methods exist. Those are used only for
123 123 bundlerepo and some debug commands - their use is discouraged.
124 124 """
125 125
126 126 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
127 127 deltaheadersize = deltaheader.size
128 128 version = b'01'
129 129 _grouplistcount = 1 # One list of files after the manifests
130 130
131 131 def __init__(self, fh, alg, extras=None):
132 132 if alg is None:
133 133 alg = b'UN'
134 134 if alg not in util.compengines.supportedbundletypes:
135 135 raise error.Abort(_(b'unknown stream compression type: %s') % alg)
136 136 if alg == b'BZ':
137 137 alg = b'_truncatedBZ'
138 138
139 139 compengine = util.compengines.forbundletype(alg)
140 140 self._stream = compengine.decompressorreader(fh)
141 141 self._type = alg
142 142 self.extras = extras or {}
143 143 self.callback = None
144 144
145 145 # These methods (compressed, read, seek, tell) all appear to only
146 146 # be used by bundlerepo, but it's a little hard to tell.
147 147 def compressed(self):
148 148 return self._type is not None and self._type != b'UN'
149 149
150 150 def read(self, l):
151 151 return self._stream.read(l)
152 152
153 153 def seek(self, pos):
154 154 return self._stream.seek(pos)
155 155
156 156 def tell(self):
157 157 return self._stream.tell()
158 158
159 159 def close(self):
160 160 return self._stream.close()
161 161
162 162 def _chunklength(self):
163 163 d = readexactly(self._stream, 4)
164 164 l = struct.unpack(b">l", d)[0]
165 165 if l <= 4:
166 166 if l:
167 167 raise error.Abort(_(b"invalid chunk length %d") % l)
168 168 return 0
169 169 if self.callback:
170 170 self.callback()
171 171 return l - 4
172 172
173 173 def changelogheader(self):
174 174 """v10 does not have a changelog header chunk"""
175 175 return {}
176 176
177 177 def manifestheader(self):
178 178 """v10 does not have a manifest header chunk"""
179 179 return {}
180 180
181 181 def filelogheader(self):
182 182 """return the header of the filelogs chunk, v10 only has the filename"""
183 183 l = self._chunklength()
184 184 if not l:
185 185 return {}
186 186 fname = readexactly(self._stream, l)
187 187 return {b'filename': fname}
188 188
189 189 def _deltaheader(self, headertuple, prevnode):
190 190 node, p1, p2, cs = headertuple
191 191 if prevnode is None:
192 192 deltabase = p1
193 193 else:
194 194 deltabase = prevnode
195 195 flags = 0
196 196 return node, p1, p2, deltabase, cs, flags
197 197
198 198 def deltachunk(self, prevnode):
199 199 l = self._chunklength()
200 200 if not l:
201 201 return {}
202 202 headerdata = readexactly(self._stream, self.deltaheadersize)
203 203 header = self.deltaheader.unpack(headerdata)
204 204 delta = readexactly(self._stream, l - self.deltaheadersize)
205 205 node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
206 206 # cg4 forward-compat
207 207 sidedata = {}
208 208 return (node, p1, p2, cs, deltabase, delta, flags, sidedata)
209 209
210 210 def getchunks(self):
211 211 """returns all the chunks contained in the bundle
212 212
213 213 Used when you need to forward the binary stream to a file or another
214 214 network API. To do so, it parses the changegroup data; otherwise it would
215 215 block in the sshrepo case because it doesn't know where the stream ends.
216 216 """
217 217 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
218 218 # and a list of filelogs. For changegroup 3, we expect 4 parts:
219 219 # changelog, manifestlog, a list of tree manifestlogs, and a list of
220 220 # filelogs.
221 221 #
222 222 # Changelog and manifestlog parts are terminated with empty chunks. The
223 223 # tree and file parts are a list of entry sections. Each entry section
224 224 # is a series of chunks terminating in an empty chunk. The list of these
225 225 # entry sections is terminated in yet another empty chunk, so we know
226 226 # we've reached the end of the tree/file list when we reach an empty
227 227 # chunk that was preceded by no non-empty chunks.
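# A rough sketch of the cg1 stream layout implied by the framing helpers
# above (each chunk is a 4-byte big-endian length that counts itself,
# followed by the payload; a length field of 0 is an empty chunk):
#
#   <changelog delta chunks ...> <empty chunk>
#   <manifest delta chunks ...>  <empty chunk>
#   for each file entry: <filename chunk> <delta chunks ...> <empty chunk>
#   <empty chunk>   (an empty chunk preceded by no entries ends the file list)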
228 228
229 229 parts = 0
230 230 while parts < 2 + self._grouplistcount:
231 231 noentries = True
232 232 while True:
233 233 chunk = getchunk(self)
234 234 if not chunk:
235 235 # The first two empty chunks represent the end of the
236 236 # changelog and the manifestlog portions. The remaining
237 237 # empty chunks represent either A) the end of individual
238 238 # tree or file entries in the file list, or B) the end of
239 239 # the entire list. It's the end of the entire list if there
240 240 # were no entries (i.e. noentries is True).
241 241 if parts < 2:
242 242 parts += 1
243 243 elif noentries:
244 244 parts += 1
245 245 break
246 246 noentries = False
247 247 yield chunkheader(len(chunk))
248 248 pos = 0
249 249 while pos < len(chunk):
250 250 next = pos + 2 ** 20
251 251 yield chunk[pos:next]
252 252 pos = next
253 253 yield closechunk()
254 254
255 255 def _unpackmanifests(self, repo, revmap, trp, prog):
256 256 self.callback = prog.increment
257 257 # no need to check for empty manifest group here:
258 258 # if the result of the merge of 1 and 2 is the same in 3 and 4,
259 259 # no new manifest will be created and the manifest group will
260 260 # be empty during the pull
261 261 self.manifestheader()
262 262 deltas = self.deltaiter()
263 263 repo.manifestlog.getstorage(b'').addgroup(deltas, revmap, trp)
264 264 prog.complete()
265 265 self.callback = None
266 266
267 267 def apply(
268 268 self,
269 269 repo,
270 270 tr,
271 271 srctype,
272 272 url,
273 273 targetphase=phases.draft,
274 274 expectedtotal=None,
275 275 ):
276 276 """Add the changegroup returned by source.read() to this repo.
277 277 srctype is a string like 'push', 'pull', or 'unbundle'. url is
278 278 the URL of the repo where this changegroup is coming from.
279 279
280 280 Return an integer summarizing the change to this repo:
281 281 - nothing changed or no source: 0
282 282 - more heads than before: 1+added heads (2..n)
283 283 - fewer heads than before: -1-removed heads (-2..-n)
284 284 - number of heads stays the same: 1
285 285 """
286 286 repo = repo.unfiltered()
287 287
288 288 def csmap(x):
289 289 repo.ui.debug(b"add changeset %s\n" % short(x))
290 290 return len(cl)
291 291
292 292 def revmap(x):
293 293 return cl.rev(x)
294 294
295 295 try:
296 296 # The transaction may already carry source information. In this
297 297 # case we use the top level data. We overwrite the argument
298 298 # because we need to use the top level value (if they exist)
299 299 # in this function.
300 300 srctype = tr.hookargs.setdefault(b'source', srctype)
301 301 tr.hookargs.setdefault(b'url', url)
302 302 repo.hook(
303 303 b'prechangegroup', throw=True, **pycompat.strkwargs(tr.hookargs)
304 304 )
305 305
306 306 # write changelog data to temp files so concurrent readers
307 307 # will not see an inconsistent view
308 308 cl = repo.changelog
309 309 cl.delayupdate(tr)
310 310 oldheads = set(cl.heads())
311 311
312 312 trp = weakref.proxy(tr)
313 313 # pull off the changeset group
314 314 repo.ui.status(_(b"adding changesets\n"))
315 315 clstart = len(cl)
316 316 progress = repo.ui.makeprogress(
317 317 _(b'changesets'), unit=_(b'chunks'), total=expectedtotal
318 318 )
319 319 self.callback = progress.increment
320 320
321 321 efilesset = set()
322 322 duprevs = []
323 323
324 324 def ondupchangelog(cl, rev):
325 325 if rev < clstart:
326 326 duprevs.append(rev)
327 327
328 328 def onchangelog(cl, rev):
329 329 ctx = cl.changelogrevision(rev)
330 330 efilesset.update(ctx.files)
331 331 repo.register_changeset(rev, ctx)
332 332
333 333 self.changelogheader()
334 334 deltas = self.deltaiter()
335 335 if not cl.addgroup(
336 336 deltas,
337 337 csmap,
338 338 trp,
339 339 alwayscache=True,
340 340 addrevisioncb=onchangelog,
341 341 duplicaterevisioncb=ondupchangelog,
342 342 ):
343 343 repo.ui.develwarn(
344 344 b'applied empty changelog from changegroup',
345 345 config=b'warn-empty-changegroup',
346 346 )
347 347 efiles = len(efilesset)
348 348 clend = len(cl)
349 349 changesets = clend - clstart
350 350 progress.complete()
351 351 del deltas
352 352 # TODO Python 2.7 removal
353 353 # del efilesset
354 354 efilesset = None
355 355 self.callback = None
356 356
357 357 # pull off the manifest group
358 358 repo.ui.status(_(b"adding manifests\n"))
359 359 # We know that we'll never have more manifests than we had
360 360 # changesets.
361 361 progress = repo.ui.makeprogress(
362 362 _(b'manifests'), unit=_(b'chunks'), total=changesets
363 363 )
364 364 self._unpackmanifests(repo, revmap, trp, progress)
365 365
366 366 needfiles = {}
367 367 if repo.ui.configbool(b'server', b'validate'):
368 368 cl = repo.changelog
369 369 ml = repo.manifestlog
370 370 # validate incoming csets have their manifests
371 371 for cset in pycompat.xrange(clstart, clend):
372 372 mfnode = cl.changelogrevision(cset).manifest
373 373 mfest = ml[mfnode].readdelta()
374 374 # store file nodes we must see
375 375 for f, n in pycompat.iteritems(mfest):
376 376 needfiles.setdefault(f, set()).add(n)
377 377
378 378 # process the files
379 379 repo.ui.status(_(b"adding file changes\n"))
380 380 newrevs, newfiles = _addchangegroupfiles(
381 381 repo, self, revmap, trp, efiles, needfiles
382 382 )
383 383
384 384 # making sure the value exists
385 385 tr.changes.setdefault(b'changegroup-count-changesets', 0)
386 386 tr.changes.setdefault(b'changegroup-count-revisions', 0)
387 387 tr.changes.setdefault(b'changegroup-count-files', 0)
388 388 tr.changes.setdefault(b'changegroup-count-heads', 0)
389 389
390 390 # Some code uses bundle operations for internal purposes. They usually
391 391 # set `ui.quiet` to do this outside of user sight. Since the report
392 392 # of such an operation now happens at the end of the transaction,
393 393 # ui.quiet has no direct effect on the output.
394 394 #
395 395 # To preserve this intent we use an inelegant hack: we fail to report
396 396 # the change if `quiet` is set. We should probably move to
397 397 # something better, but this is a good first step to allow the "end
398 398 # of transaction report" to pass tests.
399 399 if not repo.ui.quiet:
400 400 tr.changes[b'changegroup-count-changesets'] += changesets
401 401 tr.changes[b'changegroup-count-revisions'] += newrevs
402 402 tr.changes[b'changegroup-count-files'] += newfiles
403 403
404 404 deltaheads = 0
405 405 if oldheads:
406 406 heads = cl.heads()
407 407 deltaheads += len(heads) - len(oldheads)
408 408 for h in heads:
409 409 if h not in oldheads and repo[h].closesbranch():
410 410 deltaheads -= 1
411 411
412 412 # see previous comment about checking ui.quiet
413 413 if not repo.ui.quiet:
414 414 tr.changes[b'changegroup-count-heads'] += deltaheads
415 415 repo.invalidatevolatilesets()
416 416
417 417 if changesets > 0:
418 418 if b'node' not in tr.hookargs:
419 419 tr.hookargs[b'node'] = hex(cl.node(clstart))
420 420 tr.hookargs[b'node_last'] = hex(cl.node(clend - 1))
421 421 hookargs = dict(tr.hookargs)
422 422 else:
423 423 hookargs = dict(tr.hookargs)
424 424 hookargs[b'node'] = hex(cl.node(clstart))
425 425 hookargs[b'node_last'] = hex(cl.node(clend - 1))
426 426 repo.hook(
427 427 b'pretxnchangegroup',
428 428 throw=True,
429 429 **pycompat.strkwargs(hookargs)
430 430 )
431 431
432 432 added = pycompat.xrange(clstart, clend)
433 433 phaseall = None
434 434 if srctype in (b'push', b'serve'):
435 435 # Old servers can not push the boundary themselves.
436 436 # New servers won't push the boundary if changeset already
437 437 # exists locally as secret
438 438 #
439 439 # We should not use `added` here but the list of all changes in
440 440 # the bundle
441 441 if repo.publishing():
442 442 targetphase = phaseall = phases.public
443 443 else:
444 444 # closer target phase computation
445 445
446 446 # Those changesets have been pushed from the
447 447 # outside, their phases are going to be pushed
448 448 # alongside. Therefor `targetphase` is
449 449 # ignored.
450 450 targetphase = phaseall = phases.draft
451 451 if added:
452 452 phases.registernew(repo, tr, targetphase, added)
453 453 if phaseall is not None:
454 454 if duprevs:
455 455 duprevs.extend(added)
456 456 else:
457 457 duprevs = added
458 458 phases.advanceboundary(repo, tr, phaseall, [], revs=duprevs)
459 459 duprevs = []
460 460
461 461 if changesets > 0:
462 462
463 463 def runhooks(unused_success):
464 464 # These hooks run when the lock releases, not when the
465 465 # transaction closes. So it's possible for the changelog
466 466 # to have changed since we last saw it.
467 467 if clstart >= len(repo):
468 468 return
469 469
470 470 repo.hook(b"changegroup", **pycompat.strkwargs(hookargs))
471 471
472 472 for rev in added:
473 473 args = hookargs.copy()
474 474 args[b'node'] = hex(cl.node(rev))
475 475 del args[b'node_last']
476 476 repo.hook(b"incoming", **pycompat.strkwargs(args))
477 477
478 478 newheads = [h for h in repo.heads() if h not in oldheads]
479 479 repo.ui.log(
480 480 b"incoming",
481 481 b"%d incoming changes - new heads: %s\n",
482 482 len(added),
483 483 b', '.join([hex(c[:6]) for c in newheads]),
484 484 )
485 485
486 486 tr.addpostclose(
487 487 b'changegroup-runhooks-%020i' % clstart,
488 488 lambda tr: repo._afterlock(runhooks),
489 489 )
490 490 finally:
491 491 repo.ui.flush()
492 492 # never return 0 here:
493 493 if deltaheads < 0:
494 494 ret = deltaheads - 1
495 495 else:
496 496 ret = deltaheads + 1
497 497 return ret
498 498
499 499 def deltaiter(self):
500 500 """
501 501 returns an iterator of the deltas in this changegroup
502 502
503 503 Useful for passing to the underlying storage system to be stored.
504 504 """
505 505 chain = None
506 506 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
507 507 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
508 508 yield chunkdata
509 509 chain = chunkdata[0]
510 510
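# Editor's sketch (not part of this changeset): one way a caller might drain
# deltaiter() into a revlog-like store, mirroring _addchangegroupfiles()
# further down. `unpacker`, `store`, `revmap` and `trp` are hypothetical
# placeholders for an unpacker instance, a storage object, a node-to-linknode
# mapping and a transaction wrapper.
def _example_apply_deltas(unpacker, store, revmap, trp):
    deltas = unpacker.deltaiter()
    # addgroup() consumes the (node, p1, p2, cs, deltabase, delta, flags)
    # tuples yielded above and returns a false value if the group was empty.
    return store.addgroup(deltas, revmap, trp)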
511 511
512 512 class cg2unpacker(cg1unpacker):
513 513 """Unpacker for cg2 streams.
514 514
515 515 cg2 streams add support for generaldelta, so the delta header
516 516 format is slightly different. All other features about the data
517 517 remain the same.
518 518 """
519 519
520 520 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
521 521 deltaheadersize = deltaheader.size
522 522 version = b'02'
523 523
524 524 def _deltaheader(self, headertuple, prevnode):
525 525 node, p1, p2, deltabase, cs = headertuple
526 526 flags = 0
527 527 return node, p1, p2, deltabase, cs, flags
528 528
529 529
530 530 class cg3unpacker(cg2unpacker):
531 531 """Unpacker for cg3 streams.
532 532
533 533 cg3 streams add support for exchanging treemanifests and revlog
534 534 flags. It adds the revlog flags to the delta header and an empty chunk
535 535 separating manifests and files.
536 536 """
537 537
538 538 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
539 539 deltaheadersize = deltaheader.size
540 540 version = b'03'
541 541 _grouplistcount = 2 # One list of manifests and one list of files
542 542
543 543 def _deltaheader(self, headertuple, prevnode):
544 544 node, p1, p2, deltabase, cs, flags = headertuple
545 545 return node, p1, p2, deltabase, cs, flags
546 546
547 547 def _unpackmanifests(self, repo, revmap, trp, prog):
548 548 super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
549 549 for chunkdata in iter(self.filelogheader, {}):
550 550 # If we get here, there are directory manifests in the changegroup
551 551 d = chunkdata[b"filename"]
552 552 repo.ui.debug(b"adding %s revisions\n" % d)
553 553 deltas = self.deltaiter()
554 554 if not repo.manifestlog.getstorage(d).addgroup(deltas, revmap, trp):
555 555 raise error.Abort(_(b"received dir revlog group is empty"))
556 556
557 557
558 558 class cg4unpacker(cg3unpacker):
559 559 """Unpacker for cg4 streams.
560 560
561 561 cg4 streams add support for exchanging sidedata.
562 562 """
563 563
564 564 version = b'04'
565 565
566 566 def deltachunk(self, prevnode):
567 567 res = super(cg4unpacker, self).deltachunk(prevnode)
568 568 if not res:
569 569 return res
570 570
571 571 (node, p1, p2, cs, deltabase, delta, flags, _sidedata) = res
572 572
573 573 sidedata_raw = getchunk(self._stream)
574 574 sidedata = {}
575 575 if len(sidedata_raw) > 0:
576 576 sidedata = sidedatamod.deserialize_sidedata(sidedata_raw)
577 577
578 578 return node, p1, p2, cs, deltabase, delta, flags, sidedata
579 579
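# Editor's note (illustrative): in a cg4 stream each delta chunk is therefore
# followed by one extra chunk carrying the serialized sidedata for that
# revision; a zero-length chunk simply means "no sidedata".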
580 580
581 581 class headerlessfixup(object):
582 582 def __init__(self, fh, h):
583 583 self._h = h
584 584 self._fh = fh
585 585
586 586 def read(self, n):
587 587 if self._h:
588 588 d, self._h = self._h[:n], self._h[n:]
589 589 if len(d) < n:
590 590 d += readexactly(self._fh, n - len(d))
591 591 return d
592 592 return readexactly(self._fh, n)
593 593
594 594
595 595 def _revisiondeltatochunks(delta, headerfn):
596 596 """Serialize a revisiondelta to changegroup chunks."""
597 597
598 598 # The captured revision delta may be encoded as a delta against
599 599 # a base revision or as a full revision. The changegroup format
600 600 # requires that everything on the wire be deltas. So for full
601 601 # revisions, we need to invent a header that says to rewrite
602 602 # data.
603 603
604 604 if delta.delta is not None:
605 605 prefix, data = b'', delta.delta
606 606 elif delta.basenode == nullid:
607 607 data = delta.revision
608 608 prefix = mdiff.trivialdiffheader(len(data))
609 609 else:
610 610 data = delta.revision
611 611 prefix = mdiff.replacediffheader(delta.baserevisionsize, len(data))
612 612
613 613 meta = headerfn(delta)
614 614
615 615 yield chunkheader(len(meta) + len(prefix) + len(data))
616 616 yield meta
617 617 if prefix:
618 618 yield prefix
619 619 yield data
620 620
621 sidedata = delta.sidedata
622 if sidedata is not None:
623 # Need a separate chunk for sidedata to be able to differentiate
624 # "raw delta" length and sidedata length
625 yield chunkheader(len(sidedata))
626 yield sidedata
627
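# Editor's note (illustrative): per revision, the chunk stream emitted above
# is roughly
#
#   chunkheader(len(meta) + len(prefix) + len(data))   # 4-byte length prefix
#   meta                                               # packed delta header
#   prefix + data                                      # the delta payload
#   [chunkheader(len(sidedata)) + sidedata]            # cg4 streams only
#
# where `prefix` is only needed when a stored fulltext has to masquerade as
# a delta replacing the whole base revision.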
621 628
622 629 def _sortnodesellipsis(store, nodes, cl, lookup):
623 630 """Sort nodes for changegroup generation."""
624 631 # Ellipses serving mode.
625 632 #
626 633 # In a perfect world, we'd generate better ellipsis-ified graphs
627 634 # for non-changelog revlogs. In practice, we haven't started doing
628 635 # that yet, so the resulting DAGs for the manifestlog and filelogs
629 636 # are actually full of bogus parentage on all the ellipsis
630 637 # nodes. This has the side effect that, while the contents are
631 638 # correct, the individual DAGs might be completely out of whack in
632 639 # a case like 882681bc3166 and its ancestors (back about 10
633 640 # revisions or so) in the main hg repo.
634 641 #
635 642 # The one invariant we *know* holds is that the new (potentially
636 643 # bogus) DAG shape will be valid if we order the nodes in the
637 644 # order that they're introduced in dramatis personae by the
638 645 # changelog, so what we do is we sort the non-changelog histories
639 646 # by the order in which they are used by the changelog.
640 647 key = lambda n: cl.rev(lookup(n))
641 648 return sorted(nodes, key=key)
642 649
643 650
644 651 def _resolvenarrowrevisioninfo(
645 652 cl,
646 653 store,
647 654 ischangelog,
648 655 rev,
649 656 linkrev,
650 657 linknode,
651 658 clrevtolocalrev,
652 659 fullclnodes,
653 660 precomputedellipsis,
654 661 ):
655 662 linkparents = precomputedellipsis[linkrev]
656 663
657 664 def local(clrev):
658 665 """Turn a changelog revnum into a local revnum.
659 666
660 667 The ellipsis dag is stored as revnums on the changelog,
661 668 but when we're producing ellipsis entries for
662 669 non-changelog revlogs, we need to turn those numbers into
663 670 something local. This does that for us, and during the
664 671 changelog sending phase will also expand the stored
665 672 mappings as needed.
666 673 """
667 674 if clrev == nullrev:
668 675 return nullrev
669 676
670 677 if ischangelog:
671 678 return clrev
672 679
673 680 # Walk the ellipsis-ized changelog breadth-first looking for a
674 681 # change that has been linked from the current revlog.
675 682 #
676 683 # For a flat manifest revlog only a single step should be necessary
677 684 # as all relevant changelog entries are relevant to the flat
678 685 # manifest.
679 686 #
680 687 # For a filelog or tree manifest dirlog however not every changelog
681 688 # entry will have been relevant, so we need to skip some changelog
682 689 # nodes even after ellipsis-izing.
683 690 walk = [clrev]
684 691 while walk:
685 692 p = walk[0]
686 693 walk = walk[1:]
687 694 if p in clrevtolocalrev:
688 695 return clrevtolocalrev[p]
689 696 elif p in fullclnodes:
690 697 walk.extend([pp for pp in cl.parentrevs(p) if pp != nullrev])
691 698 elif p in precomputedellipsis:
692 699 walk.extend(
693 700 [pp for pp in precomputedellipsis[p] if pp != nullrev]
694 701 )
695 702 else:
696 703 # In this case, we've got an ellipsis with parents
697 704 # outside the current bundle (likely an
698 705 # incremental pull). We "know" that we can use the
699 706 # value of this same revlog at whatever revision
700 707 # is pointed to by linknode. "Know" is in scare
701 708 # quotes because I haven't done enough examination
702 709 # of edge cases to convince myself this is really
703 710 # a fact - it works for all the (admittedly
704 711 # thorough) cases in our testsuite, but I would be
705 712 # somewhat unsurprised to find a case in the wild
706 713 # where this breaks down a bit. That said, I don't
707 714 # know if it would hurt anything.
708 715 for i in pycompat.xrange(rev, 0, -1):
709 716 if store.linkrev(i) == clrev:
710 717 return i
711 718 # We failed to resolve a parent for this node, so
712 719 # we crash the changegroup construction.
713 720 raise error.Abort(
714 721 b"unable to resolve parent while packing '%s' %r"
715 722 b' for changeset %r' % (store.indexfile, rev, clrev)
716 723 )
717 724
718 725 return nullrev
719 726
720 727 if not linkparents or (store.parentrevs(rev) == (nullrev, nullrev)):
721 728 p1, p2 = nullrev, nullrev
722 729 elif len(linkparents) == 1:
723 730 (p1,) = sorted(local(p) for p in linkparents)
724 731 p2 = nullrev
725 732 else:
726 733 p1, p2 = sorted(local(p) for p in linkparents)
727 734
728 735 p1node, p2node = store.node(p1), store.node(p2)
729 736
730 737 return p1node, p2node, linknode
731 738
732 739
733 740 def deltagroup(
734 741 repo,
735 742 store,
736 743 nodes,
737 744 ischangelog,
738 745 lookup,
739 746 forcedeltaparentprev,
740 747 topic=None,
741 748 ellipses=False,
742 749 clrevtolocalrev=None,
743 750 fullclnodes=None,
744 751 precomputedellipsis=None,
745 752 ):
746 753 """Calculate deltas for a set of revisions.
747 754
748 755 Is a generator of ``revisiondelta`` instances.
749 756
750 757 If topic is not None, progress detail will be generated using this
751 758 topic name (e.g. changesets, manifests, etc).
752 759 """
753 760 if not nodes:
754 761 return
755 762
756 763 cl = repo.changelog
757 764
758 765 if ischangelog:
759 766 # `hg log` shows changesets in storage order. To preserve order
760 767 # across clones, send out changesets in storage order.
761 768 nodesorder = b'storage'
762 769 elif ellipses:
763 770 nodes = _sortnodesellipsis(store, nodes, cl, lookup)
764 771 nodesorder = b'nodes'
765 772 else:
766 773 nodesorder = None
767 774
768 775 # Perform ellipses filtering and revision massaging. We do this before
769 776 # emitrevisions() because a) filtering out revisions creates less work
770 777 # for emitrevisions() b) dropping revisions would break emitrevisions()'s
771 778 # assumptions about delta choices and we would possibly send a delta
772 779 # referencing a missing base revision.
773 780 #
774 781 # Also, calling lookup() has side-effects with regards to populating
775 782 # data structures. If we don't call lookup() for each node or if we call
776 783 # lookup() after the first pass through each node, things can break -
777 784 # possibly intermittently depending on the python hash seed! For that
778 785 # reason, we store a mapping of all linknodes during the initial node
779 786 # pass rather than use lookup() on the output side.
780 787 if ellipses:
781 788 filtered = []
782 789 adjustedparents = {}
783 790 linknodes = {}
784 791
785 792 for node in nodes:
786 793 rev = store.rev(node)
787 794 linknode = lookup(node)
788 795 linkrev = cl.rev(linknode)
789 796 clrevtolocalrev[linkrev] = rev
790 797
791 798 # If linknode is in fullclnodes, it means the corresponding
792 799 # changeset was a full changeset and is being sent unaltered.
793 800 if linknode in fullclnodes:
794 801 linknodes[node] = linknode
795 802
796 803 # If the corresponding changeset wasn't in the set computed
797 804 # as relevant to us, it should be dropped outright.
798 805 elif linkrev not in precomputedellipsis:
799 806 continue
800 807
801 808 else:
802 809 # We could probably do this later and avoid the dict
803 810 # holding state. But it likely doesn't matter.
804 811 p1node, p2node, linknode = _resolvenarrowrevisioninfo(
805 812 cl,
806 813 store,
807 814 ischangelog,
808 815 rev,
809 816 linkrev,
810 817 linknode,
811 818 clrevtolocalrev,
812 819 fullclnodes,
813 820 precomputedellipsis,
814 821 )
815 822
816 823 adjustedparents[node] = (p1node, p2node)
817 824 linknodes[node] = linknode
818 825
819 826 filtered.append(node)
820 827
821 828 nodes = filtered
822 829
823 830 # We expect the first pass to be fast, so we only engage the progress
824 831 # meter for constructing the revision deltas.
825 832 progress = None
826 833 if topic is not None:
827 834 progress = repo.ui.makeprogress(
828 835 topic, unit=_(b'chunks'), total=len(nodes)
829 836 )
830 837
831 838 configtarget = repo.ui.config(b'devel', b'bundle.delta')
832 839 if configtarget not in (b'', b'p1', b'full'):
833 840        msg = _("""config "devel.bundle.delta" has unknown value: %s""")
834 841 repo.ui.warn(msg % configtarget)
835 842
836 843 deltamode = repository.CG_DELTAMODE_STD
837 844 if forcedeltaparentprev:
838 845 deltamode = repository.CG_DELTAMODE_PREV
839 846 elif configtarget == b'p1':
840 847 deltamode = repository.CG_DELTAMODE_P1
841 848 elif configtarget == b'full':
842 849 deltamode = repository.CG_DELTAMODE_FULL
843 850
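    # Editor's note (illustrative): the delta strategy chosen above can be
    # forced from configuration for debugging, e.g. in an hgrc:
    #
    #   [devel]
    #   bundle.delta = p1    # or "full"; any other value falls back to the
    #                        # standard delta selection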
844 851 revisions = store.emitrevisions(
845 852 nodes,
846 853 nodesorder=nodesorder,
847 854 revisiondata=True,
848 855 assumehaveparentrevisions=not ellipses,
849 856 deltamode=deltamode,
850 857 )
851 858
852 859 for i, revision in enumerate(revisions):
853 860 if progress:
854 861 progress.update(i + 1)
855 862
856 863 if ellipses:
857 864 linknode = linknodes[revision.node]
858 865
859 866 if revision.node in adjustedparents:
860 867 p1node, p2node = adjustedparents[revision.node]
861 868 revision.p1node = p1node
862 869 revision.p2node = p2node
863 870 revision.flags |= repository.REVISION_FLAG_ELLIPSIS
864 871
865 872 else:
866 873 linknode = lookup(revision.node)
867 874
868 875 revision.linknode = linknode
869 876 yield revision
870 877
871 878 if progress:
872 879 progress.complete()
873 880
874 881
875 882 class cgpacker(object):
876 883 def __init__(
877 884 self,
878 885 repo,
879 886 oldmatcher,
880 887 matcher,
881 888 version,
882 889 builddeltaheader,
883 890 manifestsend,
884 891 forcedeltaparentprev=False,
885 892 bundlecaps=None,
886 893 ellipses=False,
887 894 shallow=False,
888 895 ellipsisroots=None,
889 896 fullnodes=None,
890 897 remote_sidedata=None,
891 898 ):
892 899 """Given a source repo, construct a bundler.
893 900
894 901 oldmatcher is a matcher that matches on files the client already has.
895 902 These will not be included in the changegroup.
896 903
897 904 matcher is a matcher that matches on files to include in the
898 905 changegroup. Used to facilitate sparse changegroups.
899 906
900 907 forcedeltaparentprev indicates whether delta parents must be against
901 908 the previous revision in a delta group. This should only be used for
902 909 compatibility with changegroup version 1.
903 910
904 911 builddeltaheader is a callable that constructs the header for a group
905 912 delta.
906 913
907 914 manifestsend is a chunk to send after manifests have been fully emitted.
908 915
909 916 ellipses indicates whether ellipsis serving mode is enabled.
910 917
911 918 bundlecaps is optional and can be used to specify the set of
912 919 capabilities which can be used to build the bundle. While bundlecaps is
913 920 unused in core Mercurial, extensions rely on this feature to communicate
914 921 capabilities to customize the changegroup packer.
915 922
916 923 shallow indicates whether shallow data might be sent. The packer may
917 924 need to pack file contents not introduced by the changes being packed.
918 925
919 926 fullnodes is the set of changelog nodes which should not be ellipsis
920 927 nodes. We store this rather than the set of nodes that should be
921 928 ellipsis because for very large histories we expect this to be
922 929 significantly smaller.
923 930
924 931 remote_sidedata is the set of sidedata categories wanted by the remote.
925 932 """
926 933 assert oldmatcher
927 934 assert matcher
928 935 self._oldmatcher = oldmatcher
929 936 self._matcher = matcher
930 937
931 938 self.version = version
932 939 self._forcedeltaparentprev = forcedeltaparentprev
933 940 self._builddeltaheader = builddeltaheader
934 941 self._manifestsend = manifestsend
935 942 self._ellipses = ellipses
936 943
937 944 # Set of capabilities we can use to build the bundle.
938 945 if bundlecaps is None:
939 946 bundlecaps = set()
940 947 self._bundlecaps = bundlecaps
941 948 self._isshallow = shallow
942 949 self._fullclnodes = fullnodes
943 950
944 951 # Maps ellipsis revs to their roots at the changelog level.
945 952 self._precomputedellipsis = ellipsisroots
946 953
947 954 self._repo = repo
948 955
949 956 if self._repo.ui.verbose and not self._repo.ui.debugflag:
950 957 self._verbosenote = self._repo.ui.note
951 958 else:
952 959 self._verbosenote = lambda s: None
953 960
954 961 def generate(
955 962 self, commonrevs, clnodes, fastpathlinkrev, source, changelog=True
956 963 ):
957 964 """Yield a sequence of changegroup byte chunks.
958 965 If changelog is False, changelog data won't be added to changegroup
959 966 """
960 967
961 968 repo = self._repo
962 969 cl = repo.changelog
963 970
964 971 self._verbosenote(_(b'uncompressed size of bundle content:\n'))
965 972 size = 0
966 973
967 974 clstate, deltas = self._generatechangelog(
968 975 cl, clnodes, generate=changelog
969 976 )
970 977 for delta in deltas:
971 978 for chunk in _revisiondeltatochunks(delta, self._builddeltaheader):
972 979 size += len(chunk)
973 980 yield chunk
974 981
975 982 close = closechunk()
976 983 size += len(close)
977 984 yield closechunk()
978 985
979 986 self._verbosenote(_(b'%8.i (changelog)\n') % size)
980 987
981 988 clrevorder = clstate[b'clrevorder']
982 989 manifests = clstate[b'manifests']
983 990 changedfiles = clstate[b'changedfiles']
984 991
985 992 # We need to make sure that the linkrev in the changegroup refers to
986 993 # the first changeset that introduced the manifest or file revision.
987 994 # The fastpath is usually safer than the slowpath, because the filelogs
988 995 # are walked in revlog order.
989 996 #
990 997 # When taking the slowpath when the manifest revlog uses generaldelta,
991 998 # the manifest may be walked in the "wrong" order. Without 'clrevorder',
992 999 # we would get an incorrect linkrev (see fix in cc0ff93d0c0c).
993 1000 #
994 1001 # When taking the fastpath, we are only vulnerable to reordering
995 1002 # of the changelog itself. The changelog never uses generaldelta and is
996 1003 # never reordered. To handle this case, we simply take the slowpath,
997 1004 # which already has the 'clrevorder' logic. This was also fixed in
998 1005 # cc0ff93d0c0c.
999 1006
1000 1007 # Treemanifests don't work correctly with fastpathlinkrev
1001 1008 # either, because we don't discover which directory nodes to
1002 1009 # send along with files. This could probably be fixed.
1003 1010 fastpathlinkrev = fastpathlinkrev and not scmutil.istreemanifest(repo)
1004 1011
1005 1012 fnodes = {} # needed file nodes
1006 1013
1007 1014 size = 0
1008 1015 it = self.generatemanifests(
1009 1016 commonrevs,
1010 1017 clrevorder,
1011 1018 fastpathlinkrev,
1012 1019 manifests,
1013 1020 fnodes,
1014 1021 source,
1015 1022 clstate[b'clrevtomanifestrev'],
1016 1023 )
1017 1024
1018 1025 for tree, deltas in it:
1019 1026 if tree:
1020 1027 assert self.version in (b'03', b'04')
1021 1028 chunk = _fileheader(tree)
1022 1029 size += len(chunk)
1023 1030 yield chunk
1024 1031
1025 1032 for delta in deltas:
1026 1033 chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
1027 1034 for chunk in chunks:
1028 1035 size += len(chunk)
1029 1036 yield chunk
1030 1037
1031 1038 close = closechunk()
1032 1039 size += len(close)
1033 1040 yield close
1034 1041
1035 1042 self._verbosenote(_(b'%8.i (manifests)\n') % size)
1036 1043 yield self._manifestsend
1037 1044
1038 1045 mfdicts = None
1039 1046 if self._ellipses and self._isshallow:
1040 1047 mfdicts = [
1041 1048 (repo.manifestlog[n].read(), lr)
1042 1049 for (n, lr) in pycompat.iteritems(manifests)
1043 1050 ]
1044 1051
1045 1052 manifests.clear()
1046 1053 clrevs = {cl.rev(x) for x in clnodes}
1047 1054
1048 1055 it = self.generatefiles(
1049 1056 changedfiles,
1050 1057 commonrevs,
1051 1058 source,
1052 1059 mfdicts,
1053 1060 fastpathlinkrev,
1054 1061 fnodes,
1055 1062 clrevs,
1056 1063 )
1057 1064
1058 1065 for path, deltas in it:
1059 1066 h = _fileheader(path)
1060 1067 size = len(h)
1061 1068 yield h
1062 1069
1063 1070 for delta in deltas:
1064 1071 chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
1065 1072 for chunk in chunks:
1066 1073 size += len(chunk)
1067 1074 yield chunk
1068 1075
1069 1076 close = closechunk()
1070 1077 size += len(close)
1071 1078 yield close
1072 1079
1073 1080 self._verbosenote(_(b'%8.i %s\n') % (size, path))
1074 1081
1075 1082 yield closechunk()
1076 1083
1077 1084 if clnodes:
1078 1085 repo.hook(b'outgoing', node=hex(clnodes[0]), source=source)
1079 1086
1080 1087 def _generatechangelog(self, cl, nodes, generate=True):
1081 1088 """Generate data for changelog chunks.
1082 1089
1083 1090 Returns a 2-tuple of a dict containing state and an iterable of
1084 1091 byte chunks. The state will not be fully populated until the
1085 1092 chunk stream has been fully consumed.
1086 1093
1087 1094 if generate is False, the state will be fully populated and no chunk
1088 1095 stream will be yielded
1089 1096 """
1090 1097 clrevorder = {}
1091 1098 manifests = {}
1092 1099 mfl = self._repo.manifestlog
1093 1100 changedfiles = set()
1094 1101 clrevtomanifestrev = {}
1095 1102
1096 1103 state = {
1097 1104 b'clrevorder': clrevorder,
1098 1105 b'manifests': manifests,
1099 1106 b'changedfiles': changedfiles,
1100 1107 b'clrevtomanifestrev': clrevtomanifestrev,
1101 1108 }
1102 1109
1103 1110 if not (generate or self._ellipses):
1104 1111 # sort the nodes in storage order
1105 1112 nodes = sorted(nodes, key=cl.rev)
1106 1113 for node in nodes:
1107 1114 c = cl.changelogrevision(node)
1108 1115 clrevorder[node] = len(clrevorder)
1109 1116 # record the first changeset introducing this manifest version
1110 1117 manifests.setdefault(c.manifest, node)
1111 1118 # Record a complete list of potentially-changed files in
1112 1119 # this manifest.
1113 1120 changedfiles.update(c.files)
1114 1121
1115 1122 return state, ()
1116 1123
1117 1124 # Callback for the changelog, used to collect changed files and
1118 1125 # manifest nodes.
1119 1126 # Returns the linkrev node (identity in the changelog case).
1120 1127 def lookupcl(x):
1121 1128 c = cl.changelogrevision(x)
1122 1129 clrevorder[x] = len(clrevorder)
1123 1130
1124 1131 if self._ellipses:
1125 1132 # Only update manifests if x is going to be sent. Otherwise we
1126 1133 # end up with bogus linkrevs specified for manifests and
1127 1134 # we skip some manifest nodes that we should otherwise
1128 1135 # have sent.
1129 1136 if (
1130 1137 x in self._fullclnodes
1131 1138 or cl.rev(x) in self._precomputedellipsis
1132 1139 ):
1133 1140
1134 1141 manifestnode = c.manifest
1135 1142 # Record the first changeset introducing this manifest
1136 1143 # version.
1137 1144 manifests.setdefault(manifestnode, x)
1138 1145 # Set this narrow-specific dict so we have the lowest
1139 1146 # manifest revnum to look up for this cl revnum. (Part of
1140 1147 # mapping changelog ellipsis parents to manifest ellipsis
1141 1148 # parents)
1142 1149 clrevtomanifestrev.setdefault(
1143 1150 cl.rev(x), mfl.rev(manifestnode)
1144 1151 )
1145 1152 # We can't trust the changed files list in the changeset if the
1146 1153 # client requested a shallow clone.
1147 1154 if self._isshallow:
1148 1155 changedfiles.update(mfl[c.manifest].read().keys())
1149 1156 else:
1150 1157 changedfiles.update(c.files)
1151 1158 else:
1152 1159 # record the first changeset introducing this manifest version
1153 1160 manifests.setdefault(c.manifest, x)
1154 1161 # Record a complete list of potentially-changed files in
1155 1162 # this manifest.
1156 1163 changedfiles.update(c.files)
1157 1164
1158 1165 return x
1159 1166
1160 1167 gen = deltagroup(
1161 1168 self._repo,
1162 1169 cl,
1163 1170 nodes,
1164 1171 True,
1165 1172 lookupcl,
1166 1173 self._forcedeltaparentprev,
1167 1174 ellipses=self._ellipses,
1168 1175 topic=_(b'changesets'),
1169 1176 clrevtolocalrev={},
1170 1177 fullclnodes=self._fullclnodes,
1171 1178 precomputedellipsis=self._precomputedellipsis,
1172 1179 )
1173 1180
1174 1181 return state, gen
1175 1182
1176 1183 def generatemanifests(
1177 1184 self,
1178 1185 commonrevs,
1179 1186 clrevorder,
1180 1187 fastpathlinkrev,
1181 1188 manifests,
1182 1189 fnodes,
1183 1190 source,
1184 1191 clrevtolocalrev,
1185 1192 ):
1186 1193 """Returns an iterator of changegroup chunks containing manifests.
1187 1194
1188 1195 `source` is unused here, but is used by extensions like remotefilelog to
1189 1196    change what is sent based on pulls vs. pushes, etc.
1190 1197 """
1191 1198 repo = self._repo
1192 1199 mfl = repo.manifestlog
1193 1200 tmfnodes = {b'': manifests}
1194 1201
1195 1202 # Callback for the manifest, used to collect linkrevs for filelog
1196 1203 # revisions.
1197 1204 # Returns the linkrev node (collected in lookupcl).
1198 1205 def makelookupmflinknode(tree, nodes):
1199 1206 if fastpathlinkrev:
1200 1207 assert not tree
1201 1208 return (
1202 1209 manifests.__getitem__
1203 1210 ) # pytype: disable=unsupported-operands
1204 1211
1205 1212 def lookupmflinknode(x):
1206 1213 """Callback for looking up the linknode for manifests.
1207 1214
1208 1215 Returns the linkrev node for the specified manifest.
1209 1216
1210 1217 SIDE EFFECT:
1211 1218
1212 1219 1) fclnodes gets populated with the list of relevant
1213 1220 file nodes if we're not using fastpathlinkrev
1214 1221 2) When treemanifests are in use, collects treemanifest nodes
1215 1222 to send
1216 1223
1217 1224 Note that this means manifests must be completely sent to
1218 1225 the client before you can trust the list of files and
1219 1226 treemanifests to send.
1220 1227 """
1221 1228 clnode = nodes[x]
1222 1229 mdata = mfl.get(tree, x).readfast(shallow=True)
1223 1230 for p, n, fl in mdata.iterentries():
1224 1231 if fl == b't': # subdirectory manifest
1225 1232 subtree = tree + p + b'/'
1226 1233 tmfclnodes = tmfnodes.setdefault(subtree, {})
1227 1234 tmfclnode = tmfclnodes.setdefault(n, clnode)
1228 1235 if clrevorder[clnode] < clrevorder[tmfclnode]:
1229 1236 tmfclnodes[n] = clnode
1230 1237 else:
1231 1238 f = tree + p
1232 1239 fclnodes = fnodes.setdefault(f, {})
1233 1240 fclnode = fclnodes.setdefault(n, clnode)
1234 1241 if clrevorder[clnode] < clrevorder[fclnode]:
1235 1242 fclnodes[n] = clnode
1236 1243 return clnode
1237 1244
1238 1245 return lookupmflinknode
1239 1246
1240 1247 while tmfnodes:
1241 1248 tree, nodes = tmfnodes.popitem()
1242 1249
1243 1250 should_visit = self._matcher.visitdir(tree[:-1])
1244 1251 if tree and not should_visit:
1245 1252 continue
1246 1253
1247 1254 store = mfl.getstorage(tree)
1248 1255
1249 1256 if not should_visit:
1250 1257 # No nodes to send because this directory is out of
1251 1258 # the client's view of the repository (probably
1252 1259 # because of narrow clones). Do this even for the root
1253 1260 # directory (tree=='')
1254 1261 prunednodes = []
1255 1262 else:
1256 1263 # Avoid sending any manifest nodes we can prove the
1257 1264 # client already has by checking linkrevs. See the
1258 1265 # related comment in generatefiles().
1259 1266 prunednodes = self._prunemanifests(store, nodes, commonrevs)
1260 1267
1261 1268 if tree and not prunednodes:
1262 1269 continue
1263 1270
1264 1271 lookupfn = makelookupmflinknode(tree, nodes)
1265 1272
1266 1273 deltas = deltagroup(
1267 1274 self._repo,
1268 1275 store,
1269 1276 prunednodes,
1270 1277 False,
1271 1278 lookupfn,
1272 1279 self._forcedeltaparentprev,
1273 1280 ellipses=self._ellipses,
1274 1281 topic=_(b'manifests'),
1275 1282 clrevtolocalrev=clrevtolocalrev,
1276 1283 fullclnodes=self._fullclnodes,
1277 1284 precomputedellipsis=self._precomputedellipsis,
1278 1285 )
1279 1286
1280 1287 if not self._oldmatcher.visitdir(store.tree[:-1]):
1281 1288 yield tree, deltas
1282 1289 else:
1283 1290 # 'deltas' is a generator and we need to consume it even if
1284 1291 # we are not going to send it because a side-effect is that
1285 1292                # it updates tmfnodes (via lookupfn)
1286 1293 for d in deltas:
1287 1294 pass
1288 1295 if not tree:
1289 1296 yield tree, []
1290 1297
1291 1298 def _prunemanifests(self, store, nodes, commonrevs):
1292 1299 if not self._ellipses:
1293 1300            # In the non-ellipses case and for large repositories, it is better
1294 1301            # to avoid calling store.rev and store.linkrev on a lot of nodes,
1295 1302            # even if that means sending some extra data.
1296 1303 return nodes.copy()
1297 1304 # This is split out as a separate method to allow filtering
1298 1305 # commonrevs in extension code.
1299 1306 #
1300 1307 # TODO(augie): this shouldn't be required, instead we should
1301 1308 # make filtering of revisions to send delegated to the store
1302 1309 # layer.
1303 1310 frev, flr = store.rev, store.linkrev
1304 1311 return [n for n in nodes if flr(frev(n)) not in commonrevs]
1305 1312
1306 1313 # The 'source' parameter is useful for extensions
1307 1314 def generatefiles(
1308 1315 self,
1309 1316 changedfiles,
1310 1317 commonrevs,
1311 1318 source,
1312 1319 mfdicts,
1313 1320 fastpathlinkrev,
1314 1321 fnodes,
1315 1322 clrevs,
1316 1323 ):
1317 1324 changedfiles = [
1318 1325 f
1319 1326 for f in changedfiles
1320 1327 if self._matcher(f) and not self._oldmatcher(f)
1321 1328 ]
1322 1329
1323 1330 if not fastpathlinkrev:
1324 1331
1325 1332 def normallinknodes(unused, fname):
1326 1333 return fnodes.get(fname, {})
1327 1334
1328 1335 else:
1329 1336 cln = self._repo.changelog.node
1330 1337
1331 1338 def normallinknodes(store, fname):
1332 1339 flinkrev = store.linkrev
1333 1340 fnode = store.node
1334 1341 revs = ((r, flinkrev(r)) for r in store)
1335 1342 return {fnode(r): cln(lr) for r, lr in revs if lr in clrevs}
1336 1343
1337 1344 clrevtolocalrev = {}
1338 1345
1339 1346 if self._isshallow:
1340 1347 # In a shallow clone, the linknodes callback needs to also include
1341 1348 # those file nodes that are in the manifests we sent but weren't
1342 1349 # introduced by those manifests.
1343 1350 commonctxs = [self._repo[c] for c in commonrevs]
1344 1351 clrev = self._repo.changelog.rev
1345 1352
1346 1353 def linknodes(flog, fname):
1347 1354 for c in commonctxs:
1348 1355 try:
1349 1356 fnode = c.filenode(fname)
1350 1357 clrevtolocalrev[c.rev()] = flog.rev(fnode)
1351 1358 except error.ManifestLookupError:
1352 1359 pass
1353 1360 links = normallinknodes(flog, fname)
1354 1361 if len(links) != len(mfdicts):
1355 1362 for mf, lr in mfdicts:
1356 1363 fnode = mf.get(fname, None)
1357 1364 if fnode in links:
1358 1365 links[fnode] = min(links[fnode], lr, key=clrev)
1359 1366 elif fnode:
1360 1367 links[fnode] = lr
1361 1368 return links
1362 1369
1363 1370 else:
1364 1371 linknodes = normallinknodes
1365 1372
1366 1373 repo = self._repo
1367 1374 progress = repo.ui.makeprogress(
1368 1375 _(b'files'), unit=_(b'files'), total=len(changedfiles)
1369 1376 )
1370 1377 for i, fname in enumerate(sorted(changedfiles)):
1371 1378 filerevlog = repo.file(fname)
1372 1379 if not filerevlog:
1373 1380 raise error.Abort(
1374 1381 _(b"empty or missing file data for %s") % fname
1375 1382 )
1376 1383
1377 1384 clrevtolocalrev.clear()
1378 1385
1379 1386 linkrevnodes = linknodes(filerevlog, fname)
1380 1387 # Lookup for filenodes, we collected the linkrev nodes above in the
1381 1388            # fastpath case and with lookupmflinknode in the slowpath case.
1382 1389 def lookupfilelog(x):
1383 1390 return linkrevnodes[x]
1384 1391
1385 1392 frev, flr = filerevlog.rev, filerevlog.linkrev
1386 1393 # Skip sending any filenode we know the client already
1387 1394 # has. This avoids over-sending files relatively
1388 1395 # inexpensively, so it's not a problem if we under-filter
1389 1396 # here.
1390 1397 filenodes = [
1391 1398 n for n in linkrevnodes if flr(frev(n)) not in commonrevs
1392 1399 ]
1393 1400
1394 1401 if not filenodes:
1395 1402 continue
1396 1403
1397 1404 progress.update(i + 1, item=fname)
1398 1405
1399 1406 deltas = deltagroup(
1400 1407 self._repo,
1401 1408 filerevlog,
1402 1409 filenodes,
1403 1410 False,
1404 1411 lookupfilelog,
1405 1412 self._forcedeltaparentprev,
1406 1413 ellipses=self._ellipses,
1407 1414 clrevtolocalrev=clrevtolocalrev,
1408 1415 fullclnodes=self._fullclnodes,
1409 1416 precomputedellipsis=self._precomputedellipsis,
1410 1417 )
1411 1418
1412 1419 yield fname, deltas
1413 1420
1414 1421 progress.complete()
1415 1422
1416 1423
1417 1424 def _makecg1packer(
1418 1425 repo,
1419 1426 oldmatcher,
1420 1427 matcher,
1421 1428 bundlecaps,
1422 1429 ellipses=False,
1423 1430 shallow=False,
1424 1431 ellipsisroots=None,
1425 1432 fullnodes=None,
1426 1433 remote_sidedata=None,
1427 1434 ):
1428 1435 builddeltaheader = lambda d: _CHANGEGROUPV1_DELTA_HEADER.pack(
1429 1436 d.node, d.p1node, d.p2node, d.linknode
1430 1437 )
1431 1438
1432 1439 return cgpacker(
1433 1440 repo,
1434 1441 oldmatcher,
1435 1442 matcher,
1436 1443 b'01',
1437 1444 builddeltaheader=builddeltaheader,
1438 1445 manifestsend=b'',
1439 1446 forcedeltaparentprev=True,
1440 1447 bundlecaps=bundlecaps,
1441 1448 ellipses=ellipses,
1442 1449 shallow=shallow,
1443 1450 ellipsisroots=ellipsisroots,
1444 1451 fullnodes=fullnodes,
1445 1452 )
1446 1453
1447 1454
1448 1455 def _makecg2packer(
1449 1456 repo,
1450 1457 oldmatcher,
1451 1458 matcher,
1452 1459 bundlecaps,
1453 1460 ellipses=False,
1454 1461 shallow=False,
1455 1462 ellipsisroots=None,
1456 1463 fullnodes=None,
1457 1464 remote_sidedata=None,
1458 1465 ):
1459 1466 builddeltaheader = lambda d: _CHANGEGROUPV2_DELTA_HEADER.pack(
1460 1467 d.node, d.p1node, d.p2node, d.basenode, d.linknode
1461 1468 )
1462 1469
1463 1470 return cgpacker(
1464 1471 repo,
1465 1472 oldmatcher,
1466 1473 matcher,
1467 1474 b'02',
1468 1475 builddeltaheader=builddeltaheader,
1469 1476 manifestsend=b'',
1470 1477 bundlecaps=bundlecaps,
1471 1478 ellipses=ellipses,
1472 1479 shallow=shallow,
1473 1480 ellipsisroots=ellipsisroots,
1474 1481 fullnodes=fullnodes,
1475 1482 )
1476 1483
1477 1484
1478 1485 def _makecg3packer(
1479 1486 repo,
1480 1487 oldmatcher,
1481 1488 matcher,
1482 1489 bundlecaps,
1483 1490 ellipses=False,
1484 1491 shallow=False,
1485 1492 ellipsisroots=None,
1486 1493 fullnodes=None,
1487 1494 remote_sidedata=None,
1488 1495 ):
1489 1496 builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
1490 1497 d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags
1491 1498 )
1492 1499
1493 1500 return cgpacker(
1494 1501 repo,
1495 1502 oldmatcher,
1496 1503 matcher,
1497 1504 b'03',
1498 1505 builddeltaheader=builddeltaheader,
1499 1506 manifestsend=closechunk(),
1500 1507 bundlecaps=bundlecaps,
1501 1508 ellipses=ellipses,
1502 1509 shallow=shallow,
1503 1510 ellipsisroots=ellipsisroots,
1504 1511 fullnodes=fullnodes,
1505 1512 )
1506 1513
1507 1514
1508 1515 def _makecg4packer(
1509 1516 repo,
1510 1517 oldmatcher,
1511 1518 matcher,
1512 1519 bundlecaps,
1513 1520 ellipses=False,
1514 1521 shallow=False,
1515 1522 ellipsisroots=None,
1516 1523 fullnodes=None,
1517 1524 remote_sidedata=None,
1518 1525 ):
1519 1526 # Same header func as cg3. Sidedata is in a separate chunk from the delta to
1520 1527    # differentiate the "raw delta" from the sidedata.
1521 1528 builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
1522 1529 d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags
1523 1530 )
1524 1531
1525 1532 return cgpacker(
1526 1533 repo,
1527 1534 oldmatcher,
1528 1535 matcher,
1529 1536 b'04',
1530 1537 builddeltaheader=builddeltaheader,
1531 1538 manifestsend=closechunk(),
1532 1539 bundlecaps=bundlecaps,
1533 1540 ellipses=ellipses,
1534 1541 shallow=shallow,
1535 1542 ellipsisroots=ellipsisroots,
1536 1543 fullnodes=fullnodes,
1537 1544 remote_sidedata=remote_sidedata,
1538 1545 )
1539 1546
1540 1547
1541 1548 _packermap = {
1542 1549 b'01': (_makecg1packer, cg1unpacker),
1543 1550 # cg2 adds support for exchanging generaldelta
1544 1551 b'02': (_makecg2packer, cg2unpacker),
1545 1552 # cg3 adds support for exchanging revlog flags and treemanifests
1546 1553 b'03': (_makecg3packer, cg3unpacker),
1547 1554        # cg4 adds support for exchanging sidedata
1548 1555 b'04': (_makecg4packer, cg4unpacker),
1549 1556 }
1550 1557
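# Editor's note (illustrative): _packermap is the single registry consulted
# by getbundler()/getunbundler() below, so supporting a new changegroup
# version largely amounts to registering a (packer factory, unpacker class)
# pair here and advertising it from allsupportedversions().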
1551 1558
1552 1559 def allsupportedversions(repo):
1553 1560 versions = set(_packermap.keys())
1554 1561 needv03 = False
1555 1562 if (
1556 1563 repo.ui.configbool(b'experimental', b'changegroup3')
1557 1564 or repo.ui.configbool(b'experimental', b'treemanifest')
1558 1565 or scmutil.istreemanifest(repo)
1559 1566 ):
1560 1567        # we keep version 03 because we need it to exchange treemanifest data
1561 1568        #
1562 1569        # we also keep versions 01 and 02, because it is possible for a repo to
1563 1570        # contain both normal and tree manifests at the same time, so using an
1564 1571        # older version to pull data is still viable
1565 1572        #
1566 1573        # (or even to push a subset of history)
1567 1574 needv03 = True
1568 1575 has_revlogv2 = requirements.REVLOGV2_REQUIREMENT in repo.requirements
1569 1576 if not has_revlogv2:
1570 1577 versions.discard(b'04')
1571 1578 if not needv03:
1572 1579 versions.discard(b'03')
1573 1580 return versions
1574 1581
1575 1582
1576 1583 # Changegroup versions that can be applied to the repo
1577 1584 def supportedincomingversions(repo):
1578 1585 return allsupportedversions(repo)
1579 1586
1580 1587
1581 1588 # Changegroup versions that can be created from the repo
1582 1589 def supportedoutgoingversions(repo):
1583 1590 versions = allsupportedversions(repo)
1584 1591 if scmutil.istreemanifest(repo):
1585 1592 # Versions 01 and 02 support only flat manifests and it's just too
1586 1593 # expensive to convert between the flat manifest and tree manifest on
1587 1594 # the fly. Since tree manifests are hashed differently, all of history
1588 1595 # would have to be converted. Instead, we simply don't even pretend to
1589 1596 # support versions 01 and 02.
1590 1597 versions.discard(b'01')
1591 1598 versions.discard(b'02')
1592 1599 if requirements.NARROW_REQUIREMENT in repo.requirements:
1593 1600 # Versions 01 and 02 don't support revlog flags, and we need to
1594 1601 # support that for stripping and unbundling to work.
1595 1602 versions.discard(b'01')
1596 1603 versions.discard(b'02')
1597 1604 if LFS_REQUIREMENT in repo.requirements:
1598 1605 # Versions 01 and 02 don't support revlog flags, and we need to
1599 1606 # mark LFS entries with REVIDX_EXTSTORED.
1600 1607 versions.discard(b'01')
1601 1608 versions.discard(b'02')
1602 1609
1603 1610 return versions
1604 1611
1605 1612
1606 1613 def localversion(repo):
1607 1614 # Finds the best version to use for bundles that are meant to be used
1608 1615 # locally, such as those from strip and shelve, and temporary bundles.
1609 1616 return max(supportedoutgoingversions(repo))
1610 1617
1611 1618
1612 1619 def safeversion(repo):
1613 1620 # Finds the smallest version that it's safe to assume clients of the repo
1614 1621 # will support. For example, all hg versions that support generaldelta also
1615 1622 # support changegroup 02.
1616 1623 versions = supportedoutgoingversions(repo)
1617 1624 if requirements.GENERALDELTA_REQUIREMENT in repo.requirements:
1618 1625 versions.discard(b'01')
1619 1626 assert versions
1620 1627 return min(versions)
1621 1628
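# Editor's sketch (hypothetical helper, not part of this changeset): the
# functions above are typically combined by intersecting what we can emit
# with what the remote side accepts and taking the highest common version.
def _example_pick_version(repo, remote_versions):
    common = supportedoutgoingversions(repo) & set(remote_versions)
    # b'01' < b'02' < b'03' < b'04' sorts correctly as bytes, just as in
    # localversion() above.
    return max(common) if common else None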
1622 1629
1623 1630 def getbundler(
1624 1631 version,
1625 1632 repo,
1626 1633 bundlecaps=None,
1627 1634 oldmatcher=None,
1628 1635 matcher=None,
1629 1636 ellipses=False,
1630 1637 shallow=False,
1631 1638 ellipsisroots=None,
1632 1639 fullnodes=None,
1633 1640 remote_sidedata=None,
1634 1641 ):
1635 1642 assert version in supportedoutgoingversions(repo)
1636 1643
1637 1644 if matcher is None:
1638 1645 matcher = matchmod.always()
1639 1646 if oldmatcher is None:
1640 1647 oldmatcher = matchmod.never()
1641 1648
1642 1649 if version == b'01' and not matcher.always():
1643 1650 raise error.ProgrammingError(
1644 1651 b'version 01 changegroups do not support sparse file matchers'
1645 1652 )
1646 1653
1647 1654 if ellipses and version in (b'01', b'02'):
1648 1655 raise error.Abort(
1649 1656 _(
1650 1657 b'ellipsis nodes require at least cg3 on client and server, '
1651 1658 b'but negotiated version %s'
1652 1659 )
1653 1660 % version
1654 1661 )
1655 1662
1656 1663 # Requested files could include files not in the local store. So
1657 1664 # filter those out.
1658 1665 matcher = repo.narrowmatch(matcher)
1659 1666
1660 1667 fn = _packermap[version][0]
1661 1668 return fn(
1662 1669 repo,
1663 1670 oldmatcher,
1664 1671 matcher,
1665 1672 bundlecaps,
1666 1673 ellipses=ellipses,
1667 1674 shallow=shallow,
1668 1675 ellipsisroots=ellipsisroots,
1669 1676 fullnodes=fullnodes,
1670 1677 remote_sidedata=remote_sidedata,
1671 1678 )
1672 1679
1673 1680
1674 1681 def getunbundler(version, fh, alg, extras=None):
1675 1682 return _packermap[version][1](fh, alg, extras=extras)
1676 1683
1677 1684
1678 1685 def _changegroupinfo(repo, nodes, source):
1679 1686 if repo.ui.verbose or source == b'bundle':
1680 1687 repo.ui.status(_(b"%d changesets found\n") % len(nodes))
1681 1688 if repo.ui.debugflag:
1682 1689 repo.ui.debug(b"list of changesets:\n")
1683 1690 for node in nodes:
1684 1691 repo.ui.debug(b"%s\n" % hex(node))
1685 1692
1686 1693
1687 1694 def makechangegroup(
1688 1695 repo, outgoing, version, source, fastpath=False, bundlecaps=None
1689 1696 ):
1690 1697 cgstream = makestream(
1691 1698 repo,
1692 1699 outgoing,
1693 1700 version,
1694 1701 source,
1695 1702 fastpath=fastpath,
1696 1703 bundlecaps=bundlecaps,
1697 1704 )
1698 1705 return getunbundler(
1699 1706 version,
1700 1707 util.chunkbuffer(cgstream),
1701 1708 None,
1702 1709 {b'clcount': len(outgoing.missing)},
1703 1710 )
1704 1711
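# Editor's sketch (illustrative): a minimal caller of the helpers above.
# `outgoing` stands for a discovery-style object with `missing`, `common`
# and `ancestorsof` attributes; the b'push' source string is an assumption.
def _example_bundle_outgoing(repo, outgoing):
    version = safeversion(repo)
    # makechangegroup() builds the stream and wraps it back into an
    # unpacker, ready to be written out or applied elsewhere.
    return makechangegroup(repo, outgoing, version, b'push')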
1705 1712
1706 1713 def makestream(
1707 1714 repo,
1708 1715 outgoing,
1709 1716 version,
1710 1717 source,
1711 1718 fastpath=False,
1712 1719 bundlecaps=None,
1713 1720 matcher=None,
1714 1721 remote_sidedata=None,
1715 1722 ):
1716 1723 bundler = getbundler(
1717 1724 version,
1718 1725 repo,
1719 1726 bundlecaps=bundlecaps,
1720 1727 matcher=matcher,
1721 1728 remote_sidedata=remote_sidedata,
1722 1729 )
1723 1730
1724 1731 repo = repo.unfiltered()
1725 1732 commonrevs = outgoing.common
1726 1733 csets = outgoing.missing
1727 1734 heads = outgoing.ancestorsof
1728 1735    # We go through the fast path if we get told to, or if all (unfiltered)
1729 1736    # heads have been requested (since we then know all the linkrevs will
1730 1737    # be pulled by the client).
1731 1738 heads.sort()
1732 1739 fastpathlinkrev = fastpath or (
1733 1740 repo.filtername is None and heads == sorted(repo.heads())
1734 1741 )
1735 1742
1736 1743 repo.hook(b'preoutgoing', throw=True, source=source)
1737 1744 _changegroupinfo(repo, csets, source)
1738 1745 return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1739 1746
1740 1747
1741 1748 def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
1742 1749 revisions = 0
1743 1750 files = 0
1744 1751 progress = repo.ui.makeprogress(
1745 1752 _(b'files'), unit=_(b'files'), total=expectedfiles
1746 1753 )
1747 1754 for chunkdata in iter(source.filelogheader, {}):
1748 1755 files += 1
1749 1756 f = chunkdata[b"filename"]
1750 1757 repo.ui.debug(b"adding %s revisions\n" % f)
1751 1758 progress.increment()
1752 1759 fl = repo.file(f)
1753 1760 o = len(fl)
1754 1761 try:
1755 1762 deltas = source.deltaiter()
1756 1763 if not fl.addgroup(deltas, revmap, trp):
1757 1764 raise error.Abort(_(b"received file revlog group is empty"))
1758 1765 except error.CensoredBaseError as e:
1759 1766 raise error.Abort(_(b"received delta base is censored: %s") % e)
1760 1767 revisions += len(fl) - o
1761 1768 if f in needfiles:
1762 1769 needs = needfiles[f]
1763 1770 for new in pycompat.xrange(o, len(fl)):
1764 1771 n = fl.node(new)
1765 1772 if n in needs:
1766 1773 needs.remove(n)
1767 1774 else:
1768 1775 raise error.Abort(_(b"received spurious file revlog entry"))
1769 1776 if not needs:
1770 1777 del needfiles[f]
1771 1778 progress.complete()
1772 1779
1773 1780 for f, needs in pycompat.iteritems(needfiles):
1774 1781 fl = repo.file(f)
1775 1782 for n in needs:
1776 1783 try:
1777 1784 fl.rev(n)
1778 1785 except error.LookupError:
1779 1786 raise error.Abort(
1780 1787 _(b'missing file data for %s:%s - run hg verify')
1781 1788 % (f, hex(n))
1782 1789 )
1783 1790
1784 1791 return revisions, files
@@ -1,1994 +1,1998 b''
1 1 # repository.py - Interfaces and base classes for repositories and peers.
2 2 #
3 3 # Copyright 2017 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from ..i18n import _
11 11 from .. import error
12 12 from . import util as interfaceutil
13 13
14 14 # Local repository feature string.
15 15
16 16 # Revlogs are being used for file storage.
17 17 REPO_FEATURE_REVLOG_FILE_STORAGE = b'revlogfilestorage'
18 18 # The storage part of the repository is shared from an external source.
19 19 REPO_FEATURE_SHARED_STORAGE = b'sharedstore'
20 20 # LFS supported for backing file storage.
21 21 REPO_FEATURE_LFS = b'lfs'
22 22 # Repository supports being stream cloned.
23 23 REPO_FEATURE_STREAM_CLONE = b'streamclone'
24 24 # Files storage may lack data for all ancestors.
25 25 REPO_FEATURE_SHALLOW_FILE_STORAGE = b'shallowfilestorage'
26 26
27 27 REVISION_FLAG_CENSORED = 1 << 15
28 28 REVISION_FLAG_ELLIPSIS = 1 << 14
29 29 REVISION_FLAG_EXTSTORED = 1 << 13
30 30 REVISION_FLAG_SIDEDATA = 1 << 12
31 31 REVISION_FLAG_HASCOPIESINFO = 1 << 11
32 32
33 33 REVISION_FLAGS_KNOWN = (
34 34 REVISION_FLAG_CENSORED
35 35 | REVISION_FLAG_ELLIPSIS
36 36 | REVISION_FLAG_EXTSTORED
37 37 | REVISION_FLAG_SIDEDATA
38 38 | REVISION_FLAG_HASCOPIESINFO
39 39 )
40 40
41 41 CG_DELTAMODE_STD = b'default'
42 42 CG_DELTAMODE_PREV = b'previous'
43 43 CG_DELTAMODE_FULL = b'fulltext'
44 44 CG_DELTAMODE_P1 = b'p1'
45 45
46 46
47 47 class ipeerconnection(interfaceutil.Interface):
48 48 """Represents a "connection" to a repository.
49 49
50 50 This is the base interface for representing a connection to a repository.
51 51 It holds basic properties and methods applicable to all peer types.
52 52
53 53 This is not a complete interface definition and should not be used
54 54 outside of this module.
55 55 """
56 56
57 57 ui = interfaceutil.Attribute("""ui.ui instance""")
58 58
59 59 def url():
60 60 """Returns a URL string representing this peer.
61 61
62 62 Currently, implementations expose the raw URL used to construct the
63 63 instance. It may contain credentials as part of the URL. The
64 64 expectations of the value aren't well-defined and this could lead to
65 65 data leakage.
66 66
67 67 TODO audit/clean consumers and more clearly define the contents of this
68 68 value.
69 69 """
70 70
71 71 def local():
72 72 """Returns a local repository instance.
73 73
74 74 If the peer represents a local repository, returns an object that
75 75 can be used to interface with it. Otherwise returns ``None``.
76 76 """
77 77
78 78 def peer():
79 79 """Returns an object conforming to this interface.
80 80
81 81 Most implementations will ``return self``.
82 82 """
83 83
84 84 def canpush():
85 85 """Returns a boolean indicating if this peer can be pushed to."""
86 86
87 87 def close():
88 88 """Close the connection to this peer.
89 89
90 90 This is called when the peer will no longer be used. Resources
91 91 associated with the peer should be cleaned up.
92 92 """
93 93
94 94
95 95 class ipeercapabilities(interfaceutil.Interface):
96 96 """Peer sub-interface related to capabilities."""
97 97
98 98 def capable(name):
99 99 """Determine support for a named capability.
100 100
101 101 Returns ``False`` if capability not supported.
102 102
103 103 Returns ``True`` if boolean capability is supported. Returns a string
104 104 if capability support is non-boolean.
105 105
106 106 Capability strings may or may not map to wire protocol capabilities.
107 107 """
108 108
109 109 def requirecap(name, purpose):
110 110 """Require a capability to be present.
111 111
112 112 Raises a ``CapabilityError`` if the capability isn't present.
113 113 """
114 114
115 115
116 116 class ipeercommands(interfaceutil.Interface):
117 117 """Client-side interface for communicating over the wire protocol.
118 118
119 119 This interface is used as a gateway to the Mercurial wire protocol.
120 120    Methods commonly call wire protocol commands of the same name.
121 121 """
122 122
123 123 def branchmap():
124 124 """Obtain heads in named branches.
125 125
126 126 Returns a dict mapping branch name to an iterable of nodes that are
127 127 heads on that branch.
128 128 """
129 129
130 130 def capabilities():
131 131 """Obtain capabilities of the peer.
132 132
133 133 Returns a set of string capabilities.
134 134 """
135 135
136 136 def clonebundles():
137 137 """Obtains the clone bundles manifest for the repo.
138 138
139 139 Returns the manifest as unparsed bytes.
140 140 """
141 141
142 142 def debugwireargs(one, two, three=None, four=None, five=None):
143 143 """Used to facilitate debugging of arguments passed over the wire."""
144 144
145 145 def getbundle(source, **kwargs):
146 146 """Obtain remote repository data as a bundle.
147 147
148 148 This command is how the bulk of repository data is transferred from
149 149        the peer to the local repository.
150 150
151 151 Returns a generator of bundle data.
152 152 """
153 153
154 154 def heads():
155 155 """Determine all known head revisions in the peer.
156 156
157 157 Returns an iterable of binary nodes.
158 158 """
159 159
160 160 def known(nodes):
161 161 """Determine whether multiple nodes are known.
162 162
163 163 Accepts an iterable of nodes whose presence to check for.
164 164
165 165        Returns an iterable of booleans indicating whether the corresponding
166 166        node at that index is known to the peer.
167 167 """
168 168
169 169 def listkeys(namespace):
170 170 """Obtain all keys in a pushkey namespace.
171 171
172 172 Returns an iterable of key names.
173 173 """
174 174
175 175 def lookup(key):
176 176 """Resolve a value to a known revision.
177 177
178 178 Returns a binary node of the resolved revision on success.
179 179 """
180 180
181 181 def pushkey(namespace, key, old, new):
182 182 """Set a value using the ``pushkey`` protocol.
183 183
184 184 Arguments correspond to the pushkey namespace and key to operate on and
185 185 the old and new values for that key.
186 186
187 187 Returns a string with the peer result. The value inside varies by the
188 188 namespace.
189 189 """
190 190
191 191 def stream_out():
192 192 """Obtain streaming clone data.
193 193
194 194 Successful result should be a generator of data chunks.
195 195 """
196 196
197 197 def unbundle(bundle, heads, url):
198 198 """Transfer repository data to the peer.
199 199
200 200 This is how the bulk of data during a push is transferred.
201 201
202 202 Returns the integer number of heads added to the peer.
203 203 """
204 204
205 205
206 206 class ipeerlegacycommands(interfaceutil.Interface):
207 207 """Interface for implementing support for legacy wire protocol commands.
208 208
209 209 Wire protocol commands transition to legacy status when they are no longer
210 210 used by modern clients. To facilitate identifying which commands are
211 211 legacy, the interfaces are split.
212 212 """
213 213
214 214 def between(pairs):
215 215 """Obtain nodes between pairs of nodes.
216 216
217 217 ``pairs`` is an iterable of node pairs.
218 218
219 219 Returns an iterable of iterables of nodes corresponding to each
220 220 requested pair.
221 221 """
222 222
223 223 def branches(nodes):
224 224 """Obtain ancestor changesets of specific nodes back to a branch point.
225 225
226 226 For each requested node, the peer finds the first ancestor node that is
227 227 a DAG root or is a merge.
228 228
229 229 Returns an iterable of iterables with the resolved values for each node.
230 230 """
231 231
232 232 def changegroup(nodes, source):
233 233 """Obtain a changegroup with data for descendants of specified nodes."""
234 234
235 235 def changegroupsubset(bases, heads, source):
236 236 pass
237 237
238 238
239 239 class ipeercommandexecutor(interfaceutil.Interface):
240 240 """Represents a mechanism to execute remote commands.
241 241
242 242 This is the primary interface for requesting that wire protocol commands
243 243 be executed. Instances of this interface are active in a context manager
244 244 and have a well-defined lifetime. When the context manager exits, all
245 245 outstanding requests are waited on.
246 246 """
247 247
248 248 def callcommand(name, args):
249 249 """Request that a named command be executed.
250 250
251 251 Receives the command name and a dictionary of command arguments.
252 252
253 253 Returns a ``concurrent.futures.Future`` that will resolve to the
254 254 result of that command request. That exact value is left up to
255 255 the implementation and possibly varies by command.
256 256
257 257 Not all commands can coexist with other commands in an executor
258 258 instance: it depends on the underlying wire protocol transport being
259 259 used and the command itself.
260 260
261 261 Implementations MAY call ``sendcommands()`` automatically if the
262 262 requested command can not coexist with other commands in this executor.
263 263
264 264 Implementations MAY call ``sendcommands()`` automatically when the
265 265 future's ``result()`` is called. So, consumers using multiple
266 266 commands with an executor MUST ensure that ``result()`` is not called
267 267 until all command requests have been issued.
268 268 """
269 269
270 270 def sendcommands():
271 271 """Trigger submission of queued command requests.
272 272
273 273 Not all transports submit commands as soon as they are requested to
274 274 run. When called, this method forces queued command requests to be
275 275 issued. It will no-op if all commands have already been sent.
276 276
277 277 When called, no more new commands may be issued with this executor.
278 278 """
279 279
280 280 def close():
281 281 """Signal that this command request is finished.
282 282
283 283 When called, no more new commands may be issued. All outstanding
284 284 commands that have previously been issued are waited on before
285 285 returning. This not only includes waiting for the futures to resolve,
286 286 but also waiting for all response data to arrive. In other words,
287 287 calling this waits for all on-wire state for issued command requests
288 288 to finish.
289 289
290 290 When used as a context manager, this method is called when exiting the
291 291 context manager.
292 292
293 293 This method may call ``sendcommands()`` if there are buffered commands.
294 294 """
295 295
296 296
297 297 class ipeerrequests(interfaceutil.Interface):
298 298 """Interface for executing commands on a peer."""
299 299
300 300 limitedarguments = interfaceutil.Attribute(
301 301        """True if the peer cannot receive large argument values for commands."""
302 302 )
303 303
304 304 def commandexecutor():
305 305 """A context manager that resolves to an ipeercommandexecutor.
306 306
307 307 The object this resolves to can be used to issue command requests
308 308 to the peer.
309 309
310 310 Callers should call its ``callcommand`` method to issue command
311 311 requests.
312 312
313 313 A new executor should be obtained for each distinct set of commands
314 314 (possibly just a single command) that the consumer wants to execute
315 315 as part of a single operation or round trip. This is because some
316 316 peers are half-duplex and/or don't support persistent connections.
317 317 e.g. in the case of HTTP peers, commands sent to an executor represent
318 318 a single HTTP request. While some peers may support multiple command
319 319 sends over the wire per executor, consumers need to code to the least
320 320 capable peer. So it should be assumed that command executors buffer
321 321 called commands until they are told to send them and that each
322 322 command executor could result in a new connection or wire-level request
323 323 being issued.
324 324 """
325 325
326 326
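# A minimal usage sketch of the executor contract described above. This is
# illustrative only: ``peer`` is assumed to implement ``ipeerrequests`` and the
# command names/arguments are placeholders, not a claim about what any given
# peer supports.
#
#     with peer.commandexecutor() as executor:
#         f_heads = executor.callcommand(b'heads', {})
#         f_known = executor.callcommand(b'known', {b'nodes': wanted_nodes})
#         # Issue every callcommand() before touching result(); some
#         # transports only send the batch when told to (or on exit).
#     # Exiting the context manager waits on outstanding requests, so the
#     # futures are safe to consume here.
#     heads = f_heads.result()
#     known = f_known.result()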
327 327 class ipeerbase(ipeerconnection, ipeercapabilities, ipeerrequests):
328 328 """Unified interface for peer repositories.
329 329
330 330 All peer instances must conform to this interface.
331 331 """
332 332
333 333
334 334 class ipeerv2(ipeerconnection, ipeercapabilities, ipeerrequests):
335 335 """Unified peer interface for wire protocol version 2 peers."""
336 336
337 337 apidescriptor = interfaceutil.Attribute(
338 338 """Data structure holding description of server API."""
339 339 )
340 340
341 341
342 342 @interfaceutil.implementer(ipeerbase)
343 343 class peer(object):
344 344 """Base class for peer repositories."""
345 345
346 346 limitedarguments = False
347 347
348 348 def capable(self, name):
349 349 caps = self.capabilities()
350 350 if name in caps:
351 351 return True
352 352
353 353 name = b'%s=' % name
354 354 for cap in caps:
355 355 if cap.startswith(name):
356 356 return cap[len(name) :]
357 357
358 358 return False
359 359
360 360 def requirecap(self, name, purpose):
361 361 if self.capable(name):
362 362 return
363 363
364 364 raise error.CapabilityError(
365 365 _(
366 366 b'cannot %s; remote repository does not support the '
367 367 b'\'%s\' capability'
368 368 )
369 369 % (purpose, name)
370 370 )
371 371
372 372
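# Illustrative sketch of the ``capable()`` behavior implemented above. The
# capability names are hypothetical examples, not a list of real capabilities:
#
#     # suppose capabilities() returns {b'lookup', b'bundle2=HG20'}
#     peer.capable(b'lookup')      # -> True (bare capability)
#     peer.capable(b'bundle2')     # -> b'HG20' (value after '=')
#     peer.capable(b'frobnicate')  # -> False (not advertised)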
373 373 class iverifyproblem(interfaceutil.Interface):
374 374 """Represents a problem with the integrity of the repository.
375 375
376 376 Instances of this interface are emitted to describe an integrity issue
377 377 with a repository (e.g. corrupt storage, missing data, etc).
378 378
379 379 Instances are essentially messages associated with severity.
380 380 """
381 381
382 382 warning = interfaceutil.Attribute(
383 383 """Message indicating a non-fatal problem."""
384 384 )
385 385
386 386 error = interfaceutil.Attribute("""Message indicating a fatal problem.""")
387 387
388 388 node = interfaceutil.Attribute(
389 389 """Revision encountering the problem.
390 390
391 391 ``None`` means the problem doesn't apply to a single revision.
392 392 """
393 393 )
394 394
395 395
396 396 class irevisiondelta(interfaceutil.Interface):
397 397 """Represents a delta between one revision and another.
398 398
399 399 Instances convey enough information to allow a revision to be exchanged
400 400 with another repository.
401 401
402 402 Instances represent the fulltext revision data or a delta against
403 403 another revision. Therefore the ``revision`` and ``delta`` attributes
404 404 are mutually exclusive.
405 405
406 406 Typically used for changegroup generation.
407 407 """
408 408
409 409 node = interfaceutil.Attribute("""20 byte node of this revision.""")
410 410
411 411 p1node = interfaceutil.Attribute(
412 412 """20 byte node of 1st parent of this revision."""
413 413 )
414 414
415 415 p2node = interfaceutil.Attribute(
416 416 """20 byte node of 2nd parent of this revision."""
417 417 )
418 418
419 419 linknode = interfaceutil.Attribute(
420 420 """20 byte node of the changelog revision this node is linked to."""
421 421 )
422 422
423 423 flags = interfaceutil.Attribute(
424 424 """2 bytes of integer flags that apply to this revision.
425 425
426 426 This is a bitwise composition of the ``REVISION_FLAG_*`` constants.
427 427 """
428 428 )
429 429
430 430 basenode = interfaceutil.Attribute(
431 431 """20 byte node of the revision this data is a delta against.
432 432
433 433 ``nullid`` indicates that the revision is a full revision and not
434 434 a delta.
435 435 """
436 436 )
437 437
438 438 baserevisionsize = interfaceutil.Attribute(
439 439 """Size of base revision this delta is against.
440 440
441 441 May be ``None`` if ``basenode`` is ``nullid``.
442 442 """
443 443 )
444 444
445 445 revision = interfaceutil.Attribute(
446 446 """Raw fulltext of revision data for this node."""
447 447 )
448 448
449 449 delta = interfaceutil.Attribute(
450 450 """Delta between ``basenode`` and ``node``.
451 451
452 452 Stored in the bdiff delta format.
453 453 """
454 454 )
455 455
456 sidedata = interfaceutil.Attribute(
457 """Raw sidedata bytes for the given revision."""
458 )
459
456 460
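# Hedged consumer-side sketch: because ``revision`` and ``delta`` are mutually
# exclusive, code receiving an ``irevisiondelta`` (``rdelta`` below, an assumed
# variable) typically branches on which field is populated:
#
#     if rdelta.delta is not None:
#         # a bdiff-format delta against the fulltext named by ``basenode``,
#         # e.g. applied with ``mdiff.patch()``
#         fulltext = mdiff.patch(base_fulltext, rdelta.delta)
#     else:
#         fulltext = rdelta.revision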
457 461 class ifilerevisionssequence(interfaceutil.Interface):
458 462 """Contains index data for all revisions of a file.
459 463
460 464 Types implementing this behave like lists of tuples. The index
461 465 in the list corresponds to the revision number. The values contain
462 466 index metadata.
463 467
464 468 The *null* revision (revision number -1) is always the last item
465 469 in the index.
466 470 """
467 471
468 472 def __len__():
469 473 """The total number of revisions."""
470 474
471 475 def __getitem__(rev):
472 476 """Returns the object having a specific revision number.
473 477
474 478 Returns an 8-tuple with the following fields:
475 479
476 480 offset+flags
477 481 Contains the offset and flags for the revision. 64-bit unsigned
478 482 integer where first 6 bytes are the offset and the next 2 bytes
479 483 are flags. The offset can be 0 if it is not used by the store.
480 484 compressed size
481 485 Size of the revision data in the store. It can be 0 if it isn't
482 486 needed by the store.
483 487 uncompressed size
484 488 Fulltext size. It can be 0 if it isn't needed by the store.
485 489 base revision
486 490 Revision number of revision the delta for storage is encoded
487 491 against. -1 indicates not encoded against a base revision.
488 492 link revision
489 493 Revision number of changelog revision this entry is related to.
490 494 p1 revision
491 495 Revision number of 1st parent. -1 if no 1st parent.
492 496 p2 revision
493 497 Revision number of 2nd parent. -1 if no 2nd parent.
494 498 node
495 499 Binary node value for this revision number.
496 500
497 501 Negative values should index off the end of the sequence. ``-1``
498 502 should return the null revision. ``-2`` should return the most
499 503 recent revision.
500 504 """
501 505
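# Sketch of unpacking an index entry per the field order documented above
# (``index`` is an assumed ``ifilerevisionssequence``):
#
#     (offset_flags, comp_size, uncomp_size, base_rev,
#      link_rev, p1_rev, p2_rev, node) = index[rev]
#     offset = offset_flags >> 16    # high 6 bytes
#     flags = offset_flags & 0xFFFF  # low 2 bytes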
502 506 def __contains__(rev):
503 507 """Whether a revision number exists."""
504 508
505 509 def insert(self, i, entry):
506 510 """Add an item to the index at specific revision."""
507 511
508 512
509 513 class ifileindex(interfaceutil.Interface):
510 514 """Storage interface for index data of a single file.
511 515
512 516 File storage data is divided into index metadata and data storage.
513 517 This interface defines the index portion of the interface.
514 518
515 519 The index logically consists of:
516 520
517 521 * A mapping between revision numbers and nodes.
518 522 * DAG data (storing and querying the relationship between nodes).
519 523 * Metadata to facilitate storage.
520 524 """
521 525
522 526 def __len__():
523 527 """Obtain the number of revisions stored for this file."""
524 528
525 529 def __iter__():
526 530 """Iterate over revision numbers for this file."""
527 531
528 532 def hasnode(node):
529 533 """Returns a bool indicating if a node is known to this store.
530 534
531 535 Implementations must only return True for full, binary node values:
532 536 hex nodes, revision numbers, and partial node matches must be
533 537 rejected.
534 538
535 539 The null node is never present.
536 540 """
537 541
538 542 def revs(start=0, stop=None):
539 543 """Iterate over revision numbers for this file, with control."""
540 544
541 545 def parents(node):
542 546 """Returns a 2-tuple of parent nodes for a revision.
543 547
544 548 Values will be ``nullid`` if the parent is empty.
545 549 """
546 550
547 551 def parentrevs(rev):
548 552 """Like parents() but operates on revision numbers."""
549 553
550 554 def rev(node):
551 555 """Obtain the revision number given a node.
552 556
553 557 Raises ``error.LookupError`` if the node is not known.
554 558 """
555 559
556 560 def node(rev):
557 561 """Obtain the node value given a revision number.
558 562
559 563 Raises ``IndexError`` if the node is not known.
560 564 """
561 565
562 566 def lookup(node):
563 567 """Attempt to resolve a value to a node.
564 568
565 569 Value can be a binary node, hex node, revision number, or a string
566 570 that can be converted to an integer.
567 571
568 572 Raises ``error.LookupError`` if a node could not be resolved.
569 573 """
570 574
571 575 def linkrev(rev):
572 576 """Obtain the changeset revision number a revision is linked to."""
573 577
574 578 def iscensored(rev):
575 579 """Return whether a revision's content has been censored."""
576 580
577 581 def commonancestorsheads(node1, node2):
578 582 """Obtain an iterable of nodes containing heads of common ancestors.
579 583
580 584 See ``ancestor.commonancestorsheads()``.
581 585 """
582 586
583 587 def descendants(revs):
584 588 """Obtain descendant revision numbers for a set of revision numbers.
585 589
586 590 If ``nullrev`` is in the set, this is equivalent to ``revs()``.
587 591 """
588 592
589 593 def heads(start=None, stop=None):
590 594 """Obtain a list of nodes that are DAG heads, with control.
591 595
592 596 The set of revisions examined can be limited by specifying
593 597 ``start`` and ``stop``. ``start`` is a node. ``stop`` is an
594 598 iterable of nodes. DAG traversal starts at earlier revision
595 599 ``start`` and iterates forward until any node in ``stop`` is
596 600 encountered.
597 601 """
598 602
599 603 def children(node):
600 604 """Obtain nodes that are children of a node.
601 605
602 606 Returns a list of nodes.
603 607 """
604 608
605 609
606 610 class ifiledata(interfaceutil.Interface):
607 611 """Storage interface for data storage of a specific file.
608 612
609 613 This complements ``ifileindex`` and provides an interface for accessing
610 614 data for a tracked file.
611 615 """
612 616
613 617 def size(rev):
614 618 """Obtain the fulltext size of file data.
615 619
616 620 Any metadata is excluded from size measurements.
617 621 """
618 622
619 623 def revision(node, raw=False):
620 624 """Obtain fulltext data for a node.
621 625
622 626 By default, any storage transformations are applied before the data
623 627 is returned. If ``raw`` is True, non-raw storage transformations
624 628 are not applied.
625 629
626 630 The fulltext data may contain a header containing metadata. Most
627 631 consumers should use ``read()`` to obtain the actual file data.
628 632 """
629 633
630 634 def rawdata(node):
631 635 """Obtain raw data for a node."""
632 636
633 637 def read(node):
634 638 """Resolve file fulltext data.
635 639
636 640 This is similar to ``revision()`` except any metadata in the data
637 641 headers is stripped.
638 642 """
639 643
640 644 def renamed(node):
641 645 """Obtain copy metadata for a node.
642 646
643 647 Returns ``False`` if no copy metadata is stored or a 2-tuple of
644 648 (path, node) from which this revision was copied.
645 649 """
646 650
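# Hedged sketch of consuming copy metadata from ``renamed()`` (``fl`` is an
# assumed ``ifiledata`` instance):
#
#     copied = fl.renamed(node)
#     if copied:
#         srcpath, srcnode = copied  # revision was copied from srcpath
#     else:
#         pass                       # no copy metadata stored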
647 651 def cmp(node, fulltext):
648 652 """Compare fulltext to another revision.
649 653
650 654 Returns True if the fulltext is different from what is stored.
651 655
652 656 This takes copy metadata into account.
653 657
654 658 TODO better document the copy metadata and censoring logic.
655 659 """
656 660
657 661 def emitrevisions(
658 662 nodes,
659 663 nodesorder=None,
660 664 revisiondata=False,
661 665 assumehaveparentrevisions=False,
662 666 deltamode=CG_DELTAMODE_STD,
663 667 ):
664 668 """Produce ``irevisiondelta`` for revisions.
665 669
666 670 Given an iterable of nodes, emits objects conforming to the
667 671 ``irevisiondelta`` interface that describe revisions in storage.
668 672
669 673 This method is a generator.
670 674
671 675 The input nodes may be unordered. Implementations must ensure that a
672 676 node's parents are emitted before the node itself. Transitively, this
673 677 means that a node may only be emitted once all its ancestors in
674 678 ``nodes`` have also been emitted.
675 679
676 680 By default, emits "index" data (the ``node``, ``p1node``, and
677 681 ``p2node`` attributes). If ``revisiondata`` is set, revision data
678 682 will also be present on the emitted objects.
679 683
680 684 With default argument values, implementations can choose to emit
681 685 either fulltext revision data or a delta. When emitting deltas,
682 686 implementations must consider whether the delta's base revision
683 687 fulltext is available to the receiver.
684 688
685 689 The base revision fulltext is guaranteed to be available if any of
686 690 the following are met:
687 691
688 692 * Its fulltext revision was emitted by this method call.
689 693 * A delta for that revision was emitted by this method call.
690 694 * ``assumehaveparentrevisions`` is True and the base revision is a
691 695 parent of the node.
692 696
693 697 ``nodesorder`` can be used to control the order that revisions are
694 698 emitted. By default, revisions can be reordered as long as they are
695 699 in DAG topological order (see above). If the value is ``nodes``,
696 700 the iteration order from ``nodes`` should be used. If the value is
697 701 ``storage``, then the native order from the backing storage layer
698 702 is used. (Not all storage layers will have strong ordering and behavior
699 703 of this mode is storage-dependent.) ``nodes`` ordering can force
700 704 revisions to be emitted before their ancestors, so consumers should
701 705 use it with care.
702 706
703 707 The ``linknode`` attribute on the returned ``irevisiondelta`` may not
704 708 be set and it is the caller's responsibility to resolve it, if needed.
705 709
706 710 If ``deltamode`` is CG_DELTAMODE_PREV and revision data is requested,
707 711 all revision data should be emitted as deltas against the revision
708 712 emitted just prior. The initial revision should be a delta against its
709 713 1st parent.
710 714 """
711 715
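# Hedged sketch of driving ``emitrevisions()`` (``fl`` is an assumed
# ``ifiledata`` store; what is done with each emitted object is left as a
# placeholder):
#
#     for rdelta in fl.emitrevisions(
#         nodes, revisiondata=True, assumehaveparentrevisions=True
#     ):
#         if rdelta.delta is not None:
#             ...  # emit delta against rdelta.basenode
#         else:
#             ...  # emit the fulltext carried in rdelta.revision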
712 716
713 717 class ifilemutation(interfaceutil.Interface):
714 718 """Storage interface for mutation events of a tracked file."""
715 719
716 720 def add(filedata, meta, transaction, linkrev, p1, p2):
717 721 """Add a new revision to the store.
718 722
719 723 Takes file data, dictionary of metadata, a transaction, linkrev,
720 724 and parent nodes.
721 725
722 726 Returns the node that was added.
723 727
724 728 May no-op if a revision matching the supplied data is already stored.
725 729 """
726 730
727 731 def addrevision(
728 732 revisiondata,
729 733 transaction,
730 734 linkrev,
731 735 p1,
732 736 p2,
733 737 node=None,
734 738 flags=0,
735 739 cachedelta=None,
736 740 ):
737 741 """Add a new revision to the store and return its number.
738 742
739 743 This is similar to ``add()`` except it operates at a lower level.
740 744
741 745 The data passed in already contains a metadata header, if any.
742 746
743 747 ``node`` and ``flags`` can be used to define the expected node and
744 748 the flags to use with storage. ``flags`` is a bitwise value composed
745 749 of the various ``REVISION_FLAG_*`` constants.
746 750
747 751 ``add()`` is usually called when adding files from e.g. the working
748 752 directory. ``addrevision()`` is often called by ``add()`` and for
749 753 scenarios where revision data has already been computed, such as when
750 754 applying raw data from a peer repo.
751 755 """
752 756
753 757 def addgroup(
754 758 deltas,
755 759 linkmapper,
756 760 transaction,
757 761 addrevisioncb=None,
758 762 duplicaterevisioncb=None,
759 763 maybemissingparents=False,
760 764 ):
761 765 """Process a series of deltas for storage.
762 766
763 767 ``deltas`` is an iterable of 7-tuples of
764 768 (node, p1, p2, linknode, deltabase, delta, flags) defining revisions
765 769 to add.
766 770
767 771 The ``delta`` field contains ``mpatch`` data to apply to a base
768 772 revision, identified by ``deltabase``. The base node can be
769 773 ``nullid``, in which case the header from the delta can be ignored
770 774 and the delta used as the fulltext.
771 775
772 776 ``alwayscache`` instructs the lower layers to cache the content of the
773 777 newly added revision, even if it needs to be explicitly computed.
774 778 This used to be the default when ``addrevisioncb`` was provided up to
775 779 Mercurial 5.8.
776 780
777 781 ``addrevisioncb`` should be called for each new rev as it is committed.
778 782 ``duplicaterevisioncb`` should be called for all revs with a
779 783 pre-existing node.
780 784
781 785 ``maybemissingparents`` is a bool indicating whether the incoming
782 786 data may reference parents/ancestor revisions that aren't present.
783 787 This flag is set when receiving data into a "shallow" store that
784 788 doesn't hold all history.
785 789
786 790 Returns a list of nodes that were processed. A node will be in the list
787 791 even if it existed in the store previously.
788 792 """
789 793
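# Hedged sketch of the shape ``addgroup()`` consumes. Every value below is a
# placeholder; ``linkmapper`` and ``on_new_revision`` are assumed callables
# supplied by the caller:
#
#     deltas = [
#         (node, p1, p2, linknode, deltabase, delta, flags),
#         # ... one 7-tuple per incoming revision
#     ]
#     fl.addgroup(
#         deltas, linkmapper, transaction, addrevisioncb=on_new_revision
#     )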
790 794 def censorrevision(tr, node, tombstone=b''):
791 795 """Remove the content of a single revision.
792 796
793 797 The specified ``node`` will have its content purged from storage.
794 798 Future attempts to access the revision data for this node will
795 799 result in failure.
796 800
797 801 A ``tombstone`` message can optionally be stored. This message may be
798 802 displayed to users when they attempt to access the missing revision
799 803 data.
800 804
801 805 Storage backends may have stored deltas against the previous content
802 806 in this revision. As part of censoring a revision, these storage
803 807 backends are expected to rewrite any internally stored deltas such
804 808 that they no longer reference the deleted content.
805 809 """
806 810
807 811 def getstrippoint(minlink):
808 812 """Find the minimum revision that must be stripped to strip a linkrev.
809 813
810 814 Returns a 2-tuple containing the minimum revision number and a set
811 815 of all revisions numbers that would be broken by this strip.
812 816
813 817 TODO this is highly revlog centric and should be abstracted into
814 818 a higher-level deletion API. ``repair.strip()`` relies on this.
815 819 """
816 820
817 821 def strip(minlink, transaction):
818 822 """Remove storage of items starting at a linkrev.
819 823
820 824 This uses ``getstrippoint()`` to determine the first node to remove.
821 825 Then it effectively truncates storage for all revisions after that.
822 826
823 827 TODO this is highly revlog centric and should be abstracted into a
824 828 higher-level deletion API.
825 829 """
826 830
827 831
828 832 class ifilestorage(ifileindex, ifiledata, ifilemutation):
829 833 """Complete storage interface for a single tracked file."""
830 834
831 835 def files():
832 836 """Obtain paths that are backing storage for this file.
833 837
834 838 TODO this is used heavily by verify code and there should probably
835 839 be a better API for that.
836 840 """
837 841
838 842 def storageinfo(
839 843 exclusivefiles=False,
840 844 sharedfiles=False,
841 845 revisionscount=False,
842 846 trackedsize=False,
843 847 storedsize=False,
844 848 ):
845 849 """Obtain information about storage for this file's data.
846 850
847 851 Returns a dict describing storage for this tracked path. The keys
848 852 in the dict map to arguments of the same. The arguments are bools
849 853 indicating whether to calculate and obtain that data.
850 854
851 855 exclusivefiles
852 856 Iterable of (vfs, path) describing files that are exclusively
853 857 used to back storage for this tracked path.
854 858
855 859 sharedfiles
856 860 Iterable of (vfs, path) describing files that are used to back
857 861 storage for this tracked path. Those files may also provide storage
858 862 for other stored entities.
859 863
860 864 revisionscount
861 865 Number of revisions available for retrieval.
862 866
863 867 trackedsize
864 868 Total size in bytes of all tracked revisions. This is a sum of the
865 869 length of the fulltext of all revisions.
866 870
867 871 storedsize
868 872 Total size in bytes used to store data for all tracked revisions.
869 873 This is commonly less than ``trackedsize`` due to internal usage
870 874 of deltas rather than fulltext revisions.
871 875
872 876 Not all storage backends may support all queries or have a reasonable
873 877 value to use. In that case, the value should be set to ``None`` and
874 878 callers are expected to handle this special value.
875 879 """
876 880
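# Hedged usage sketch: query only the figures that are needed and tolerate
# ``None`` for anything the backend cannot compute (``fl`` is an assumed
# ``ifilestorage``; the byte-string key spelling is an assumption mirroring
# the argument names):
#
#     info = fl.storageinfo(revisionscount=True, trackedsize=True)
#     if info[b'revisionscount'] is not None:
#         ...  # backend was able to count revisions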
877 881 def verifyintegrity(state):
878 882 """Verifies the integrity of file storage.
879 883
880 884 ``state`` is a dict holding state of the verifier process. It can be
881 885 used to communicate data between invocations of multiple storage
882 886 primitives.
883 887
884 888 If individual revisions cannot have their revision content resolved,
885 889 the method is expected to set the ``skipread`` key to a set of nodes
886 890 that encountered problems. If set, the method can also add the node(s)
887 891 to ``safe_renamed`` in order to indicate nodes that may perform the
888 892 rename checks with currently accessible data.
889 893
890 894 The method yields objects conforming to the ``iverifyproblem``
891 895 interface.
892 896 """
893 897
894 898
895 899 class idirs(interfaceutil.Interface):
896 900 """Interface representing a collection of directories from paths.
897 901
898 902 This interface is essentially a derived data structure representing
899 903 directories from a collection of paths.
900 904 """
901 905
902 906 def addpath(path):
903 907 """Add a path to the collection.
904 908
905 909 All directories in the path will be added to the collection.
906 910 """
907 911
908 912 def delpath(path):
909 913 """Remove a path from the collection.
910 914
911 915 If the removal was the last path in a particular directory, the
912 916 directory is removed from the collection.
913 917 """
914 918
915 919 def __iter__():
916 920 """Iterate over the directories in this collection of paths."""
917 921
918 922 def __contains__(path):
919 923 """Whether a specific directory is in this collection."""
920 924
921 925
922 926 class imanifestdict(interfaceutil.Interface):
923 927 """Interface representing a manifest data structure.
924 928
925 929 A manifest is effectively a dict mapping paths to entries. Each entry
926 930 consists of a binary node and extra flags affecting that entry.
927 931 """
928 932
929 933 def __getitem__(path):
930 934 """Returns the binary node value for a path in the manifest.
931 935
932 936 Raises ``KeyError`` if the path does not exist in the manifest.
933 937
934 938 Equivalent to ``self.find(path)[0]``.
935 939 """
936 940
937 941 def find(path):
938 942 """Returns the entry for a path in the manifest.
939 943
940 944 Returns a 2-tuple of (node, flags).
941 945
942 946 Raises ``KeyError`` if the path does not exist in the manifest.
943 947 """
944 948
945 949 def __len__():
946 950 """Return the number of entries in the manifest."""
947 951
948 952 def __nonzero__():
949 953 """Returns True if the manifest has entries, False otherwise."""
950 954
951 955 __bool__ = __nonzero__
952 956
953 957 def __setitem__(path, node):
954 958 """Define the node value for a path in the manifest.
955 959
956 960 If the path is already in the manifest, its flags will be copied to
957 961 the new entry.
958 962 """
959 963
960 964 def __contains__(path):
961 965 """Whether a path exists in the manifest."""
962 966
963 967 def __delitem__(path):
964 968 """Remove a path from the manifest.
965 969
966 970 Raises ``KeyError`` if the path is not in the manifest.
967 971 """
968 972
969 973 def __iter__():
970 974 """Iterate over paths in the manifest."""
971 975
972 976 def iterkeys():
973 977 """Iterate over paths in the manifest."""
974 978
975 979 def keys():
976 980 """Obtain a list of paths in the manifest."""
977 981
978 982 def filesnotin(other, match=None):
979 983 """Obtain the set of paths in this manifest but not in another.
980 984
981 985 ``match`` is an optional matcher function to be applied to both
982 986 manifests.
983 987
984 988 Returns a set of paths.
985 989 """
986 990
987 991 def dirs():
988 992 """Returns an object implementing the ``idirs`` interface."""
989 993
990 994 def hasdir(dir):
991 995 """Returns a bool indicating if a directory is in this manifest."""
992 996
993 997 def walk(match):
994 998 """Generator of paths in manifest satisfying a matcher.
995 999
996 1000 If the matcher has explicit files listed and they don't exist in
997 1001 the manifest, ``match.bad()`` is called for each missing file.
998 1002 """
999 1003
1000 1004 def diff(other, match=None, clean=False):
1001 1005 """Find differences between this manifest and another.
1002 1006
1003 1007 This manifest is compared to ``other``.
1004 1008
1005 1009 If ``match`` is provided, the two manifests are filtered against this
1006 1010 matcher and only entries satisfying the matcher are compared.
1007 1011
1008 1012 If ``clean`` is True, unchanged files are included in the returned
1009 1013 object.
1010 1014
1011 1015 Returns a dict with paths as keys and values of 2-tuples of 2-tuples of
1012 1016 the form ``((node1, flag1), (node2, flag2))`` where ``(node1, flag1)``
1013 1017 represents the node and flags for this manifest and ``(node2, flag2)``
1014 1018 are the same for the other manifest.
1015 1019 """
1016 1020
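# Hedged sketch of walking a manifest diff (``m1`` and ``m2`` are assumed
# ``imanifestdict`` instances):
#
#     for path, ((n1, fl1), (n2, fl2)) in m1.diff(m2).items():
#         # (n1, fl1) describe the entry in this manifest, (n2, fl2) the
#         # entry in the other; a side lacking the path reports an empty
#         # entry (the exact placeholder is implementation-defined).
#         ...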
1017 1021 def setflag(path, flag):
1018 1022 """Set the flag value for a given path.
1019 1023
1020 1024 Raises ``KeyError`` if the path is not already in the manifest.
1021 1025 """
1022 1026
1023 1027 def get(path, default=None):
1024 1028 """Obtain the node value for a path or a default value if missing."""
1025 1029
1026 1030 def flags(path):
1027 1031 """Return the flags value for a path (default: empty bytestring)."""
1028 1032
1029 1033 def copy():
1030 1034 """Return a copy of this manifest."""
1031 1035
1032 1036 def items():
1033 1037 """Returns an iterable of (path, node) for items in this manifest."""
1034 1038
1035 1039 def iteritems():
1036 1040 """Identical to items()."""
1037 1041
1038 1042 def iterentries():
1039 1043 """Returns an iterable of (path, node, flags) for this manifest.
1040 1044
1041 1045 Similar to ``iteritems()`` except items are a 3-tuple and include
1042 1046 flags.
1043 1047 """
1044 1048
1045 1049 def text():
1046 1050 """Obtain the raw data representation for this manifest.
1047 1051
1048 1052 Result is used to create a manifest revision.
1049 1053 """
1050 1054
1051 1055 def fastdelta(base, changes):
1052 1056 """Obtain a delta between this manifest and another given changes.
1053 1057
1054 1058 ``base`` is the raw data representation for another manifest.
1055 1059
1056 1060 ``changes`` is an iterable of ``(path, to_delete)``.
1057 1061
1058 1062 Returns a 2-tuple containing ``bytearray(self.text())`` and the
1059 1063 delta between ``base`` and this manifest.
1060 1064
1061 1065 If this manifest implementation can't support ``fastdelta()``,
1062 1066 raise ``mercurial.manifest.FastdeltaUnavailable``.
1063 1067 """
1064 1068
1065 1069
1066 1070 class imanifestrevisionbase(interfaceutil.Interface):
1067 1071 """Base interface representing a single revision of a manifest.
1068 1072
1069 1073 Should not be used as a primary interface: should always be inherited
1070 1074 as part of a larger interface.
1071 1075 """
1072 1076
1073 1077 def copy():
1074 1078 """Obtain a copy of this manifest instance.
1075 1079
1076 1080 Returns an object conforming to the ``imanifestrevisionwritable``
1077 1081 interface. The instance will be associated with the same
1078 1082 ``imanifestlog`` collection as this instance.
1079 1083 """
1080 1084
1081 1085 def read():
1082 1086 """Obtain the parsed manifest data structure.
1083 1087
1084 1088 The returned object conforms to the ``imanifestdict`` interface.
1085 1089 """
1086 1090
1087 1091
1088 1092 class imanifestrevisionstored(imanifestrevisionbase):
1089 1093 """Interface representing a manifest revision committed to storage."""
1090 1094
1091 1095 def node():
1092 1096 """The binary node for this manifest."""
1093 1097
1094 1098 parents = interfaceutil.Attribute(
1095 1099 """List of binary nodes that are parents for this manifest revision."""
1096 1100 )
1097 1101
1098 1102 def readdelta(shallow=False):
1099 1103 """Obtain the manifest data structure representing changes from parent.
1100 1104
1101 1105 This manifest is compared to its 1st parent. A new manifest representing
1102 1106 those differences is constructed.
1103 1107
1104 1108 The returned object conforms to the ``imanifestdict`` interface.
1105 1109 """
1106 1110
1107 1111 def readfast(shallow=False):
1108 1112 """Calls either ``read()`` or ``readdelta()``.
1109 1113
1110 1114 The faster of the two options is called.
1111 1115 """
1112 1116
1113 1117 def find(key):
1114 1118 """Calls ``self.read().find(key)``.
1115 1119
1116 1120 Returns a 2-tuple of ``(node, flags)`` or raises ``KeyError``.
1117 1121 """
1118 1122
1119 1123
1120 1124 class imanifestrevisionwritable(imanifestrevisionbase):
1121 1125 """Interface representing a manifest revision that can be committed."""
1122 1126
1123 1127 def write(transaction, linkrev, p1node, p2node, added, removed, match=None):
1124 1128 """Add this revision to storage.
1125 1129
1126 1130 Takes a transaction object, the changeset revision number it will
1127 1131 be associated with, its parent nodes, and lists of added and
1128 1132 removed paths.
1129 1133
1130 1134 If match is provided, storage can choose not to inspect or write out
1131 1135 items that do not match. Storage is still required to be able to provide
1132 1136 the full manifest in the future for any directories written (these
1133 1137 manifests should not be "narrowed on disk").
1134 1138
1135 1139 Returns the binary node of the created revision.
1136 1140 """
1137 1141
1138 1142
1139 1143 class imanifeststorage(interfaceutil.Interface):
1140 1144 """Storage interface for manifest data."""
1141 1145
1142 1146 tree = interfaceutil.Attribute(
1143 1147 """The path to the directory this manifest tracks.
1144 1148
1145 1149 The empty bytestring represents the root manifest.
1146 1150 """
1147 1151 )
1148 1152
1149 1153 index = interfaceutil.Attribute(
1150 1154 """An ``ifilerevisionssequence`` instance."""
1151 1155 )
1152 1156
1153 1157 indexfile = interfaceutil.Attribute(
1154 1158 """Path of revlog index file.
1155 1159
1156 1160 TODO this is revlog specific and should not be exposed.
1157 1161 """
1158 1162 )
1159 1163
1160 1164 opener = interfaceutil.Attribute(
1161 1165 """VFS opener to use to access underlying files used for storage.
1162 1166
1163 1167 TODO this is revlog specific and should not be exposed.
1164 1168 """
1165 1169 )
1166 1170
1167 1171 version = interfaceutil.Attribute(
1168 1172 """Revlog version number.
1169 1173
1170 1174 TODO this is revlog specific and should not be exposed.
1171 1175 """
1172 1176 )
1173 1177
1174 1178 _generaldelta = interfaceutil.Attribute(
1175 1179 """Whether generaldelta storage is being used.
1176 1180
1177 1181 TODO this is revlog specific and should not be exposed.
1178 1182 """
1179 1183 )
1180 1184
1181 1185 fulltextcache = interfaceutil.Attribute(
1182 1186 """Dict with cache of fulltexts.
1183 1187
1184 1188 TODO this doesn't feel appropriate for the storage interface.
1185 1189 """
1186 1190 )
1187 1191
1188 1192 def __len__():
1189 1193 """Obtain the number of revisions stored for this manifest."""
1190 1194
1191 1195 def __iter__():
1192 1196 """Iterate over revision numbers for this manifest."""
1193 1197
1194 1198 def rev(node):
1195 1199 """Obtain the revision number given a binary node.
1196 1200
1197 1201 Raises ``error.LookupError`` if the node is not known.
1198 1202 """
1199 1203
1200 1204 def node(rev):
1201 1205 """Obtain the node value given a revision number.
1202 1206
1203 1207 Raises ``error.LookupError`` if the revision is not known.
1204 1208 """
1205 1209
1206 1210 def lookup(value):
1207 1211 """Attempt to resolve a value to a node.
1208 1212
1209 1213 Value can be a binary node, hex node, revision number, or a bytes
1210 1214 that can be converted to an integer.
1211 1215
1212 1216 Raises ``error.LookupError`` if a node could not be resolved.
1213 1217 """
1214 1218
1215 1219 def parents(node):
1216 1220 """Returns a 2-tuple of parent nodes for a node.
1217 1221
1218 1222 Values will be ``nullid`` if the parent is empty.
1219 1223 """
1220 1224
1221 1225 def parentrevs(rev):
1222 1226 """Like parents() but operates on revision numbers."""
1223 1227
1224 1228 def linkrev(rev):
1225 1229 """Obtain the changeset revision number a revision is linked to."""
1226 1230
1227 1231 def revision(node, _df=None, raw=False):
1228 1232 """Obtain fulltext data for a node."""
1229 1233
1230 1234 def rawdata(node, _df=None):
1231 1235 """Obtain raw data for a node."""
1232 1236
1233 1237 def revdiff(rev1, rev2):
1234 1238 """Obtain a delta between two revision numbers.
1235 1239
1236 1240 The returned data is the result of ``bdiff.bdiff()`` on the raw
1237 1241 revision data.
1238 1242 """
1239 1243
1240 1244 def cmp(node, fulltext):
1241 1245 """Compare fulltext to another revision.
1242 1246
1243 1247 Returns True if the fulltext is different from what is stored.
1244 1248 """
1245 1249
1246 1250 def emitrevisions(
1247 1251 nodes,
1248 1252 nodesorder=None,
1249 1253 revisiondata=False,
1250 1254 assumehaveparentrevisions=False,
1251 1255 ):
1252 1256 """Produce ``irevisiondelta`` describing revisions.
1253 1257
1254 1258 See the documentation for ``ifiledata`` for more.
1255 1259 """
1256 1260
1257 1261 def addgroup(
1258 1262 deltas,
1259 1263 linkmapper,
1260 1264 transaction,
1261 1265 addrevisioncb=None,
1262 1266 duplicaterevisioncb=None,
1263 1267 ):
1264 1268 """Process a series of deltas for storage.
1265 1269
1266 1270 See the documentation in ``ifilemutation`` for more.
1267 1271 """
1268 1272
1269 1273 def rawsize(rev):
1270 1274 """Obtain the size of tracked data.
1271 1275
1272 1276 Is equivalent to ``len(m.rawdata(node))``.
1273 1277
1274 1278 TODO this method is only used by upgrade code and may be removed.
1275 1279 """
1276 1280
1277 1281 def getstrippoint(minlink):
1278 1282 """Find minimum revision that must be stripped to strip a linkrev.
1279 1283
1280 1284 See the documentation in ``ifilemutation`` for more.
1281 1285 """
1282 1286
1283 1287 def strip(minlink, transaction):
1284 1288 """Remove storage of items starting at a linkrev.
1285 1289
1286 1290 See the documentation in ``ifilemutation`` for more.
1287 1291 """
1288 1292
1289 1293 def checksize():
1290 1294 """Obtain the expected sizes of backing files.
1291 1295
1292 1296 TODO this is used by verify and it should not be part of the interface.
1293 1297 """
1294 1298
1295 1299 def files():
1296 1300 """Obtain paths that are backing storage for this manifest.
1297 1301
1298 1302 TODO this is used by verify and there should probably be a better API
1299 1303 for this functionality.
1300 1304 """
1301 1305
1302 1306 def deltaparent(rev):
1303 1307 """Obtain the revision that a revision is delta'd against.
1304 1308
1305 1309 TODO delta encoding is an implementation detail of storage and should
1306 1310 not be exposed to the storage interface.
1307 1311 """
1308 1312
1309 1313 def clone(tr, dest, **kwargs):
1310 1314 """Clone this instance to another."""
1311 1315
1312 1316 def clearcaches(clear_persisted_data=False):
1313 1317 """Clear any caches associated with this instance."""
1314 1318
1315 1319 def dirlog(d):
1316 1320 """Obtain a manifest storage instance for a tree."""
1317 1321
1318 1322 def add(
1319 1323 m, transaction, link, p1, p2, added, removed, readtree=None, match=None
1320 1324 ):
1321 1325 """Add a revision to storage.
1322 1326
1323 1327 ``m`` is an object conforming to ``imanifestdict``.
1324 1328
1325 1329 ``link`` is the linkrev revision number.
1326 1330
1327 1331 ``p1`` and ``p2`` are the parent revision numbers.
1328 1332
1329 1333 ``added`` and ``removed`` are iterables of added and removed paths,
1330 1334 respectively.
1331 1335
1332 1336 ``readtree`` is a function that can be used to read the child tree(s)
1333 1337 when recursively writing the full tree structure when using
1334 1338 treemanifests.
1335 1339
1336 1340 ``match`` is a matcher that can be used to hint to storage that not all
1337 1341 paths must be inspected; this is an optimization and can be safely
1338 1342 ignored. Note that the storage must still be able to reproduce a full
1339 1343 manifest including files that did not match.
1340 1344 """
1341 1345
1342 1346 def storageinfo(
1343 1347 exclusivefiles=False,
1344 1348 sharedfiles=False,
1345 1349 revisionscount=False,
1346 1350 trackedsize=False,
1347 1351 storedsize=False,
1348 1352 ):
1349 1353 """Obtain information about storage for this manifest's data.
1350 1354
1351 1355 See ``ifilestorage.storageinfo()`` for a description of this method.
1352 1356 This one behaves the same way, except for manifest data.
1353 1357 """
1354 1358
1355 1359
1356 1360 class imanifestlog(interfaceutil.Interface):
1357 1361 """Interface representing a collection of manifest snapshots.
1358 1362
1359 1363 Represents the root manifest in a repository.
1360 1364
1361 1365 Also serves as a means to access nested tree manifests and to cache
1362 1366 tree manifests.
1363 1367 """
1364 1368
1365 1369 def __getitem__(node):
1366 1370 """Obtain a manifest instance for a given binary node.
1367 1371
1368 1372 Equivalent to calling ``self.get('', node)``.
1369 1373
1370 1374 The returned object conforms to the ``imanifestrevisionstored``
1371 1375 interface.
1372 1376 """
1373 1377
1374 1378 def get(tree, node, verify=True):
1375 1379 """Retrieve the manifest instance for a given directory and binary node.
1376 1380
1377 1381 ``node`` always refers to the node of the root manifest (which will be
1378 1382 the only manifest if flat manifests are being used).
1379 1383
1380 1384 If ``tree`` is the empty string, the root manifest is returned.
1381 1385 Otherwise the manifest for the specified directory will be returned
1382 1386 (requires tree manifests).
1383 1387
1384 1388 If ``verify`` is True, ``LookupError`` is raised if the node is not
1385 1389 known.
1386 1390
1387 1391 The returned object conforms to the ``imanifestrevisionstored``
1388 1392 interface.
1389 1393 """
1390 1394
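# Hedged usage sketch (``mlog`` is an assumed ``imanifestlog``; the directory
# spelling is illustrative and non-root lookups require tree manifests):
#
#     root = mlog[node]              # root manifest revision
#     m = root.read()                # an ``imanifestdict``
#     sub = mlog.get(b'dir/', node)  # manifest for a subdirectory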
1391 1395 def getstorage(tree):
1392 1396 """Retrieve an interface to storage for a particular tree.
1393 1397
1394 1398 If ``tree`` is the empty bytestring, storage for the root manifest will
1395 1399 be returned. Otherwise storage for a tree manifest is returned.
1396 1400
1397 1401 TODO formalize interface for returned object.
1398 1402 """
1399 1403
1400 1404 def clearcaches():
1401 1405 """Clear caches associated with this collection."""
1402 1406
1403 1407 def rev(node):
1404 1408 """Obtain the revision number for a binary node.
1405 1409
1406 1410 Raises ``error.LookupError`` if the node is not known.
1407 1411 """
1408 1412
1409 1413 def update_caches(transaction):
1410 1414 """Update whatever caches are relevant for the used storage."""
1411 1415
1412 1416
1413 1417 class ilocalrepositoryfilestorage(interfaceutil.Interface):
1414 1418 """Local repository sub-interface providing access to tracked file storage.
1415 1419
1416 1420 This interface defines how a repository accesses storage for a single
1417 1421 tracked file path.
1418 1422 """
1419 1423
1420 1424 def file(f):
1421 1425 """Obtain a filelog for a tracked path.
1422 1426
1423 1427 The returned type conforms to the ``ifilestorage`` interface.
1424 1428 """
1425 1429
1426 1430
1427 1431 class ilocalrepositorymain(interfaceutil.Interface):
1428 1432 """Main interface for local repositories.
1429 1433
1430 1434 This currently captures the reality of things - not how things should be.
1431 1435 """
1432 1436
1433 1437 supportedformats = interfaceutil.Attribute(
1434 1438 """Set of requirements that apply to stream clone.
1435 1439
1436 1440 This is actually a class attribute and is shared among all instances.
1437 1441 """
1438 1442 )
1439 1443
1440 1444 supported = interfaceutil.Attribute(
1441 1445 """Set of requirements that this repo is capable of opening."""
1442 1446 )
1443 1447
1444 1448 requirements = interfaceutil.Attribute(
1445 1449 """Set of requirements this repo uses."""
1446 1450 )
1447 1451
1448 1452 features = interfaceutil.Attribute(
1449 1453 """Set of "features" this repository supports.
1450 1454
1451 1455 A "feature" is a loosely-defined term. It can refer to a feature
1452 1456 in the classical sense or can describe an implementation detail
1453 1457 of the repository. For example, a ``readonly`` feature may denote
1454 1458 the repository as read-only. Or a ``revlogfilestore`` feature may
1455 1459 denote that the repository is using revlogs for file storage.
1456 1460
1457 1461 The intent of features is to provide a machine-queryable mechanism
1458 1462 for repo consumers to test for various repository characteristics.
1459 1463
1460 1464 Features are similar to ``requirements``. The main difference is that
1461 1465 requirements are stored on-disk and represent requirements to open the
1462 1466 repository. Features are more run-time capabilities of the repository
1463 1467 and more granular capabilities (which may be derived from requirements).
1464 1468 """
1465 1469 )
1466 1470
1467 1471 filtername = interfaceutil.Attribute(
1468 1472 """Name of the repoview that is active on this repo."""
1469 1473 )
1470 1474
1471 1475 wvfs = interfaceutil.Attribute(
1472 1476 """VFS used to access the working directory."""
1473 1477 )
1474 1478
1475 1479 vfs = interfaceutil.Attribute(
1476 1480 """VFS rooted at the .hg directory.
1477 1481
1478 1482 Used to access repository data not in the store.
1479 1483 """
1480 1484 )
1481 1485
1482 1486 svfs = interfaceutil.Attribute(
1483 1487 """VFS rooted at the store.
1484 1488
1485 1489 Used to access repository data in the store. Typically .hg/store.
1486 1490 But can point elsewhere if the store is shared.
1487 1491 """
1488 1492 )
1489 1493
1490 1494 root = interfaceutil.Attribute(
1491 1495 """Path to the root of the working directory."""
1492 1496 )
1493 1497
1494 1498 path = interfaceutil.Attribute("""Path to the .hg directory.""")
1495 1499
1496 1500 origroot = interfaceutil.Attribute(
1497 1501 """The filesystem path that was used to construct the repo."""
1498 1502 )
1499 1503
1500 1504 auditor = interfaceutil.Attribute(
1501 1505 """A pathauditor for the working directory.
1502 1506
1503 1507 This checks if a path refers to a nested repository.
1504 1508
1505 1509 Operates on the filesystem.
1506 1510 """
1507 1511 )
1508 1512
1509 1513 nofsauditor = interfaceutil.Attribute(
1510 1514 """A pathauditor for the working directory.
1511 1515
1512 1516 This is like ``auditor`` except it doesn't do filesystem checks.
1513 1517 """
1514 1518 )
1515 1519
1516 1520 baseui = interfaceutil.Attribute(
1517 1521 """Original ui instance passed into constructor."""
1518 1522 )
1519 1523
1520 1524 ui = interfaceutil.Attribute("""Main ui instance for this instance.""")
1521 1525
1522 1526 sharedpath = interfaceutil.Attribute(
1523 1527 """Path to the .hg directory of the repo this repo was shared from."""
1524 1528 )
1525 1529
1526 1530 store = interfaceutil.Attribute("""A store instance.""")
1527 1531
1528 1532 spath = interfaceutil.Attribute("""Path to the store.""")
1529 1533
1530 1534 sjoin = interfaceutil.Attribute("""Alias to self.store.join.""")
1531 1535
1532 1536 cachevfs = interfaceutil.Attribute(
1533 1537 """A VFS used to access the cache directory.
1534 1538
1535 1539 Typically .hg/cache.
1536 1540 """
1537 1541 )
1538 1542
1539 1543 wcachevfs = interfaceutil.Attribute(
1540 1544 """A VFS used to access the cache directory dedicated to the working copy
1541 1545
1542 1546 Typically .hg/wcache.
1543 1547 """
1544 1548 )
1545 1549
1546 1550 filteredrevcache = interfaceutil.Attribute(
1547 1551 """Holds sets of revisions to be filtered."""
1548 1552 )
1549 1553
1550 1554 names = interfaceutil.Attribute("""A ``namespaces`` instance.""")
1551 1555
1552 1556 filecopiesmode = interfaceutil.Attribute(
1553 1557 """The way files copies should be dealt with in this repo."""
1554 1558 )
1555 1559
1556 1560 def close():
1557 1561 """Close the handle on this repository."""
1558 1562
1559 1563 def peer():
1560 1564 """Obtain an object conforming to the ``peer`` interface."""
1561 1565
1562 1566 def unfiltered():
1563 1567 """Obtain an unfiltered/raw view of this repo."""
1564 1568
1565 1569 def filtered(name, visibilityexceptions=None):
1566 1570 """Obtain a named view of this repository."""
1567 1571
1568 1572 obsstore = interfaceutil.Attribute("""A store of obsolescence data.""")
1569 1573
1570 1574 changelog = interfaceutil.Attribute("""A handle on the changelog revlog.""")
1571 1575
1572 1576 manifestlog = interfaceutil.Attribute(
1573 1577 """An instance conforming to the ``imanifestlog`` interface.
1574 1578
1575 1579 Provides access to manifests for the repository.
1576 1580 """
1577 1581 )
1578 1582
1579 1583 dirstate = interfaceutil.Attribute("""Working directory state.""")
1580 1584
1581 1585 narrowpats = interfaceutil.Attribute(
1582 1586 """Matcher patterns for this repository's narrowspec."""
1583 1587 )
1584 1588
1585 1589 def narrowmatch(match=None, includeexact=False):
1586 1590 """Obtain a matcher for the narrowspec."""
1587 1591
1588 1592 def setnarrowpats(newincludes, newexcludes):
1589 1593 """Define the narrowspec for this repository."""
1590 1594
1591 1595 def __getitem__(changeid):
1592 1596 """Try to resolve a changectx."""
1593 1597
1594 1598 def __contains__(changeid):
1595 1599 """Whether a changeset exists."""
1596 1600
1597 1601 def __nonzero__():
1598 1602 """Always returns True."""
1599 1603 return True
1600 1604
1601 1605 __bool__ = __nonzero__
1602 1606
1603 1607 def __len__():
1604 1608 """Returns the number of changesets in the repo."""
1605 1609
1606 1610 def __iter__():
1607 1611 """Iterate over revisions in the changelog."""
1608 1612
1609 1613 def revs(expr, *args):
1610 1614 """Evaluate a revset.
1611 1615
1612 1616 Emits revisions.
1613 1617 """
1614 1618
1615 1619 def set(expr, *args):
1616 1620 """Evaluate a revset.
1617 1621
1618 1622 Emits changectx instances.
1619 1623 """
1620 1624
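# Hedged sketch of the two revset entry points above (``repo`` is an assumed
# object conforming to this interface; the expressions are illustrative):
#
#     for rev in repo.revs(b'heads(branch(%s))', b'default'):
#         ...  # integer revision numbers
#     for ctx in repo.set(b'draft()'):
#         ...  # changectx instances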
1621 1625 def anyrevs(specs, user=False, localalias=None):
1622 1626 """Find revisions matching one of the given revsets."""
1623 1627
1624 1628 def url():
1625 1629 """Returns a string representing the location of this repo."""
1626 1630
1627 1631 def hook(name, throw=False, **args):
1628 1632 """Call a hook."""
1629 1633
1630 1634 def tags():
1631 1635 """Return a mapping of tag to node."""
1632 1636
1633 1637 def tagtype(tagname):
1634 1638 """Return the type of a given tag."""
1635 1639
1636 1640 def tagslist():
1637 1641 """Return a list of tags ordered by revision."""
1638 1642
1639 1643 def nodetags(node):
1640 1644 """Return the tags associated with a node."""
1641 1645
1642 1646 def nodebookmarks(node):
1643 1647 """Return the list of bookmarks pointing to the specified node."""
1644 1648
1645 1649 def branchmap():
1646 1650 """Return a mapping of branch to heads in that branch."""
1647 1651
1648 1652 def revbranchcache():
1649 1653 pass
1650 1654
1651 1655 def register_changeset(rev, changelogrevision):
1652 1656 """Extension point for caches for new nodes.
1653 1657
1654 1658 Multiple consumers are expected to need parts of the changelogrevision,
1655 1659 so it is provided as an optimization to avoid duplicate lookups. A simple
1656 1660 cache would be fragile when other revisions are accessed, too."""
1657 1661 pass
1658 1662
1659 1663 def branchtip(branchtip, ignoremissing=False):
1660 1664 """Return the tip node for a given branch."""
1661 1665
1662 1666 def lookup(key):
1663 1667 """Resolve the node for a revision."""
1664 1668
1665 1669 def lookupbranch(key):
1666 1670 """Look up the branch name of the given revision or branch name."""
1667 1671
1668 1672 def known(nodes):
1669 1673 """Determine whether a series of nodes is known.
1670 1674
1671 1675 Returns a list of bools.
1672 1676 """
1673 1677
1674 1678 def local():
1675 1679 """Whether the repository is local."""
1676 1680 return True
1677 1681
1678 1682 def publishing():
1679 1683 """Whether the repository is a publishing repository."""
1680 1684
1681 1685 def cancopy():
1682 1686 pass
1683 1687
1684 1688 def shared():
1685 1689 """The type of shared repository or None."""
1686 1690
1687 1691 def wjoin(f, *insidef):
1688 1692 """Calls self.vfs.reljoin(self.root, f, *insidef)"""
1689 1693
1690 1694 def setparents(p1, p2):
1691 1695 """Set the parent nodes of the working directory."""
1692 1696
1693 1697 def filectx(path, changeid=None, fileid=None):
1694 1698 """Obtain a filectx for the given file revision."""
1695 1699
1696 1700 def getcwd():
1697 1701 """Obtain the current working directory from the dirstate."""
1698 1702
1699 1703 def pathto(f, cwd=None):
1700 1704 """Obtain the relative path to a file."""
1701 1705
1702 1706 def adddatafilter(name, fltr):
1703 1707 pass
1704 1708
1705 1709 def wread(filename):
1706 1710 """Read a file from wvfs, using data filters."""
1707 1711
1708 1712 def wwrite(filename, data, flags, backgroundclose=False, **kwargs):
1709 1713 """Write data to a file in the wvfs, using data filters."""
1710 1714
1711 1715 def wwritedata(filename, data):
1712 1716 """Resolve data for writing to the wvfs, using data filters."""
1713 1717
1714 1718 def currenttransaction():
1715 1719 """Obtain the current transaction instance or None."""
1716 1720
1717 1721 def transaction(desc, report=None):
1718 1722 """Open a new transaction to write to the repository."""
1719 1723
1720 1724 def undofiles():
1721 1725 """Returns a list of (vfs, path) for files to undo transactions."""
1722 1726
1723 1727 def recover():
1724 1728 """Roll back an interrupted transaction."""
1725 1729
1726 1730 def rollback(dryrun=False, force=False):
1727 1731 """Undo the last transaction.
1728 1732
1729 1733 DANGEROUS.
1730 1734 """
1731 1735
1732 1736 def updatecaches(tr=None, full=False):
1733 1737 """Warm repo caches."""
1734 1738
1735 1739 def invalidatecaches():
1736 1740 """Invalidate cached data due to the repository mutating."""
1737 1741
1738 1742 def invalidatevolatilesets():
1739 1743 pass
1740 1744
1741 1745 def invalidatedirstate():
1742 1746 """Invalidate the dirstate."""
1743 1747
1744 1748 def invalidate(clearfilecache=False):
1745 1749 pass
1746 1750
1747 1751 def invalidateall():
1748 1752 pass
1749 1753
1750 1754 def lock(wait=True):
1751 1755 """Lock the repository store and return a lock instance."""
1752 1756
1753 1757 def wlock(wait=True):
1754 1758 """Lock the non-store parts of the repository."""
1755 1759
1756 1760 def currentwlock():
1757 1761 """Return the wlock if it's held or None."""
1758 1762
1759 1763 def checkcommitpatterns(wctx, match, status, fail):
1760 1764 pass
1761 1765
1762 1766 def commit(
1763 1767 text=b'',
1764 1768 user=None,
1765 1769 date=None,
1766 1770 match=None,
1767 1771 force=False,
1768 1772 editor=False,
1769 1773 extra=None,
1770 1774 ):
1771 1775 """Add a new revision to the repository."""
1772 1776
1773 1777 def commitctx(ctx, error=False, origctx=None):
1774 1778 """Commit a commitctx instance to the repository."""
1775 1779
1776 1780 def destroying():
1777 1781 """Inform the repository that nodes are about to be destroyed."""
1778 1782
1779 1783 def destroyed():
1780 1784 """Inform the repository that nodes have been destroyed."""
1781 1785
1782 1786 def status(
1783 1787 node1=b'.',
1784 1788 node2=None,
1785 1789 match=None,
1786 1790 ignored=False,
1787 1791 clean=False,
1788 1792 unknown=False,
1789 1793 listsubrepos=False,
1790 1794 ):
1791 1795 """Convenience method to call repo[x].status()."""
1792 1796
1793 1797 def addpostdsstatus(ps):
1794 1798 pass
1795 1799
1796 1800 def postdsstatus():
1797 1801 pass
1798 1802
1799 1803 def clearpostdsstatus():
1800 1804 pass
1801 1805
1802 1806 def heads(start=None):
1803 1807 """Obtain list of nodes that are DAG heads."""
1804 1808
1805 1809 def branchheads(branch=None, start=None, closed=False):
1806 1810 pass
1807 1811
1808 1812 def branches(nodes):
1809 1813 pass
1810 1814
1811 1815 def between(pairs):
1812 1816 pass
1813 1817
1814 1818 def checkpush(pushop):
1815 1819 pass
1816 1820
1817 1821 prepushoutgoinghooks = interfaceutil.Attribute("""util.hooks instance.""")
1818 1822
1819 1823 def pushkey(namespace, key, old, new):
1820 1824 pass
1821 1825
1822 1826 def listkeys(namespace):
1823 1827 pass
1824 1828
1825 1829 def debugwireargs(one, two, three=None, four=None, five=None):
1826 1830 pass
1827 1831
1828 1832 def savecommitmessage(text):
1829 1833 pass
1830 1834
1831 1835
1832 1836 class completelocalrepository(
1833 1837 ilocalrepositorymain, ilocalrepositoryfilestorage
1834 1838 ):
1835 1839 """Complete interface for a local repository."""
1836 1840
1837 1841
1838 1842 class iwireprotocolcommandcacher(interfaceutil.Interface):
1839 1843 """Represents a caching backend for wire protocol commands.
1840 1844
1841 1845 Wire protocol version 2 supports transparent caching of many commands.
1842 1846 To leverage this caching, servers can activate objects that cache
1843 1847 command responses. Objects handle both cache writing and reading.
1844 1848 This interface defines how that response caching mechanism works.
1845 1849
1846 1850 Wire protocol version 2 commands emit a series of objects that are
1847 1851 serialized and sent to the client. The caching layer exists between
1848 1852 the invocation of the command function and the sending of its output
1849 1853 objects to an output layer.
1850 1854
1851 1855 Instances of this interface represent a binding to a cache that
1852 1856 can serve a response (in place of calling a command function) and/or
1853 1857 write responses to a cache for subsequent use.
1854 1858
1855 1859 When a command request arrives, the following happens with regards
1856 1860 to this interface:
1857 1861
1858 1862 1. The server determines whether the command request is cacheable.
1859 1863 2. If it is, an instance of this interface is spawned.
1860 1864 3. The cacher is activated in a context manager (``__enter__`` is called).
1861 1865 4. A cache *key* for that request is derived. This will call the
1862 1866 instance's ``adjustcachekeystate()`` method so the derivation
1863 1867 can be influenced.
1864 1868 5. The cacher is informed of the derived cache key via a call to
1865 1869 ``setcachekey()``.
1866 1870 6. The cacher's ``lookup()`` method is called to test for presence of
1867 1871 the derived key in the cache.
1868 1872 7. If ``lookup()`` returns a hit, that cached result is used in place
1869 1873 of invoking the command function. ``__exit__`` is called and the instance
1870 1874 is discarded.
1871 1875 8. The command function is invoked.
1872 1876 9. ``onobject()`` is called for each object emitted by the command
1873 1877 function.
1874 1878 10. After the final object is seen, ``onfinished()`` is called.
1875 1879 11. ``__exit__`` is called to signal the end of use of the instance.
1876 1880
1877 1881 Cache *key* derivation can be influenced by the instance.
1878 1882
1879 1883 Cache keys are initially derived by a deterministic representation of
1880 1884 the command request. This includes the command name, arguments, protocol
1881 1885 version, etc. This initial key derivation is performed by CBOR-encoding a
1882 1886 data structure and feeding that output into a hasher.
1883 1887
1884 1888 Instances of this interface can influence this initial key derivation
1885 1889 via ``adjustcachekeystate()``.
1886 1890
1887 1891 The instance is informed of the derived cache key via a call to
1888 1892 ``setcachekey()``. The instance must store the key locally so it can
1889 1893 be consulted on subsequent operations that may require it.
1890 1894
1891 1895 When constructed, the instance has access to a callable that can be used
1892 1896 for encoding response objects. This callable receives as its single
1893 1897 argument an object emitted by a command function. It returns an iterable
1894 1898 of bytes chunks representing the encoded object. Unless the cacher is
1895 1899 caching native Python objects in memory or has a way of reconstructing
1896 1900 the original Python objects, implementations typically call this function
1897 1901 to produce bytes from the output objects and then store those bytes in
1898 1902 the cache. When it comes time to re-emit those bytes, they are wrapped
1899 1903 in a ``wireprototypes.encodedresponse`` instance to tell the output
1900 1904 layer that they are pre-encoded.
1901 1905
1902 1906 When receiving the objects emitted by the command function, instances
1903 1907 can choose what to do with those objects. The simplest thing to do is
1904 1908 re-emit the original objects. They will be forwarded to the output
1905 1909 layer and will be processed as if the cacher did not exist.
1906 1910
1907 1911 Implementations could also choose to not emit objects - instead locally
1908 1912 buffering objects or their encoded representation. They could then emit
1909 1913 a single "coalesced" object when ``onfinished()`` is called. In
1910 1914 this way, the implementation would function as a filtering layer of
1911 1915 sorts.
1912 1916
1913 1917 When caching objects, typically the encoded form of the object will
1914 1918 be stored. Keep in mind that if the original object is forwarded to
1915 1919 the output layer, it will need to be encoded there as well. For large
1916 1920 output, this redundant encoding could add overhead. Implementations
1917 1921 could wrap the encoded object data in ``wireprototypes.encodedresponse``
1918 1922 instances to avoid this overhead.
1919 1923 """
1920 1924
1921 1925 def __enter__():
1922 1926 """Marks the instance as active.
1923 1927
1924 1928 Should return self.
1925 1929 """
1926 1930
1927 1931 def __exit__(exctype, excvalue, exctb):
1928 1932 """Called when cacher is no longer used.
1929 1933
1930 1934 This can be used by implementations to perform cleanup actions (e.g.
1931 1935 disconnecting network sockets, aborting a partially cached response).
1932 1936 """
1933 1937
1934 1938 def adjustcachekeystate(state):
1935 1939 """Influences cache key derivation by adjusting state to derive key.
1936 1940
1937 1941 A dict defining the state used to derive the cache key is passed.
1938 1942
1939 1943 Implementations can modify this dict to record additional state they
1940 1944 want to influence key derivation.
1941 1945
1942 1946 Implementations are *highly* encouraged to not modify or delete
1943 1947 existing keys.
1944 1948 """
1945 1949
1946 1950 def setcachekey(key):
1947 1951 """Record the derived cache key for this request.
1948 1952
1949 1953 Instances may mutate the key for internal usage, as desired. e.g.
1950 1954 instances may wish to prepend the repo name, introduce path
1951 1955 components for filesystem or URL addressing, etc. Behavior is up to
1952 1956 the cache.
1953 1957
1954 1958 Returns a bool indicating if the request is cacheable by this
1955 1959 instance.
1956 1960 """
1957 1961
1958 1962 def lookup():
1959 1963 """Attempt to resolve an entry in the cache.
1960 1964
1961 1965 The instance is instructed to look for the cache key that it was
1962 1966 informed about via the call to ``setcachekey()``.
1963 1967
1964 1968 If there's no cache hit or the cacher doesn't wish to use the cached
1965 1969 entry, ``None`` should be returned.
1966 1970
1967 1971 Else, a dict defining the cached result should be returned. The
1968 1972 dict may have the following keys:
1969 1973
1970 1974 objs
1971 1975 An iterable of objects that should be sent to the client. That
1972 1976 iterable of objects is expected to be what the command function
1973 1977 would return if invoked or an equivalent representation thereof.
1974 1978 """
1975 1979
1976 1980 def onobject(obj):
1977 1981 """Called when a new object is emitted from the command function.
1978 1982
1979 1983 Receives as its argument the object that was emitted from the
1980 1984 command function.
1981 1985
1982 1986 This method returns an iterator of objects to forward to the output
1983 1987 layer. The easiest implementation is a generator that just
1984 1988 ``yield obj``.
1985 1989 """
1986 1990
1987 1991 def onfinished():
1988 1992 """Called after all objects have been emitted from the command function.
1989 1993
1990 1994 Implementations should return an iterator of objects to forward to
1991 1995 the output layer.
1992 1996
1993 1997 This method can be a generator.
1994 1998 """
@@ -1,3199 +1,3201 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import io
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullhex,
29 29 nullid,
30 30 nullrev,
31 31 short,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .pycompat import getattr
39 39 from .revlogutils.constants import (
40 40 FLAG_GENERALDELTA,
41 41 FLAG_INLINE_DATA,
42 42 REVLOGV0,
43 43 REVLOGV1,
44 44 REVLOGV1_FLAGS,
45 45 REVLOGV2,
46 46 REVLOGV2_FLAGS,
47 47 REVLOG_DEFAULT_FLAGS,
48 48 REVLOG_DEFAULT_FORMAT,
49 49 REVLOG_DEFAULT_VERSION,
50 50 )
51 51 from .revlogutils.flagutil import (
52 52 REVIDX_DEFAULT_FLAGS,
53 53 REVIDX_ELLIPSIS,
54 54 REVIDX_EXTSTORED,
55 55 REVIDX_FLAGS_ORDER,
56 56 REVIDX_HASCOPIESINFO,
57 57 REVIDX_ISCENSORED,
58 58 REVIDX_RAWTEXT_CHANGING_FLAGS,
59 59 REVIDX_SIDEDATA,
60 60 )
61 61 from .thirdparty import attr
62 62 from . import (
63 63 ancestor,
64 64 dagop,
65 65 error,
66 66 mdiff,
67 67 policy,
68 68 pycompat,
69 69 templatefilters,
70 70 util,
71 71 )
72 72 from .interfaces import (
73 73 repository,
74 74 util as interfaceutil,
75 75 )
76 76 from .revlogutils import (
77 77 deltas as deltautil,
78 78 flagutil,
79 79 nodemap as nodemaputil,
80 80 sidedata as sidedatautil,
81 81 )
82 82 from .utils import (
83 83 storageutil,
84 84 stringutil,
85 85 )
86 86 from .pure import parsers as pureparsers
87 87
88 88 # blanked usage of all the names to prevent pyflakes constraints
89 89 # We need these names available in the module for extensions.
90 90 REVLOGV0
91 91 REVLOGV1
92 92 REVLOGV2
93 93 FLAG_INLINE_DATA
94 94 FLAG_GENERALDELTA
95 95 REVLOG_DEFAULT_FLAGS
96 96 REVLOG_DEFAULT_FORMAT
97 97 REVLOG_DEFAULT_VERSION
98 98 REVLOGV1_FLAGS
99 99 REVLOGV2_FLAGS
100 100 REVIDX_ISCENSORED
101 101 REVIDX_ELLIPSIS
102 102 REVIDX_SIDEDATA
103 103 REVIDX_HASCOPIESINFO
104 104 REVIDX_EXTSTORED
105 105 REVIDX_DEFAULT_FLAGS
106 106 REVIDX_FLAGS_ORDER
107 107 REVIDX_RAWTEXT_CHANGING_FLAGS
108 108
109 109 parsers = policy.importmod('parsers')
110 110 rustancestor = policy.importrust('ancestor')
111 111 rustdagop = policy.importrust('dagop')
112 112 rustrevlog = policy.importrust('revlog')
113 113
114 114 # Aliased for performance.
115 115 _zlibdecompress = zlib.decompress
116 116
117 117 # max size of revlog with inline data
118 118 _maxinline = 131072
119 119 _chunksize = 1048576
120 120
121 121 # Flag processors for REVIDX_ELLIPSIS.
122 122 def ellipsisreadprocessor(rl, text):
123 123 return text, False
124 124
125 125
126 126 def ellipsiswriteprocessor(rl, text):
127 127 return text, False
128 128
129 129
130 130 def ellipsisrawprocessor(rl, text):
131 131 return False
132 132
133 133
134 134 ellipsisprocessor = (
135 135 ellipsisreadprocessor,
136 136 ellipsiswriteprocessor,
137 137 ellipsisrawprocessor,
138 138 )
139 139
140 140
141 141 def getoffset(q):
142 142 return int(q >> 16)
143 143
144 144
145 145 def gettype(q):
146 146 return int(q & 0xFFFF)
147 147
148 148
149 149 def offset_type(offset, type):
150 150 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
151 151 raise ValueError(b'unknown revlog index flags')
152 152 return int(int(offset) << 16 | type)
153 153
154 154
155 155 def _verify_revision(rl, skipflags, state, node):
156 156 """Verify the integrity of the given revlog ``node`` while providing a hook
157 157 point for extensions to influence the operation."""
158 158 if skipflags:
159 159 state[b'skipread'].add(node)
160 160 else:
161 161 # Side-effect: read content and verify hash.
162 162 rl.revision(node)
163 163
164 164
165 165 # True if a fast implementation for persistent-nodemap is available
166 166 #
167 167 # We also consider that we have a "fast" implementation in "pure" python because
168 168 # people using pure don't really have performance considerations (and a
169 169 # wheelbarrow of other sources of slowness)
170 170 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
171 171 parsers, 'BaseIndexObject'
172 172 )
173 173
174 174
175 175 @attr.s(slots=True, frozen=True)
176 176 class _revisioninfo(object):
177 177 """Information about a revision that allows building its fulltext
178 178 node: expected hash of the revision
179 179 p1, p2: parent revs of the revision
180 180 btext: built text cache consisting of a one-element list
181 181 cachedelta: (baserev, uncompressed_delta) or None
182 182 flags: flags associated to the revision storage
183 183
184 184 One of btext[0] or cachedelta must be set.
185 185 """
186 186
187 187 node = attr.ib()
188 188 p1 = attr.ib()
189 189 p2 = attr.ib()
190 190 btext = attr.ib()
191 191 textlen = attr.ib()
192 192 cachedelta = attr.ib()
193 193 flags = attr.ib()
194 194
195 195
196 196 @interfaceutil.implementer(repository.irevisiondelta)
197 197 @attr.s(slots=True)
198 198 class revlogrevisiondelta(object):
199 199 node = attr.ib()
200 200 p1node = attr.ib()
201 201 p2node = attr.ib()
202 202 basenode = attr.ib()
203 203 flags = attr.ib()
204 204 baserevisionsize = attr.ib()
205 205 revision = attr.ib()
206 206 delta = attr.ib()
207 sidedata = attr.ib()
207 208 linknode = attr.ib(default=None)
208 209
209 210
210 211 @interfaceutil.implementer(repository.iverifyproblem)
211 212 @attr.s(frozen=True)
212 213 class revlogproblem(object):
213 214 warning = attr.ib(default=None)
214 215 error = attr.ib(default=None)
215 216 node = attr.ib(default=None)
216 217
217 218
218 219 # index v0:
219 220 # 4 bytes: offset
220 221 # 4 bytes: compressed length
221 222 # 4 bytes: base rev
222 223 # 4 bytes: link rev
223 224 # 20 bytes: parent 1 nodeid
224 225 # 20 bytes: parent 2 nodeid
225 226 # 20 bytes: nodeid
226 227 indexformatv0 = struct.Struct(b">4l20s20s20s")
227 228 indexformatv0_pack = indexformatv0.pack
228 229 indexformatv0_unpack = indexformatv0.unpack
229 230
230 231
231 232 class revlogoldindex(list):
232 233 @property
233 234 def nodemap(self):
234 235 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
235 236 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
236 237 return self._nodemap
237 238
238 239 @util.propertycache
239 240 def _nodemap(self):
240 241 nodemap = nodemaputil.NodeMap({nullid: nullrev})
241 242 for r in range(0, len(self)):
242 243 n = self[r][7]
243 244 nodemap[n] = r
244 245 return nodemap
245 246
246 247 def has_node(self, node):
247 248 """return True if the node exist in the index"""
248 249 return node in self._nodemap
249 250
250 251 def rev(self, node):
251 252 """return a revision for a node
252 253
253 254 If the node is unknown, raise a RevlogError"""
254 255 return self._nodemap[node]
255 256
256 257 def get_rev(self, node):
257 258 """return a revision for a node
258 259
259 260 If the node is unknown, return None"""
260 261 return self._nodemap.get(node)
261 262
262 263 def append(self, tup):
263 264 self._nodemap[tup[7]] = len(self)
264 265 super(revlogoldindex, self).append(tup)
265 266
266 267 def __delitem__(self, i):
267 268 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
268 269 raise ValueError(b"deleting slices only supports a:-1 with step 1")
269 270 for r in pycompat.xrange(i.start, len(self)):
270 271 del self._nodemap[self[r][7]]
271 272 super(revlogoldindex, self).__delitem__(i)
272 273
273 274 def clearcaches(self):
274 275 self.__dict__.pop('_nodemap', None)
275 276
276 277 def __getitem__(self, i):
277 278 if i == -1:
278 279 return (0, 0, 0, -1, -1, -1, -1, nullid)
279 280 return list.__getitem__(self, i)
280 281
281 282
282 283 class revlogoldio(object):
283 284 def __init__(self):
284 285 self.size = indexformatv0.size
285 286
286 287 def parseindex(self, data, inline):
287 288 s = self.size
288 289 index = []
289 290 nodemap = nodemaputil.NodeMap({nullid: nullrev})
290 291 n = off = 0
291 292 l = len(data)
292 293 while off + s <= l:
293 294 cur = data[off : off + s]
294 295 off += s
295 296 e = indexformatv0_unpack(cur)
296 297 # transform to revlogv1 format
297 298 e2 = (
298 299 offset_type(e[0], 0),
299 300 e[1],
300 301 -1,
301 302 e[2],
302 303 e[3],
303 304 nodemap.get(e[4], nullrev),
304 305 nodemap.get(e[5], nullrev),
305 306 e[6],
306 307 )
307 308 index.append(e2)
308 309 nodemap[e[6]] = n
309 310 n += 1
310 311
311 312 index = revlogoldindex(index)
312 313 return index, None
313 314
314 315 def packentry(self, entry, node, version, rev):
315 316 if gettype(entry[0]):
316 317 raise error.RevlogError(
317 318 _(b'index entry flags need revlog version 1')
318 319 )
319 320 e2 = (
320 321 getoffset(entry[0]),
321 322 entry[1],
322 323 entry[3],
323 324 entry[4],
324 325 node(entry[5]),
325 326 node(entry[6]),
326 327 entry[7],
327 328 )
328 329 return indexformatv0_pack(*e2)
329 330
330 331
331 332 # index ng:
332 333 # 6 bytes: offset
333 334 # 2 bytes: flags
334 335 # 4 bytes: compressed length
335 336 # 4 bytes: uncompressed length
336 337 # 4 bytes: base rev
337 338 # 4 bytes: link rev
338 339 # 4 bytes: parent 1 rev
339 340 # 4 bytes: parent 2 rev
340 341 # 32 bytes: nodeid
341 342 indexformatng = struct.Struct(b">Qiiiiii20s12x")
342 343 indexformatng_pack = indexformatng.pack
343 344 versionformat = struct.Struct(b">I")
344 345 versionformat_pack = versionformat.pack
345 346 versionformat_unpack = versionformat.unpack
346 347
347 348 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
348 349 # signed integer)
349 350 _maxentrysize = 0x7FFFFFFF
350 351
351 352
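
As a quick, purely illustrative check of the layout in the ``# index ng:`` comment above (the values are made up), an in-memory index tuple can be packed with the helpers defined earlier in this file:

entry = (
    offset_type(1024, 0),  # 6-byte data-file offset (1024), 2-byte flags (none)
    55,                    # compressed length
    120,                   # uncompressed length
    3,                     # base rev of the delta chain
    7,                     # link rev
    5,                     # parent 1 rev
    -1,                    # parent 2 rev (nullrev)
    b'\x12' * 20,          # 20-byte nodeid, zero-padded to 32 bytes on disk
)
packed = indexformatng_pack(*entry)
assert len(packed) == indexformatng.size  # 64 bytes per "ng" index entry

Note that for revision 0, ``packentry()`` below replaces the first four bytes of this record with the revlog version header.
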
352 353 class revlogio(object):
353 354 def __init__(self):
354 355 self.size = indexformatng.size
355 356
356 357 def parseindex(self, data, inline):
357 358 # call the C implementation to parse the index data
358 359 index, cache = parsers.parse_index2(data, inline)
359 360 return index, cache
360 361
361 362 def packentry(self, entry, node, version, rev):
362 363 p = indexformatng_pack(*entry)
363 364 if rev == 0:
364 365 p = versionformat_pack(version) + p[4:]
365 366 return p
366 367
367 368
368 369 indexformatv2 = struct.Struct(pureparsers.Index2Mixin.index_format)
369 370 indexformatv2_pack = indexformatv2.pack
370 371
371 372
372 373 class revlogv2io(object):
373 374 def __init__(self):
374 375 self.size = indexformatv2.size
375 376
376 377 def parseindex(self, data, inline):
377 378 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
378 379 return index, cache
379 380
380 381 def packentry(self, entry, node, version, rev):
381 382 p = indexformatv2_pack(*entry)
382 383 if rev == 0:
383 384 p = versionformat_pack(version) + p[4:]
384 385 return p
385 386
386 387
387 388 NodemapRevlogIO = None
388 389
389 390 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
390 391
391 392 class NodemapRevlogIO(revlogio):
392 393 """A debug oriented IO class that return a PersistentNodeMapIndexObject
393 394
394 395 The PersistentNodeMapIndexObject object is meant to test the persistent nodemap feature.
395 396 """
396 397
397 398 def parseindex(self, data, inline):
398 399 index, cache = parsers.parse_index_devel_nodemap(data, inline)
399 400 return index, cache
400 401
401 402
402 403 class rustrevlogio(revlogio):
403 404 def parseindex(self, data, inline):
404 405 index, cache = super(rustrevlogio, self).parseindex(data, inline)
405 406 return rustrevlog.MixedIndex(index), cache
406 407
407 408
408 409 class revlog(object):
409 410 """
410 411 the underlying revision storage object
411 412
412 413 A revlog consists of two parts, an index and the revision data.
413 414
414 415 The index is a file with a fixed record size containing
415 416 information on each revision, including its nodeid (hash), the
416 417 nodeids of its parents, the position and offset of its data within
417 418 the data file, and the revision it's based on. Finally, each entry
418 419 contains a linkrev entry that can serve as a pointer to external
419 420 data.
420 421
421 422 The revision data itself is a linear collection of data chunks.
422 423 Each chunk represents a revision and is usually represented as a
423 424 delta against the previous chunk. To bound lookup time, runs of
424 425 deltas are limited to about 2 times the length of the original
425 426 version data. This makes retrieval of a version proportional to
426 427 its size, or O(1) relative to the number of revisions.
427 428
428 429 Both pieces of the revlog are written to in an append-only
429 430 fashion, which means we never need to rewrite a file to insert or
430 431 remove data, and can use some simple techniques to avoid the need
431 432 for locking while reading.
432 433
433 434 If checkambig, indexfile is opened with checkambig=True at
434 435 writing, to avoid file stat ambiguity.
435 436
436 437 If mmaplargeindex is True, and an mmapindexthreshold is set, the
437 438 index will be mmapped rather than read if it is larger than the
438 439 configured threshold.
439 440
440 441 If censorable is True, the revlog can have censored revisions.
441 442
442 443 If `upperboundcomp` is not None, this is the expected maximal gain from
443 444 compression for the data content.
444 445
445 446 `concurrencychecker` is an optional function that receives 3 arguments: a
446 447 file handle, a filename, and an expected position. It should check whether
447 448 the current position in the file handle is valid, and log/warn/fail (by
448 449 raising).
449 450 """
450 451
451 452 _flagserrorclass = error.RevlogError
452 453
453 454 def __init__(
454 455 self,
455 456 opener,
456 457 indexfile,
457 458 datafile=None,
458 459 checkambig=False,
459 460 mmaplargeindex=False,
460 461 censorable=False,
461 462 upperboundcomp=None,
462 463 persistentnodemap=False,
463 464 concurrencychecker=None,
464 465 ):
465 466 """
466 467 create a revlog object
467 468
468 469 opener is a function that abstracts the file opening operation
469 470 and can be used to implement COW semantics or the like.
470 471
471 472 """
472 473 self.upperboundcomp = upperboundcomp
473 474 self.indexfile = indexfile
474 475 self.datafile = datafile or (indexfile[:-2] + b".d")
475 476 self.nodemap_file = None
476 477 if persistentnodemap:
477 478 self.nodemap_file = nodemaputil.get_nodemap_file(
478 479 opener, self.indexfile
479 480 )
480 481
481 482 self.opener = opener
482 483 # When True, indexfile is opened with checkambig=True at writing, to
483 484 # avoid file stat ambiguity.
484 485 self._checkambig = checkambig
485 486 self._mmaplargeindex = mmaplargeindex
486 487 self._censorable = censorable
487 488 # 3-tuple of (node, rev, text) for a raw revision.
488 489 self._revisioncache = None
489 490 # Maps rev to chain base rev.
490 491 self._chainbasecache = util.lrucachedict(100)
491 492 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
492 493 self._chunkcache = (0, b'')
493 494 # How much data to read and cache into the raw revlog data cache.
494 495 self._chunkcachesize = 65536
495 496 self._maxchainlen = None
496 497 self._deltabothparents = True
497 498 self.index = None
498 499 self._nodemap_docket = None
499 500 # Mapping of partial identifiers to full nodes.
500 501 self._pcache = {}
501 502 # Mapping of revision integer to full node.
502 503 self._compengine = b'zlib'
503 504 self._compengineopts = {}
504 505 self._maxdeltachainspan = -1
505 506 self._withsparseread = False
506 507 self._sparserevlog = False
507 508 self._srdensitythreshold = 0.50
508 509 self._srmingapsize = 262144
509 510
510 511 # Make copy of flag processors so each revlog instance can support
511 512 # custom flags.
512 513 self._flagprocessors = dict(flagutil.flagprocessors)
513 514
514 515 # 2-tuple of file handles being used for active writing.
515 516 self._writinghandles = None
516 517
517 518 self._loadindex()
518 519
519 520 self._concurrencychecker = concurrencychecker
520 521
521 522 def _loadindex(self):
522 523 mmapindexthreshold = None
523 524 opts = self.opener.options
524 525
525 526 if b'revlogv2' in opts:
526 527 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
527 528 elif b'revlogv1' in opts:
528 529 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
529 530 if b'generaldelta' in opts:
530 531 newversionflags |= FLAG_GENERALDELTA
531 532 elif b'revlogv0' in self.opener.options:
532 533 newversionflags = REVLOGV0
533 534 else:
534 535 newversionflags = REVLOG_DEFAULT_VERSION
535 536
536 537 if b'chunkcachesize' in opts:
537 538 self._chunkcachesize = opts[b'chunkcachesize']
538 539 if b'maxchainlen' in opts:
539 540 self._maxchainlen = opts[b'maxchainlen']
540 541 if b'deltabothparents' in opts:
541 542 self._deltabothparents = opts[b'deltabothparents']
542 543 self._lazydelta = bool(opts.get(b'lazydelta', True))
543 544 self._lazydeltabase = False
544 545 if self._lazydelta:
545 546 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
546 547 if b'compengine' in opts:
547 548 self._compengine = opts[b'compengine']
548 549 if b'zlib.level' in opts:
549 550 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
550 551 if b'zstd.level' in opts:
551 552 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
552 553 if b'maxdeltachainspan' in opts:
553 554 self._maxdeltachainspan = opts[b'maxdeltachainspan']
554 555 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
555 556 mmapindexthreshold = opts[b'mmapindexthreshold']
556 557 self.hassidedata = bool(opts.get(b'side-data', False))
557 558 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
558 559 withsparseread = bool(opts.get(b'with-sparse-read', False))
559 560 # sparse-revlog forces sparse-read
560 561 self._withsparseread = self._sparserevlog or withsparseread
561 562 if b'sparse-read-density-threshold' in opts:
562 563 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
563 564 if b'sparse-read-min-gap-size' in opts:
564 565 self._srmingapsize = opts[b'sparse-read-min-gap-size']
565 566 if opts.get(b'enableellipsis'):
566 567 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
567 568
568 569 # revlog v0 doesn't have flag processors
569 570 for flag, processor in pycompat.iteritems(
570 571 opts.get(b'flagprocessors', {})
571 572 ):
572 573 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
573 574
574 575 if self._chunkcachesize <= 0:
575 576 raise error.RevlogError(
576 577 _(b'revlog chunk cache size %r is not greater than 0')
577 578 % self._chunkcachesize
578 579 )
579 580 elif self._chunkcachesize & (self._chunkcachesize - 1):
580 581 raise error.RevlogError(
581 582 _(b'revlog chunk cache size %r is not a power of 2')
582 583 % self._chunkcachesize
583 584 )
584 585
585 586 indexdata = b''
586 587 self._initempty = True
587 588 try:
588 589 with self._indexfp() as f:
589 590 if (
590 591 mmapindexthreshold is not None
591 592 and self.opener.fstat(f).st_size >= mmapindexthreshold
592 593 ):
593 594 # TODO: should .close() to release resources without
594 595 # relying on Python GC
595 596 indexdata = util.buffer(util.mmapread(f))
596 597 else:
597 598 indexdata = f.read()
598 599 if len(indexdata) > 0:
599 600 versionflags = versionformat_unpack(indexdata[:4])[0]
600 601 self._initempty = False
601 602 else:
602 603 versionflags = newversionflags
603 604 except IOError as inst:
604 605 if inst.errno != errno.ENOENT:
605 606 raise
606 607
607 608 versionflags = newversionflags
608 609
609 610 self.version = versionflags
610 611
611 612 flags = versionflags & ~0xFFFF
612 613 fmt = versionflags & 0xFFFF
613 614
614 615 if fmt == REVLOGV0:
615 616 if flags:
616 617 raise error.RevlogError(
617 618 _(b'unknown flags (%#04x) in version %d revlog %s')
618 619 % (flags >> 16, fmt, self.indexfile)
619 620 )
620 621
621 622 self._inline = False
622 623 self._generaldelta = False
623 624
624 625 elif fmt == REVLOGV1:
625 626 if flags & ~REVLOGV1_FLAGS:
626 627 raise error.RevlogError(
627 628 _(b'unknown flags (%#04x) in version %d revlog %s')
628 629 % (flags >> 16, fmt, self.indexfile)
629 630 )
630 631
631 632 self._inline = versionflags & FLAG_INLINE_DATA
632 633 self._generaldelta = versionflags & FLAG_GENERALDELTA
633 634
634 635 elif fmt == REVLOGV2:
635 636 if flags & ~REVLOGV2_FLAGS:
636 637 raise error.RevlogError(
637 638 _(b'unknown flags (%#04x) in version %d revlog %s')
638 639 % (flags >> 16, fmt, self.indexfile)
639 640 )
640 641
641 642 self._inline = versionflags & FLAG_INLINE_DATA
642 643 # generaldelta implied by version 2 revlogs.
643 644 self._generaldelta = True
644 645
645 646 else:
646 647 raise error.RevlogError(
647 648 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
648 649 )
649 650 # sparse-revlog can't be on without general-delta (issue6056)
650 651 if not self._generaldelta:
651 652 self._sparserevlog = False
652 653
653 654 self._storedeltachains = True
654 655
655 656 devel_nodemap = (
656 657 self.nodemap_file
657 658 and opts.get(b'devel-force-nodemap', False)
658 659 and NodemapRevlogIO is not None
659 660 )
660 661
661 662 use_rust_index = False
662 663 if rustrevlog is not None:
663 664 if self.nodemap_file is not None:
664 665 use_rust_index = True
665 666 else:
666 667 use_rust_index = self.opener.options.get(b'rust.index')
667 668
668 669 self._io = revlogio()
669 670 if self.version == REVLOGV0:
670 671 self._io = revlogoldio()
671 672 elif fmt == REVLOGV2:
672 673 self._io = revlogv2io()
673 674 elif devel_nodemap:
674 675 self._io = NodemapRevlogIO()
675 676 elif use_rust_index:
676 677 self._io = rustrevlogio()
677 678 try:
678 679 d = self._io.parseindex(indexdata, self._inline)
679 680 index, _chunkcache = d
680 681 use_nodemap = (
681 682 not self._inline
682 683 and self.nodemap_file is not None
683 684 and util.safehasattr(index, 'update_nodemap_data')
684 685 )
685 686 if use_nodemap:
686 687 nodemap_data = nodemaputil.persisted_data(self)
687 688 if nodemap_data is not None:
688 689 docket = nodemap_data[0]
689 690 if (
690 691 len(d[0]) > docket.tip_rev
691 692 and d[0][docket.tip_rev][7] == docket.tip_node
692 693 ):
693 694 # no changelog tampering
694 695 self._nodemap_docket = docket
695 696 index.update_nodemap_data(*nodemap_data)
696 697 except (ValueError, IndexError):
697 698 raise error.RevlogError(
698 699 _(b"index %s is corrupted") % self.indexfile
699 700 )
700 701 self.index, self._chunkcache = d
701 702 if not self._chunkcache:
702 703 self._chunkclear()
703 704 # revnum -> (chain-length, sum-delta-length)
704 705 self._chaininfocache = util.lrucachedict(500)
705 706 # revlog header -> revlog compressor
706 707 self._decompressors = {}
707 708
708 709 @util.propertycache
709 710 def _compressor(self):
710 711 engine = util.compengines[self._compengine]
711 712 return engine.revlogcompressor(self._compengineopts)
712 713
713 714 def _indexfp(self, mode=b'r'):
714 715 """file object for the revlog's index file"""
715 716 args = {'mode': mode}
716 717 if mode != b'r':
717 718 args['checkambig'] = self._checkambig
718 719 if mode == b'w':
719 720 args['atomictemp'] = True
720 721 return self.opener(self.indexfile, **args)
721 722
722 723 def _datafp(self, mode=b'r'):
723 724 """file object for the revlog's data file"""
724 725 return self.opener(self.datafile, mode=mode)
725 726
726 727 @contextlib.contextmanager
727 728 def _datareadfp(self, existingfp=None):
728 729 """file object suitable to read data"""
729 730 # Use explicit file handle, if given.
730 731 if existingfp is not None:
731 732 yield existingfp
732 733
733 734 # Use a file handle being actively used for writes, if available.
734 735 # There is some danger to doing this because reads will seek the
735 736 # file. However, _writeentry() performs a SEEK_END before all writes,
736 737 # so we should be safe.
737 738 elif self._writinghandles:
738 739 if self._inline:
739 740 yield self._writinghandles[0]
740 741 else:
741 742 yield self._writinghandles[1]
742 743
743 744 # Otherwise open a new file handle.
744 745 else:
745 746 if self._inline:
746 747 func = self._indexfp
747 748 else:
748 749 func = self._datafp
749 750 with func() as fp:
750 751 yield fp
751 752
752 753 def tiprev(self):
753 754 return len(self.index) - 1
754 755
755 756 def tip(self):
756 757 return self.node(self.tiprev())
757 758
758 759 def __contains__(self, rev):
759 760 return 0 <= rev < len(self)
760 761
761 762 def __len__(self):
762 763 return len(self.index)
763 764
764 765 def __iter__(self):
765 766 return iter(pycompat.xrange(len(self)))
766 767
767 768 def revs(self, start=0, stop=None):
768 769 """iterate over all rev in this revlog (from start to stop)"""
769 770 return storageutil.iterrevs(len(self), start=start, stop=stop)
770 771
771 772 @property
772 773 def nodemap(self):
773 774 msg = (
774 775 b"revlog.nodemap is deprecated, "
775 776 b"use revlog.index.[has_node|rev|get_rev]"
776 777 )
777 778 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
778 779 return self.index.nodemap
779 780
780 781 @property
781 782 def _nodecache(self):
782 783 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
783 784 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
784 785 return self.index.nodemap
785 786
786 787 def hasnode(self, node):
787 788 try:
788 789 self.rev(node)
789 790 return True
790 791 except KeyError:
791 792 return False
792 793
793 794 def candelta(self, baserev, rev):
794 795 """whether two revisions (baserev, rev) can be delta-ed or not"""
795 796 # Disable delta if either rev requires a content-changing flag
796 797 # processor (ex. LFS). This is because such flag processor can alter
797 798 # the rawtext content that the delta will be based on, and two clients
798 799 # could have a same revlog node with different flags (i.e. different
799 800 # rawtext contents) and the delta could be incompatible.
800 801 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
801 802 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
802 803 ):
803 804 return False
804 805 return True
805 806
806 807 def update_caches(self, transaction):
807 808 if self.nodemap_file is not None:
808 809 if transaction is None:
809 810 nodemaputil.update_persistent_nodemap(self)
810 811 else:
811 812 nodemaputil.setup_persistent_nodemap(transaction, self)
812 813
813 814 def clearcaches(self):
814 815 self._revisioncache = None
815 816 self._chainbasecache.clear()
816 817 self._chunkcache = (0, b'')
817 818 self._pcache = {}
818 819 self._nodemap_docket = None
819 820 self.index.clearcaches()
820 821 # The python code is the one responsible for validating the docket, so we
821 822 # end up having to refresh it here.
822 823 use_nodemap = (
823 824 not self._inline
824 825 and self.nodemap_file is not None
825 826 and util.safehasattr(self.index, 'update_nodemap_data')
826 827 )
827 828 if use_nodemap:
828 829 nodemap_data = nodemaputil.persisted_data(self)
829 830 if nodemap_data is not None:
830 831 self._nodemap_docket = nodemap_data[0]
831 832 self.index.update_nodemap_data(*nodemap_data)
832 833
833 834 def rev(self, node):
834 835 try:
835 836 return self.index.rev(node)
836 837 except TypeError:
837 838 raise
838 839 except error.RevlogError:
839 840 # parsers.c radix tree lookup failed
840 841 if node == wdirid or node in wdirfilenodeids:
841 842 raise error.WdirUnsupported
842 843 raise error.LookupError(node, self.indexfile, _(b'no node'))
843 844
844 845 # Accessors for index entries.
845 846
846 847 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
847 848 # are flags.
848 849 def start(self, rev):
849 850 return int(self.index[rev][0] >> 16)
850 851
851 852 def flags(self, rev):
852 853 return self.index[rev][0] & 0xFFFF
853 854
854 855 def length(self, rev):
855 856 return self.index[rev][1]
856 857
857 858 def sidedata_length(self, rev):
858 859 if self.version & 0xFFFF != REVLOGV2:
859 860 return 0
860 861 return self.index[rev][9]
861 862
862 863 def rawsize(self, rev):
863 864 """return the length of the uncompressed text for a given revision"""
864 865 l = self.index[rev][2]
865 866 if l >= 0:
866 867 return l
867 868
868 869 t = self.rawdata(rev)
869 870 return len(t)
870 871
871 872 def size(self, rev):
872 873 """length of non-raw text (processed by a "read" flag processor)"""
873 874 # fast path: if no "read" flag processor could change the content,
874 875 # size is rawsize. note: ELLIPSIS is known to not change the content.
875 876 flags = self.flags(rev)
876 877 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
877 878 return self.rawsize(rev)
878 879
879 880 return len(self.revision(rev, raw=False))
880 881
881 882 def chainbase(self, rev):
882 883 base = self._chainbasecache.get(rev)
883 884 if base is not None:
884 885 return base
885 886
886 887 index = self.index
887 888 iterrev = rev
888 889 base = index[iterrev][3]
889 890 while base != iterrev:
890 891 iterrev = base
891 892 base = index[iterrev][3]
892 893
893 894 self._chainbasecache[rev] = base
894 895 return base
895 896
896 897 def linkrev(self, rev):
897 898 return self.index[rev][4]
898 899
899 900 def parentrevs(self, rev):
900 901 try:
901 902 entry = self.index[rev]
902 903 except IndexError:
903 904 if rev == wdirrev:
904 905 raise error.WdirUnsupported
905 906 raise
906 907
907 908 return entry[5], entry[6]
908 909
909 910 # fast parentrevs(rev) where rev isn't filtered
910 911 _uncheckedparentrevs = parentrevs
911 912
912 913 def node(self, rev):
913 914 try:
914 915 return self.index[rev][7]
915 916 except IndexError:
916 917 if rev == wdirrev:
917 918 raise error.WdirUnsupported
918 919 raise
919 920
920 921 # Derived from index values.
921 922
922 923 def end(self, rev):
923 924 return self.start(rev) + self.length(rev)
924 925
925 926 def parents(self, node):
926 927 i = self.index
927 928 d = i[self.rev(node)]
928 929 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
929 930
930 931 def chainlen(self, rev):
931 932 return self._chaininfo(rev)[0]
932 933
933 934 def _chaininfo(self, rev):
934 935 chaininfocache = self._chaininfocache
935 936 if rev in chaininfocache:
936 937 return chaininfocache[rev]
937 938 index = self.index
938 939 generaldelta = self._generaldelta
939 940 iterrev = rev
940 941 e = index[iterrev]
941 942 clen = 0
942 943 compresseddeltalen = 0
943 944 while iterrev != e[3]:
944 945 clen += 1
945 946 compresseddeltalen += e[1]
946 947 if generaldelta:
947 948 iterrev = e[3]
948 949 else:
949 950 iterrev -= 1
950 951 if iterrev in chaininfocache:
951 952 t = chaininfocache[iterrev]
952 953 clen += t[0]
953 954 compresseddeltalen += t[1]
954 955 break
955 956 e = index[iterrev]
956 957 else:
957 958 # Add text length of base since decompressing that also takes
958 959 # work. For cache hits the length is already included.
959 960 compresseddeltalen += e[1]
960 961 r = (clen, compresseddeltalen)
961 962 chaininfocache[rev] = r
962 963 return r
963 964
964 965 def _deltachain(self, rev, stoprev=None):
965 966 """Obtain the delta chain for a revision.
966 967
967 968 ``stoprev`` specifies a revision to stop at. If not specified, we
968 969 stop at the base of the chain.
969 970
970 971 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
971 972 revs in ascending order and ``stopped`` is a bool indicating whether
972 973 ``stoprev`` was hit.
973 974 """
974 975 # Try C implementation.
975 976 try:
976 977 return self.index.deltachain(rev, stoprev, self._generaldelta)
977 978 except AttributeError:
978 979 pass
979 980
980 981 chain = []
981 982
982 983 # Alias to prevent attribute lookup in tight loop.
983 984 index = self.index
984 985 generaldelta = self._generaldelta
985 986
986 987 iterrev = rev
987 988 e = index[iterrev]
988 989 while iterrev != e[3] and iterrev != stoprev:
989 990 chain.append(iterrev)
990 991 if generaldelta:
991 992 iterrev = e[3]
992 993 else:
993 994 iterrev -= 1
994 995 e = index[iterrev]
995 996
996 997 if iterrev == stoprev:
997 998 stopped = True
998 999 else:
999 1000 chain.append(iterrev)
1000 1001 stopped = False
1001 1002
1002 1003 chain.reverse()
1003 1004 return chain, stopped
1004 1005
1005 1006 def ancestors(self, revs, stoprev=0, inclusive=False):
1006 1007 """Generate the ancestors of 'revs' in reverse revision order.
1007 1008 Does not generate revs lower than stoprev.
1008 1009
1009 1010 See the documentation for ancestor.lazyancestors for more details."""
1010 1011
1011 1012 # first, make sure start revisions aren't filtered
1012 1013 revs = list(revs)
1013 1014 checkrev = self.node
1014 1015 for r in revs:
1015 1016 checkrev(r)
1016 1017 # and we're sure ancestors aren't filtered as well
1017 1018
1018 1019 if rustancestor is not None:
1019 1020 lazyancestors = rustancestor.LazyAncestors
1020 1021 arg = self.index
1021 1022 else:
1022 1023 lazyancestors = ancestor.lazyancestors
1023 1024 arg = self._uncheckedparentrevs
1024 1025 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1025 1026
1026 1027 def descendants(self, revs):
1027 1028 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1028 1029
1029 1030 def findcommonmissing(self, common=None, heads=None):
1030 1031 """Return a tuple of the ancestors of common and the ancestors of heads
1031 1032 that are not ancestors of common. In revset terminology, we return the
1032 1033 tuple:
1033 1034
1034 1035 ::common, (::heads) - (::common)
1035 1036
1036 1037 The list is sorted by revision number, meaning it is
1037 1038 topologically sorted.
1038 1039
1039 1040 'heads' and 'common' are both lists of node IDs. If heads is
1040 1041 not supplied, uses all of the revlog's heads. If common is not
1041 1042 supplied, uses nullid."""
1042 1043 if common is None:
1043 1044 common = [nullid]
1044 1045 if heads is None:
1045 1046 heads = self.heads()
1046 1047
1047 1048 common = [self.rev(n) for n in common]
1048 1049 heads = [self.rev(n) for n in heads]
1049 1050
1050 1051 # we want the ancestors, but inclusive
1051 1052 class lazyset(object):
1052 1053 def __init__(self, lazyvalues):
1053 1054 self.addedvalues = set()
1054 1055 self.lazyvalues = lazyvalues
1055 1056
1056 1057 def __contains__(self, value):
1057 1058 return value in self.addedvalues or value in self.lazyvalues
1058 1059
1059 1060 def __iter__(self):
1060 1061 added = self.addedvalues
1061 1062 for r in added:
1062 1063 yield r
1063 1064 for r in self.lazyvalues:
1064 1065 if not r in added:
1065 1066 yield r
1066 1067
1067 1068 def add(self, value):
1068 1069 self.addedvalues.add(value)
1069 1070
1070 1071 def update(self, values):
1071 1072 self.addedvalues.update(values)
1072 1073
1073 1074 has = lazyset(self.ancestors(common))
1074 1075 has.add(nullrev)
1075 1076 has.update(common)
1076 1077
1077 1078 # take all ancestors from heads that aren't in has
1078 1079 missing = set()
1079 1080 visit = collections.deque(r for r in heads if r not in has)
1080 1081 while visit:
1081 1082 r = visit.popleft()
1082 1083 if r in missing:
1083 1084 continue
1084 1085 else:
1085 1086 missing.add(r)
1086 1087 for p in self.parentrevs(r):
1087 1088 if p not in has:
1088 1089 visit.append(p)
1089 1090 missing = list(missing)
1090 1091 missing.sort()
1091 1092 return has, [self.node(miss) for miss in missing]
1092 1093
1093 1094 def incrementalmissingrevs(self, common=None):
1094 1095 """Return an object that can be used to incrementally compute the
1095 1096 revision numbers of the ancestors of arbitrary sets that are not
1096 1097 ancestors of common. This is an ancestor.incrementalmissingancestors
1097 1098 object.
1098 1099
1099 1100 'common' is a list of revision numbers. If common is not supplied, uses
1100 1101 nullrev.
1101 1102 """
1102 1103 if common is None:
1103 1104 common = [nullrev]
1104 1105
1105 1106 if rustancestor is not None:
1106 1107 return rustancestor.MissingAncestors(self.index, common)
1107 1108 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1108 1109
1109 1110 def findmissingrevs(self, common=None, heads=None):
1110 1111 """Return the revision numbers of the ancestors of heads that
1111 1112 are not ancestors of common.
1112 1113
1113 1114 More specifically, return a list of revision numbers corresponding to
1114 1115 nodes N such that every N satisfies the following constraints:
1115 1116
1116 1117 1. N is an ancestor of some node in 'heads'
1117 1118 2. N is not an ancestor of any node in 'common'
1118 1119
1119 1120 The list is sorted by revision number, meaning it is
1120 1121 topologically sorted.
1121 1122
1122 1123 'heads' and 'common' are both lists of revision numbers. If heads is
1123 1124 not supplied, uses all of the revlog's heads. If common is not
1124 1125 supplied, uses nullid."""
1125 1126 if common is None:
1126 1127 common = [nullrev]
1127 1128 if heads is None:
1128 1129 heads = self.headrevs()
1129 1130
1130 1131 inc = self.incrementalmissingrevs(common=common)
1131 1132 return inc.missingancestors(heads)
1132 1133
1133 1134 def findmissing(self, common=None, heads=None):
1134 1135 """Return the ancestors of heads that are not ancestors of common.
1135 1136
1136 1137 More specifically, return a list of nodes N such that every N
1137 1138 satisfies the following constraints:
1138 1139
1139 1140 1. N is an ancestor of some node in 'heads'
1140 1141 2. N is not an ancestor of any node in 'common'
1141 1142
1142 1143 The list is sorted by revision number, meaning it is
1143 1144 topologically sorted.
1144 1145
1145 1146 'heads' and 'common' are both lists of node IDs. If heads is
1146 1147 not supplied, uses all of the revlog's heads. If common is not
1147 1148 supplied, uses nullid."""
1148 1149 if common is None:
1149 1150 common = [nullid]
1150 1151 if heads is None:
1151 1152 heads = self.heads()
1152 1153
1153 1154 common = [self.rev(n) for n in common]
1154 1155 heads = [self.rev(n) for n in heads]
1155 1156
1156 1157 inc = self.incrementalmissingrevs(common=common)
1157 1158 return [self.node(r) for r in inc.missingancestors(heads)]
1158 1159
1159 1160 def nodesbetween(self, roots=None, heads=None):
1160 1161 """Return a topological path from 'roots' to 'heads'.
1161 1162
1162 1163 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1163 1164 topologically sorted list of all nodes N that satisfy both of
1164 1165 these constraints:
1165 1166
1166 1167 1. N is a descendant of some node in 'roots'
1167 1168 2. N is an ancestor of some node in 'heads'
1168 1169
1169 1170 Every node is considered to be both a descendant and an ancestor
1170 1171 of itself, so every reachable node in 'roots' and 'heads' will be
1171 1172 included in 'nodes'.
1172 1173
1173 1174 'outroots' is the list of reachable nodes in 'roots', i.e., the
1174 1175 subset of 'roots' that is returned in 'nodes'. Likewise,
1175 1176 'outheads' is the subset of 'heads' that is also in 'nodes'.
1176 1177
1177 1178 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1178 1179 unspecified, uses nullid as the only root. If 'heads' is
1179 1180 unspecified, uses list of all of the revlog's heads."""
1180 1181 nonodes = ([], [], [])
1181 1182 if roots is not None:
1182 1183 roots = list(roots)
1183 1184 if not roots:
1184 1185 return nonodes
1185 1186 lowestrev = min([self.rev(n) for n in roots])
1186 1187 else:
1187 1188 roots = [nullid] # Everybody's a descendant of nullid
1188 1189 lowestrev = nullrev
1189 1190 if (lowestrev == nullrev) and (heads is None):
1190 1191 # We want _all_ the nodes!
1191 1192 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1192 1193 if heads is None:
1193 1194 # All nodes are ancestors, so the latest ancestor is the last
1194 1195 # node.
1195 1196 highestrev = len(self) - 1
1196 1197 # Set ancestors to None to signal that every node is an ancestor.
1197 1198 ancestors = None
1198 1199 # Set heads to an empty dictionary for later discovery of heads
1199 1200 heads = {}
1200 1201 else:
1201 1202 heads = list(heads)
1202 1203 if not heads:
1203 1204 return nonodes
1204 1205 ancestors = set()
1205 1206 # Turn heads into a dictionary so we can remove 'fake' heads.
1206 1207 # Also, later we will be using it to filter out the heads we can't
1207 1208 # find from roots.
1208 1209 heads = dict.fromkeys(heads, False)
1209 1210 # Start at the top and keep marking parents until we're done.
1210 1211 nodestotag = set(heads)
1211 1212 # Remember where the top was so we can use it as a limit later.
1212 1213 highestrev = max([self.rev(n) for n in nodestotag])
1213 1214 while nodestotag:
1214 1215 # grab a node to tag
1215 1216 n = nodestotag.pop()
1216 1217 # Never tag nullid
1217 1218 if n == nullid:
1218 1219 continue
1219 1220 # A node's revision number represents its place in a
1220 1221 # topologically sorted list of nodes.
1221 1222 r = self.rev(n)
1222 1223 if r >= lowestrev:
1223 1224 if n not in ancestors:
1224 1225 # If we are possibly a descendant of one of the roots
1225 1226 # and we haven't already been marked as an ancestor
1226 1227 ancestors.add(n) # Mark as ancestor
1227 1228 # Add non-nullid parents to list of nodes to tag.
1228 1229 nodestotag.update(
1229 1230 [p for p in self.parents(n) if p != nullid]
1230 1231 )
1231 1232 elif n in heads: # We've seen it before, is it a fake head?
1232 1233 # So it is, real heads should not be the ancestors of
1233 1234 # any other heads.
1234 1235 heads.pop(n)
1235 1236 if not ancestors:
1236 1237 return nonodes
1237 1238 # Now that we have our set of ancestors, we want to remove any
1238 1239 # roots that are not ancestors.
1239 1240
1240 1241 # If one of the roots was nullid, everything is included anyway.
1241 1242 if lowestrev > nullrev:
1242 1243 # But, since we weren't, let's recompute the lowest rev to not
1243 1244 # include roots that aren't ancestors.
1244 1245
1245 1246 # Filter out roots that aren't ancestors of heads
1246 1247 roots = [root for root in roots if root in ancestors]
1247 1248 # Recompute the lowest revision
1248 1249 if roots:
1249 1250 lowestrev = min([self.rev(root) for root in roots])
1250 1251 else:
1251 1252 # No more roots? Return empty list
1252 1253 return nonodes
1253 1254 else:
1254 1255 # We are descending from nullid, and don't need to care about
1255 1256 # any other roots.
1256 1257 lowestrev = nullrev
1257 1258 roots = [nullid]
1258 1259 # Transform our roots list into a set.
1259 1260 descendants = set(roots)
1260 1261 # Also, keep the original roots so we can filter out roots that aren't
1261 1262 # 'real' roots (i.e. are descended from other roots).
1262 1263 roots = descendants.copy()
1263 1264 # Our topologically sorted list of output nodes.
1264 1265 orderedout = []
1265 1266 # Don't start at nullid since we don't want nullid in our output list,
1266 1267 # and if nullid shows up in descendants, empty parents will look like
1267 1268 # they're descendants.
1268 1269 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1269 1270 n = self.node(r)
1270 1271 isdescendant = False
1271 1272 if lowestrev == nullrev: # Everybody is a descendant of nullid
1272 1273 isdescendant = True
1273 1274 elif n in descendants:
1274 1275 # n is already a descendant
1275 1276 isdescendant = True
1276 1277 # This check only needs to be done here because all the roots
1277 1278 # will start being marked as descendants before the loop.
1278 1279 if n in roots:
1279 1280 # If n was a root, check if it's a 'real' root.
1280 1281 p = tuple(self.parents(n))
1281 1282 # If any of its parents are descendants, it's not a root.
1282 1283 if (p[0] in descendants) or (p[1] in descendants):
1283 1284 roots.remove(n)
1284 1285 else:
1285 1286 p = tuple(self.parents(n))
1286 1287 # A node is a descendant if either of its parents is a
1287 1288 # descendant. (We seeded the descendants set with the roots
1288 1289 # up there, remember?)
1289 1290 if (p[0] in descendants) or (p[1] in descendants):
1290 1291 descendants.add(n)
1291 1292 isdescendant = True
1292 1293 if isdescendant and ((ancestors is None) or (n in ancestors)):
1293 1294 # Only include nodes that are both descendants and ancestors.
1294 1295 orderedout.append(n)
1295 1296 if (ancestors is not None) and (n in heads):
1296 1297 # We're trying to figure out which heads are reachable
1297 1298 # from roots.
1298 1299 # Mark this head as having been reached
1299 1300 heads[n] = True
1300 1301 elif ancestors is None:
1301 1302 # Otherwise, we're trying to discover the heads.
1302 1303 # Assume this is a head because if it isn't, the next step
1303 1304 # will eventually remove it.
1304 1305 heads[n] = True
1305 1306 # But, obviously its parents aren't.
1306 1307 for p in self.parents(n):
1307 1308 heads.pop(p, None)
1308 1309 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1309 1310 roots = list(roots)
1310 1311 assert orderedout
1311 1312 assert roots
1312 1313 assert heads
1313 1314 return (orderedout, roots, heads)
1314 1315
1315 1316 def headrevs(self, revs=None):
1316 1317 if revs is None:
1317 1318 try:
1318 1319 return self.index.headrevs()
1319 1320 except AttributeError:
1320 1321 return self._headrevs()
1321 1322 if rustdagop is not None:
1322 1323 return rustdagop.headrevs(self.index, revs)
1323 1324 return dagop.headrevs(revs, self._uncheckedparentrevs)
1324 1325
1325 1326 def computephases(self, roots):
1326 1327 return self.index.computephasesmapsets(roots)
1327 1328
1328 1329 def _headrevs(self):
1329 1330 count = len(self)
1330 1331 if not count:
1331 1332 return [nullrev]
1332 1333 # we won't iterate over filtered revs, so nobody is a head at start
1333 1334 ishead = [0] * (count + 1)
1334 1335 index = self.index
1335 1336 for r in self:
1336 1337 ishead[r] = 1 # I may be a head
1337 1338 e = index[r]
1338 1339 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1339 1340 return [r for r, val in enumerate(ishead) if val]
1340 1341
1341 1342 def heads(self, start=None, stop=None):
1342 1343 """return the list of all nodes that have no children
1343 1344
1344 1345 if start is specified, only heads that are descendants of
1345 1346 start will be returned
1346 1347 if stop is specified, it will consider all the revs from stop
1347 1348 as if they had no children
1348 1349 """
1349 1350 if start is None and stop is None:
1350 1351 if not len(self):
1351 1352 return [nullid]
1352 1353 return [self.node(r) for r in self.headrevs()]
1353 1354
1354 1355 if start is None:
1355 1356 start = nullrev
1356 1357 else:
1357 1358 start = self.rev(start)
1358 1359
1359 1360 stoprevs = {self.rev(n) for n in stop or []}
1360 1361
1361 1362 revs = dagop.headrevssubset(
1362 1363 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1363 1364 )
1364 1365
1365 1366 return [self.node(rev) for rev in revs]
1366 1367
1367 1368 def children(self, node):
1368 1369 """find the children of a given node"""
1369 1370 c = []
1370 1371 p = self.rev(node)
1371 1372 for r in self.revs(start=p + 1):
1372 1373 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1373 1374 if prevs:
1374 1375 for pr in prevs:
1375 1376 if pr == p:
1376 1377 c.append(self.node(r))
1377 1378 elif p == nullrev:
1378 1379 c.append(self.node(r))
1379 1380 return c
1380 1381
1381 1382 def commonancestorsheads(self, a, b):
1382 1383 """calculate all the heads of the common ancestors of nodes a and b"""
1383 1384 a, b = self.rev(a), self.rev(b)
1384 1385 ancs = self._commonancestorsheads(a, b)
1385 1386 return pycompat.maplist(self.node, ancs)
1386 1387
1387 1388 def _commonancestorsheads(self, *revs):
1388 1389 """calculate all the heads of the common ancestors of revs"""
1389 1390 try:
1390 1391 ancs = self.index.commonancestorsheads(*revs)
1391 1392 except (AttributeError, OverflowError): # C implementation failed
1392 1393 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1393 1394 return ancs
1394 1395
1395 1396 def isancestor(self, a, b):
1396 1397 """return True if node a is an ancestor of node b
1397 1398
1398 1399 A revision is considered an ancestor of itself."""
1399 1400 a, b = self.rev(a), self.rev(b)
1400 1401 return self.isancestorrev(a, b)
1401 1402
1402 1403 def isancestorrev(self, a, b):
1403 1404 """return True if revision a is an ancestor of revision b
1404 1405
1405 1406 A revision is considered an ancestor of itself.
1406 1407
1407 1408 The implementation of this is trivial but the use of
1408 1409 reachableroots is not."""
1409 1410 if a == nullrev:
1410 1411 return True
1411 1412 elif a == b:
1412 1413 return True
1413 1414 elif a > b:
1414 1415 return False
1415 1416 return bool(self.reachableroots(a, [b], [a], includepath=False))
1416 1417
1417 1418 def reachableroots(self, minroot, heads, roots, includepath=False):
1418 1419 """return (heads(::(<roots> and <roots>::<heads>)))
1419 1420
1420 1421 If includepath is True, return (<roots>::<heads>)."""
1421 1422 try:
1422 1423 return self.index.reachableroots2(
1423 1424 minroot, heads, roots, includepath
1424 1425 )
1425 1426 except AttributeError:
1426 1427 return dagop._reachablerootspure(
1427 1428 self.parentrevs, minroot, roots, heads, includepath
1428 1429 )
1429 1430
1430 1431 def ancestor(self, a, b):
1431 1432 """calculate the "best" common ancestor of nodes a and b"""
1432 1433
1433 1434 a, b = self.rev(a), self.rev(b)
1434 1435 try:
1435 1436 ancs = self.index.ancestors(a, b)
1436 1437 except (AttributeError, OverflowError):
1437 1438 ancs = ancestor.ancestors(self.parentrevs, a, b)
1438 1439 if ancs:
1439 1440 # choose a consistent winner when there's a tie
1440 1441 return min(map(self.node, ancs))
1441 1442 return nullid
1442 1443
1443 1444 def _match(self, id):
1444 1445 if isinstance(id, int):
1445 1446 # rev
1446 1447 return self.node(id)
1447 1448 if len(id) == 20:
1448 1449 # possibly a binary node
1449 1450 # odds of a binary node being all hex in ASCII are 1 in 10**25
1450 1451 try:
1451 1452 node = id
1452 1453 self.rev(node) # quick search the index
1453 1454 return node
1454 1455 except error.LookupError:
1455 1456 pass # may be partial hex id
1456 1457 try:
1457 1458 # str(rev)
1458 1459 rev = int(id)
1459 1460 if b"%d" % rev != id:
1460 1461 raise ValueError
1461 1462 if rev < 0:
1462 1463 rev = len(self) + rev
1463 1464 if rev < 0 or rev >= len(self):
1464 1465 raise ValueError
1465 1466 return self.node(rev)
1466 1467 except (ValueError, OverflowError):
1467 1468 pass
1468 1469 if len(id) == 40:
1469 1470 try:
1470 1471 # a full hex nodeid?
1471 1472 node = bin(id)
1472 1473 self.rev(node)
1473 1474 return node
1474 1475 except (TypeError, error.LookupError):
1475 1476 pass
1476 1477
1477 1478 def _partialmatch(self, id):
1478 1479 # we don't care about wdirfilenodeids as they should always be full hashes
1479 1480 maybewdir = wdirhex.startswith(id)
1480 1481 try:
1481 1482 partial = self.index.partialmatch(id)
1482 1483 if partial and self.hasnode(partial):
1483 1484 if maybewdir:
1484 1485 # single 'ff...' match in radix tree, ambiguous with wdir
1485 1486 raise error.RevlogError
1486 1487 return partial
1487 1488 if maybewdir:
1488 1489 # no 'ff...' match in radix tree, wdir identified
1489 1490 raise error.WdirUnsupported
1490 1491 return None
1491 1492 except error.RevlogError:
1492 1493 # parsers.c radix tree lookup gave multiple matches
1493 1494 # fast path: for unfiltered changelog, radix tree is accurate
1494 1495 if not getattr(self, 'filteredrevs', None):
1495 1496 raise error.AmbiguousPrefixLookupError(
1496 1497 id, self.indexfile, _(b'ambiguous identifier')
1497 1498 )
1498 1499 # fall through to slow path that filters hidden revisions
1499 1500 except (AttributeError, ValueError):
1500 1501 # we are pure python, or key was too short to search radix tree
1501 1502 pass
1502 1503
1503 1504 if id in self._pcache:
1504 1505 return self._pcache[id]
1505 1506
1506 1507 if len(id) <= 40:
1507 1508 try:
1508 1509 # hex(node)[:...]
1509 1510 l = len(id) // 2 # grab an even number of digits
1510 1511 prefix = bin(id[: l * 2])
1511 1512 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1512 1513 nl = [
1513 1514 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1514 1515 ]
1515 1516 if nullhex.startswith(id):
1516 1517 nl.append(nullid)
1517 1518 if len(nl) > 0:
1518 1519 if len(nl) == 1 and not maybewdir:
1519 1520 self._pcache[id] = nl[0]
1520 1521 return nl[0]
1521 1522 raise error.AmbiguousPrefixLookupError(
1522 1523 id, self.indexfile, _(b'ambiguous identifier')
1523 1524 )
1524 1525 if maybewdir:
1525 1526 raise error.WdirUnsupported
1526 1527 return None
1527 1528 except TypeError:
1528 1529 pass
1529 1530
1530 1531 def lookup(self, id):
1531 1532 """locate a node based on:
1532 1533 - revision number or str(revision number)
1533 1534 - nodeid or subset of hex nodeid
1534 1535 """
1535 1536 n = self._match(id)
1536 1537 if n is not None:
1537 1538 return n
1538 1539 n = self._partialmatch(id)
1539 1540 if n:
1540 1541 return n
1541 1542
1542 1543 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1543 1544
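
As an aside, the identifier resolution order implemented by _match/_partialmatch above can be illustrated with a small standalone sketch; toy_lookup and its sample node ids below are made up for illustration and are not Mercurial code:

def toy_lookup(ident, nodes):
    # nodes: list of 40-character hex node ids, ordered by revision number
    # 1) revision number or str(revision number)
    try:
        rev = int(ident)
        if -len(nodes) <= rev < len(nodes):
            return nodes[rev]
    except (TypeError, ValueError):
        pass
    # 2) full 40-character hex nodeid
    if len(ident) == 40 and ident in nodes:
        return ident
    # 3) unambiguous hex prefix
    matches = [n for n in nodes if n.startswith(ident)]
    if len(matches) == 1:
        return matches[0]
    raise LookupError('ambiguous or unknown identifier: %r' % ident)

nodes = ['a' * 40, 'ab' + 'c' * 38]
assert toy_lookup('0', nodes) == 'a' * 40
assert toy_lookup('abc', nodes) == 'ab' + 'c' * 38
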
1544 1545 def shortest(self, node, minlength=1):
1545 1546 """Find the shortest unambiguous prefix that matches node."""
1546 1547
1547 1548 def isvalid(prefix):
1548 1549 try:
1549 1550 matchednode = self._partialmatch(prefix)
1550 1551 except error.AmbiguousPrefixLookupError:
1551 1552 return False
1552 1553 except error.WdirUnsupported:
1553 1554 # single 'ff...' match
1554 1555 return True
1555 1556 if matchednode is None:
1556 1557 raise error.LookupError(node, self.indexfile, _(b'no node'))
1557 1558 return True
1558 1559
1559 1560 def maybewdir(prefix):
1560 1561 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1561 1562
1562 1563 hexnode = hex(node)
1563 1564
1564 1565 def disambiguate(hexnode, minlength):
1565 1566 """Disambiguate against wdirid."""
1566 1567 for length in range(minlength, len(hexnode) + 1):
1567 1568 prefix = hexnode[:length]
1568 1569 if not maybewdir(prefix):
1569 1570 return prefix
1570 1571
1571 1572 if not getattr(self, 'filteredrevs', None):
1572 1573 try:
1573 1574 length = max(self.index.shortest(node), minlength)
1574 1575 return disambiguate(hexnode, length)
1575 1576 except error.RevlogError:
1576 1577 if node != wdirid:
1577 1578 raise error.LookupError(node, self.indexfile, _(b'no node'))
1578 1579 except AttributeError:
1579 1580 # Fall through to pure code
1580 1581 pass
1581 1582
1582 1583 if node == wdirid:
1583 1584 for length in range(minlength, len(hexnode) + 1):
1584 1585 prefix = hexnode[:length]
1585 1586 if isvalid(prefix):
1586 1587 return prefix
1587 1588
1588 1589 for length in range(minlength, len(hexnode) + 1):
1589 1590 prefix = hexnode[:length]
1590 1591 if isvalid(prefix):
1591 1592 return disambiguate(hexnode, length)
1592 1593
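
A standalone sketch of the idea behind shortest(): grow a prefix until it matches exactly one known node. The real method additionally disambiguates against the working-directory pseudo-node and uses the C index's radix tree; the names and node ids below are illustrative only.

def toy_shortest(hexnode, nodes, minlength=1):
    for length in range(minlength, len(hexnode) + 1):
        prefix = hexnode[:length]
        if sum(1 for n in nodes if n.startswith(prefix)) == 1:
            return prefix
    return hexnode

nodes = ['deadbeef' + '0' * 32, 'deafc0de' + '0' * 32]
assert toy_shortest(nodes[0], nodes) == 'dead'
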
1593 1594 def cmp(self, node, text):
1594 1595 """compare text with a given file revision
1595 1596
1596 1597 returns True if text is different than what is stored.
1597 1598 """
1598 1599 p1, p2 = self.parents(node)
1599 1600 return storageutil.hashrevisionsha1(text, p1, p2) != node
1600 1601
1601 1602 def _cachesegment(self, offset, data):
1602 1603 """Add a segment to the revlog cache.
1603 1604
1604 1605 Accepts an absolute offset and the data that is at that location.
1605 1606 """
1606 1607 o, d = self._chunkcache
1607 1608 # try to add to existing cache
1608 1609 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1609 1610 self._chunkcache = o, d + data
1610 1611 else:
1611 1612 self._chunkcache = offset, data
1612 1613
1613 1614 def _readsegment(self, offset, length, df=None):
1614 1615 """Load a segment of raw data from the revlog.
1615 1616
1616 1617 Accepts an absolute offset, length to read, and an optional existing
1617 1618 file handle to read from.
1618 1619
1619 1620 If an existing file handle is passed, it will be seeked and the
1620 1621 original seek position will NOT be restored.
1621 1622
1622 1623 Returns a str or buffer of raw byte data.
1623 1624
1624 1625 Raises if the requested number of bytes could not be read.
1625 1626 """
1626 1627 # Cache data both forward and backward around the requested
1627 1628 # data, in a fixed size window. This helps speed up operations
1628 1629 # involving reading the revlog backwards.
1629 1630 cachesize = self._chunkcachesize
1630 1631 realoffset = offset & ~(cachesize - 1)
1631 1632 reallength = (
1632 1633 (offset + length + cachesize) & ~(cachesize - 1)
1633 1634 ) - realoffset
1634 1635 with self._datareadfp(df) as df:
1635 1636 df.seek(realoffset)
1636 1637 d = df.read(reallength)
1637 1638
1638 1639 self._cachesegment(realoffset, d)
1639 1640 if offset != realoffset or reallength != length:
1640 1641 startoffset = offset - realoffset
1641 1642 if len(d) - startoffset < length:
1642 1643 raise error.RevlogError(
1643 1644 _(
1644 1645 b'partial read of revlog %s; expected %d bytes from '
1645 1646 b'offset %d, got %d'
1646 1647 )
1647 1648 % (
1648 1649 self.indexfile if self._inline else self.datafile,
1649 1650 length,
1650 1651 realoffset,
1651 1652 len(d) - startoffset,
1652 1653 )
1653 1654 )
1654 1655
1655 1656 return util.buffer(d, startoffset, length)
1656 1657
1657 1658 if len(d) < length:
1658 1659 raise error.RevlogError(
1659 1660 _(
1660 1661 b'partial read of revlog %s; expected %d bytes from offset '
1661 1662 b'%d, got %d'
1662 1663 )
1663 1664 % (
1664 1665 self.indexfile if self._inline else self.datafile,
1665 1666 length,
1666 1667 offset,
1667 1668 len(d),
1668 1669 )
1669 1670 )
1670 1671
1671 1672 return d
1672 1673
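
For readers unfamiliar with the bit masking in _readsegment, here is a worked example of the read-window alignment, assuming a power-of-two cachesize (the numbers are arbitrary):

cachesize = 65536
offset, length = 70000, 100
realoffset = offset & ~(cachesize - 1)                              # 65536
reallength = ((offset + length + cachesize) & ~(cachesize - 1)) - realoffset
assert (realoffset, reallength) == (65536, 65536)
# the aligned window always covers the requested range
assert realoffset <= offset and realoffset + reallength >= offset + length
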
1673 1674 def _getsegment(self, offset, length, df=None):
1674 1675 """Obtain a segment of raw data from the revlog.
1675 1676
1676 1677 Accepts an absolute offset, length of bytes to obtain, and an
1677 1678 optional file handle to the already-opened revlog. If the file
1678 1679         handle is used, its original seek position will not be preserved.
1679 1680
1680 1681 Requests for data may be returned from a cache.
1681 1682
1682 1683 Returns a str or a buffer instance of raw byte data.
1683 1684 """
1684 1685 o, d = self._chunkcache
1685 1686 l = len(d)
1686 1687
1687 1688 # is it in the cache?
1688 1689 cachestart = offset - o
1689 1690 cacheend = cachestart + length
1690 1691 if cachestart >= 0 and cacheend <= l:
1691 1692 if cachestart == 0 and cacheend == l:
1692 1693 return d # avoid a copy
1693 1694 return util.buffer(d, cachestart, cacheend - cachestart)
1694 1695
1695 1696 return self._readsegment(offset, length, df=df)
1696 1697
1697 1698 def _getsegmentforrevs(self, startrev, endrev, df=None):
1698 1699 """Obtain a segment of raw data corresponding to a range of revisions.
1699 1700
1700 1701 Accepts the start and end revisions and an optional already-open
1701 1702 file handle to be used for reading. If the file handle is read, its
1702 1703 seek position will not be preserved.
1703 1704
1704 1705 Requests for data may be satisfied by a cache.
1705 1706
1706 1707 Returns a 2-tuple of (offset, data) for the requested range of
1707 1708 revisions. Offset is the integer offset from the beginning of the
1708 1709 revlog and data is a str or buffer of the raw byte data.
1709 1710
1710 1711 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1711 1712 to determine where each revision's data begins and ends.
1712 1713 """
1713 1714 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1714 1715 # (functions are expensive).
1715 1716 index = self.index
1716 1717 istart = index[startrev]
1717 1718 start = int(istart[0] >> 16)
1718 1719 if startrev == endrev:
1719 1720 end = start + istart[1]
1720 1721 else:
1721 1722 iend = index[endrev]
1722 1723 end = int(iend[0] >> 16) + iend[1]
1723 1724
1724 1725 if self._inline:
1725 1726 start += (startrev + 1) * self._io.size
1726 1727 end += (endrev + 1) * self._io.size
1727 1728 length = end - start
1728 1729
1729 1730 return start, self._getsegment(start, length, df=df)
1730 1731
1731 1732 def _chunk(self, rev, df=None):
1732 1733 """Obtain a single decompressed chunk for a revision.
1733 1734
1734 1735 Accepts an integer revision and an optional already-open file handle
1735 1736 to be used for reading. If used, the seek position of the file will not
1736 1737 be preserved.
1737 1738
1738 1739 Returns a str holding uncompressed data for the requested revision.
1739 1740 """
1740 1741 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1741 1742
1742 1743 def _chunks(self, revs, df=None, targetsize=None):
1743 1744 """Obtain decompressed chunks for the specified revisions.
1744 1745
1745 1746 Accepts an iterable of numeric revisions that are assumed to be in
1746 1747 ascending order. Also accepts an optional already-open file handle
1747 1748 to be used for reading. If used, the seek position of the file will
1748 1749 not be preserved.
1749 1750
1750 1751 This function is similar to calling ``self._chunk()`` multiple times,
1751 1752 but is faster.
1752 1753
1753 1754 Returns a list with decompressed data for each requested revision.
1754 1755 """
1755 1756 if not revs:
1756 1757 return []
1757 1758 start = self.start
1758 1759 length = self.length
1759 1760 inline = self._inline
1760 1761 iosize = self._io.size
1761 1762 buffer = util.buffer
1762 1763
1763 1764 l = []
1764 1765 ladd = l.append
1765 1766
1766 1767 if not self._withsparseread:
1767 1768 slicedchunks = (revs,)
1768 1769 else:
1769 1770 slicedchunks = deltautil.slicechunk(
1770 1771 self, revs, targetsize=targetsize
1771 1772 )
1772 1773
1773 1774 for revschunk in slicedchunks:
1774 1775 firstrev = revschunk[0]
1775 1776 # Skip trailing revisions with empty diff
1776 1777 for lastrev in revschunk[::-1]:
1777 1778 if length(lastrev) != 0:
1778 1779 break
1779 1780
1780 1781 try:
1781 1782 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1782 1783 except OverflowError:
1783 1784 # issue4215 - we can't cache a run of chunks greater than
1784 1785 # 2G on Windows
1785 1786 return [self._chunk(rev, df=df) for rev in revschunk]
1786 1787
1787 1788 decomp = self.decompress
1788 1789 for rev in revschunk:
1789 1790 chunkstart = start(rev)
1790 1791 if inline:
1791 1792 chunkstart += (rev + 1) * iosize
1792 1793 chunklength = length(rev)
1793 1794 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1794 1795
1795 1796 return l
1796 1797
1797 1798 def _chunkclear(self):
1798 1799 """Clear the raw chunk cache."""
1799 1800 self._chunkcache = (0, b'')
1800 1801
1801 1802 def deltaparent(self, rev):
1802 1803 """return deltaparent of the given revision"""
1803 1804 base = self.index[rev][3]
1804 1805 if base == rev:
1805 1806 return nullrev
1806 1807 elif self._generaldelta:
1807 1808 return base
1808 1809 else:
1809 1810 return rev - 1
1810 1811
1811 1812 def issnapshot(self, rev):
1812 1813 """tells whether rev is a snapshot"""
1813 1814 if not self._sparserevlog:
1814 1815 return self.deltaparent(rev) == nullrev
1815 1816 elif util.safehasattr(self.index, b'issnapshot'):
1816 1817             # assign the method directly to cache both the capability test and the lookup
1817 1818 self.issnapshot = self.index.issnapshot
1818 1819 return self.issnapshot(rev)
1819 1820 if rev == nullrev:
1820 1821 return True
1821 1822 entry = self.index[rev]
1822 1823 base = entry[3]
1823 1824 if base == rev:
1824 1825 return True
1825 1826 if base == nullrev:
1826 1827 return True
1827 1828 p1 = entry[5]
1828 1829 p2 = entry[6]
1829 1830 if base == p1 or base == p2:
1830 1831 return False
1831 1832 return self.issnapshot(base)
1832 1833
1833 1834 def snapshotdepth(self, rev):
1834 1835         """number of snapshots in the chain before this one"""
1835 1836 if not self.issnapshot(rev):
1836 1837             raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1837 1838 return len(self._deltachain(rev)[0]) - 1
1838 1839
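
A toy, self-contained version of the sparse-revlog snapshot test above, using made-up (deltabase, p1, p2) tuples; it is meant only to illustrate the recursion, not to mirror the real index layout:

nullrev = -1
entries = {
    0: (0, nullrev, nullrev),   # deltas against itself -> full snapshot
    1: (0, 0, nullrev),         # deltas against its parent -> not a snapshot
    2: (0, 1, nullrev),         # deltas against non-parent snapshot 0 -> intermediate snapshot
}

def toy_issnapshot(rev):
    if rev == nullrev:
        return True
    base, p1, p2 = entries[rev]
    if base == rev or base == nullrev:
        return True
    if base in (p1, p2):
        return False
    return toy_issnapshot(base)

assert [toy_issnapshot(r) for r in (0, 1, 2)] == [True, False, True]
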
1839 1840 def revdiff(self, rev1, rev2):
1840 1841 """return or calculate a delta between two revisions
1841 1842
1842 1843 The delta calculated is in binary form and is intended to be written to
1843 1844 revlog data directly. So this function needs raw revision data.
1844 1845 """
1845 1846 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1846 1847 return bytes(self._chunk(rev2))
1847 1848
1848 1849 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1849 1850
1850 1851 def _processflags(self, text, flags, operation, raw=False):
1851 1852 """deprecated entry point to access flag processors"""
1852 1853 msg = b'_processflag(...) use the specialized variant'
1853 1854 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1854 1855 if raw:
1855 1856 return text, flagutil.processflagsraw(self, text, flags)
1856 1857 elif operation == b'read':
1857 1858 return flagutil.processflagsread(self, text, flags)
1858 1859 else: # write operation
1859 1860 return flagutil.processflagswrite(self, text, flags)
1860 1861
1861 1862 def revision(self, nodeorrev, _df=None, raw=False):
1862 1863 """return an uncompressed revision of a given node or revision
1863 1864 number.
1864 1865
1865 1866 _df - an existing file handle to read from. (internal-only)
1866 1867 raw - an optional argument specifying if the revision data is to be
1867 1868 treated as raw data when applying flag transforms. 'raw' should be set
1868 1869 to True when generating changegroups or in debug commands.
1869 1870 """
1870 1871 if raw:
1871 1872 msg = (
1872 1873 b'revlog.revision(..., raw=True) is deprecated, '
1873 1874 b'use revlog.rawdata(...)'
1874 1875 )
1875 1876 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1876 1877 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1877 1878
1878 1879 def sidedata(self, nodeorrev, _df=None):
1879 1880 """a map of extra data related to the changeset but not part of the hash
1880 1881
1881 1882         This function currently returns a dictionary. However, a more advanced
1882 1883         mapping object will likely be used in the future for more
1883 1884         efficient/lazy code.
1884 1885 """
1885 1886 return self._revisiondata(nodeorrev, _df)[1]
1886 1887
1887 1888 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1888 1889 # deal with <nodeorrev> argument type
1889 1890 if isinstance(nodeorrev, int):
1890 1891 rev = nodeorrev
1891 1892 node = self.node(rev)
1892 1893 else:
1893 1894 node = nodeorrev
1894 1895 rev = None
1895 1896
1896 1897 # fast path the special `nullid` rev
1897 1898 if node == nullid:
1898 1899 return b"", {}
1899 1900
1900 1901 # ``rawtext`` is the text as stored inside the revlog. Might be the
1901 1902 # revision or might need to be processed to retrieve the revision.
1902 1903 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1903 1904
1904 1905 if self.version & 0xFFFF == REVLOGV2:
1905 1906 if rev is None:
1906 1907 rev = self.rev(node)
1907 1908 sidedata = self._sidedata(rev)
1908 1909 else:
1909 1910 sidedata = {}
1910 1911
1911 1912 if raw and validated:
1912 1913             # if we don't want to process the raw text and the raw
1913 1914 # text is cached, we can exit early.
1914 1915 return rawtext, sidedata
1915 1916 if rev is None:
1916 1917 rev = self.rev(node)
1917 1918 # the revlog's flag for this revision
1918 1919 # (usually alter its state or content)
1919 1920 flags = self.flags(rev)
1920 1921
1921 1922 if validated and flags == REVIDX_DEFAULT_FLAGS:
1922 1923 # no extra flags set, no flag processor runs, text = rawtext
1923 1924 return rawtext, sidedata
1924 1925
1925 1926 if raw:
1926 1927 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1927 1928 text = rawtext
1928 1929 else:
1929 1930 r = flagutil.processflagsread(self, rawtext, flags)
1930 1931 text, validatehash = r
1931 1932 if validatehash:
1932 1933 self.checkhash(text, node, rev=rev)
1933 1934 if not validated:
1934 1935 self._revisioncache = (node, rev, rawtext)
1935 1936
1936 1937 return text, sidedata
1937 1938
1938 1939 def _rawtext(self, node, rev, _df=None):
1939 1940 """return the possibly unvalidated rawtext for a revision
1940 1941
1941 1942 returns (rev, rawtext, validated)
1942 1943 """
1943 1944
1944 1945 # revision in the cache (could be useful to apply delta)
1945 1946 cachedrev = None
1946 1947 # An intermediate text to apply deltas to
1947 1948 basetext = None
1948 1949
1949 1950 # Check if we have the entry in cache
1950 1951 # The cache entry looks like (node, rev, rawtext)
1951 1952 if self._revisioncache:
1952 1953 if self._revisioncache[0] == node:
1953 1954 return (rev, self._revisioncache[2], True)
1954 1955 cachedrev = self._revisioncache[1]
1955 1956
1956 1957 if rev is None:
1957 1958 rev = self.rev(node)
1958 1959
1959 1960 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1960 1961 if stopped:
1961 1962 basetext = self._revisioncache[2]
1962 1963
1963 1964 # drop cache to save memory, the caller is expected to
1964 1965 # update self._revisioncache after validating the text
1965 1966 self._revisioncache = None
1966 1967
1967 1968 targetsize = None
1968 1969 rawsize = self.index[rev][2]
1969 1970 if 0 <= rawsize:
1970 1971 targetsize = 4 * rawsize
1971 1972
1972 1973 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1973 1974 if basetext is None:
1974 1975 basetext = bytes(bins[0])
1975 1976 bins = bins[1:]
1976 1977
1977 1978 rawtext = mdiff.patches(basetext, bins)
1978 1979 del basetext # let us have a chance to free memory early
1979 1980 return (rev, rawtext, False)
1980 1981
1981 1982 def _sidedata(self, rev):
1982 1983 """Return the sidedata for a given revision number."""
1983 1984 index_entry = self.index[rev]
1984 1985 sidedata_offset = index_entry[8]
1985 1986 sidedata_size = index_entry[9]
1986 1987
1987 1988 if self._inline:
1988 1989 sidedata_offset += self._io.size * (1 + rev)
1989 1990 if sidedata_size == 0:
1990 1991 return {}
1991 1992
1992 1993 segment = self._getsegment(sidedata_offset, sidedata_size)
1993 1994 sidedata = sidedatautil.deserialize_sidedata(segment)
1994 1995 return sidedata
1995 1996
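
To make the index arithmetic in _sidedata concrete, a small illustration with made-up numbers: entry[8] holds the sidedata offset, entry[9] its size, and inline revlogs shift the offset past the interleaved index entries (the entry size io_size below is hypothetical):

io_size = 64                                    # hypothetical fixed index entry size
entry = (0, 0, 0, 0, 0, 0, 0, b'', 4096, 128)   # ..., sidedata_offset, sidedata_size
rev, inline = 3, True
sidedata_offset, sidedata_size = entry[8], entry[9]
if inline:
    sidedata_offset += io_size * (1 + rev)
assert (sidedata_offset, sidedata_size) == (4096 + 64 * 4, 128)
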
1996 1997 def rawdata(self, nodeorrev, _df=None):
1997 1998 """return an uncompressed raw data of a given node or revision number.
1998 1999
1999 2000 _df - an existing file handle to read from. (internal-only)
2000 2001 """
2001 2002 return self._revisiondata(nodeorrev, _df, raw=True)[0]
2002 2003
2003 2004 def hash(self, text, p1, p2):
2004 2005 """Compute a node hash.
2005 2006
2006 2007 Available as a function so that subclasses can replace the hash
2007 2008 as needed.
2008 2009 """
2009 2010 return storageutil.hashrevisionsha1(text, p1, p2)
2010 2011
2011 2012 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2012 2013 """Check node hash integrity.
2013 2014
2014 2015 Available as a function so that subclasses can extend hash mismatch
2015 2016 behaviors as needed.
2016 2017 """
2017 2018 try:
2018 2019 if p1 is None and p2 is None:
2019 2020 p1, p2 = self.parents(node)
2020 2021 if node != self.hash(text, p1, p2):
2021 2022 # Clear the revision cache on hash failure. The revision cache
2022 2023 # only stores the raw revision and clearing the cache does have
2023 2024 # the side-effect that we won't have a cache hit when the raw
2024 2025 # revision data is accessed. But this case should be rare and
2025 2026 # it is extra work to teach the cache about the hash
2026 2027 # verification state.
2027 2028 if self._revisioncache and self._revisioncache[0] == node:
2028 2029 self._revisioncache = None
2029 2030
2030 2031 revornode = rev
2031 2032 if revornode is None:
2032 2033 revornode = templatefilters.short(hex(node))
2033 2034 raise error.RevlogError(
2034 2035 _(b"integrity check failed on %s:%s")
2035 2036 % (self.indexfile, pycompat.bytestr(revornode))
2036 2037 )
2037 2038 except error.RevlogError:
2038 2039 if self._censorable and storageutil.iscensoredtext(text):
2039 2040 raise error.CensoredNodeError(self.indexfile, node, text)
2040 2041 raise
2041 2042
2042 2043 def _enforceinlinesize(self, tr, fp=None):
2043 2044 """Check if the revlog is too big for inline and convert if so.
2044 2045
2045 2046 This should be called after revisions are added to the revlog. If the
2046 2047 revlog has grown too large to be an inline revlog, it will convert it
2047 2048 to use multiple index and data files.
2048 2049 """
2049 2050 tiprev = len(self) - 1
2050 2051 if (
2051 2052 not self._inline
2052 2053 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
2053 2054 ):
2054 2055 return
2055 2056
2056 2057 troffset = tr.findoffset(self.indexfile)
2057 2058 if troffset is None:
2058 2059 raise error.RevlogError(
2059 2060 _(b"%s not found in the transaction") % self.indexfile
2060 2061 )
2061 2062 trindex = 0
2062 2063 tr.add(self.datafile, 0)
2063 2064
2064 2065 if fp:
2065 2066 fp.flush()
2066 2067 fp.close()
2067 2068 # We can't use the cached file handle after close(). So prevent
2068 2069 # its usage.
2069 2070 self._writinghandles = None
2070 2071
2071 2072 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
2072 2073 for r in self:
2073 2074 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
2074 2075 if troffset <= self.start(r):
2075 2076 trindex = r
2076 2077
2077 2078 with self._indexfp(b'w') as fp:
2078 2079 self.version &= ~FLAG_INLINE_DATA
2079 2080 self._inline = False
2080 2081 io = self._io
2081 2082 for i in self:
2082 2083 e = io.packentry(self.index[i], self.node, self.version, i)
2083 2084 fp.write(e)
2084 2085
2085 2086         # the temp file replaces the real index when we exit the context
2086 2087 # manager
2087 2088
2088 2089 tr.replace(self.indexfile, trindex * self._io.size)
2089 2090 nodemaputil.setup_persistent_nodemap(tr, self)
2090 2091 self._chunkclear()
2091 2092
2092 2093 def _nodeduplicatecallback(self, transaction, node):
2093 2094 """called when trying to add a node already stored."""
2094 2095
2095 2096 def addrevision(
2096 2097 self,
2097 2098 text,
2098 2099 transaction,
2099 2100 link,
2100 2101 p1,
2101 2102 p2,
2102 2103 cachedelta=None,
2103 2104 node=None,
2104 2105 flags=REVIDX_DEFAULT_FLAGS,
2105 2106 deltacomputer=None,
2106 2107 sidedata=None,
2107 2108 ):
2108 2109 """add a revision to the log
2109 2110
2110 2111 text - the revision data to add
2111 2112 transaction - the transaction object used for rollback
2112 2113 link - the linkrev data to add
2113 2114 p1, p2 - the parent nodeids of the revision
2114 2115 cachedelta - an optional precomputed delta
2115 2116 node - nodeid of revision; typically node is not specified, and it is
2116 2117 computed by default as hash(text, p1, p2), however subclasses might
2117 2118 use different hashing method (and override checkhash() in such case)
2118 2119             use a different hashing method (and override checkhash() in such a case)
2119 2120 deltacomputer - an optional deltacomputer instance shared between
2120 2121 multiple calls
2121 2122 """
2122 2123 if link == nullrev:
2123 2124 raise error.RevlogError(
2124 2125 _(b"attempted to add linkrev -1 to %s") % self.indexfile
2125 2126 )
2126 2127
2127 2128 if sidedata is None:
2128 2129 sidedata = {}
2129 2130 elif not self.hassidedata:
2130 2131 raise error.ProgrammingError(
2131 2132                 _(b"trying to add sidedata to a revlog that does not support it")
2132 2133 )
2133 2134
2134 2135 if flags:
2135 2136 node = node or self.hash(text, p1, p2)
2136 2137
2137 2138 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2138 2139
2139 2140 # If the flag processor modifies the revision data, ignore any provided
2140 2141 # cachedelta.
2141 2142 if rawtext != text:
2142 2143 cachedelta = None
2143 2144
2144 2145 if len(rawtext) > _maxentrysize:
2145 2146 raise error.RevlogError(
2146 2147 _(
2147 2148 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2148 2149 )
2149 2150 % (self.indexfile, len(rawtext))
2150 2151 )
2151 2152
2152 2153 node = node or self.hash(rawtext, p1, p2)
2153 2154 rev = self.index.get_rev(node)
2154 2155 if rev is not None:
2155 2156 return rev
2156 2157
2157 2158 if validatehash:
2158 2159 self.checkhash(rawtext, node, p1=p1, p2=p2)
2159 2160
2160 2161 return self.addrawrevision(
2161 2162 rawtext,
2162 2163 transaction,
2163 2164 link,
2164 2165 p1,
2165 2166 p2,
2166 2167 node,
2167 2168 flags,
2168 2169 cachedelta=cachedelta,
2169 2170 deltacomputer=deltacomputer,
2170 2171 sidedata=sidedata,
2171 2172 )
2172 2173
2173 2174 def addrawrevision(
2174 2175 self,
2175 2176 rawtext,
2176 2177 transaction,
2177 2178 link,
2178 2179 p1,
2179 2180 p2,
2180 2181 node,
2181 2182 flags,
2182 2183 cachedelta=None,
2183 2184 deltacomputer=None,
2184 2185 sidedata=None,
2185 2186 ):
2186 2187 """add a raw revision with known flags, node and parents
2187 2188 useful when reusing a revision not stored in this revlog (ex: received
2188 2189 over wire, or read from an external bundle).
2189 2190 """
2190 2191 dfh = None
2191 2192 if not self._inline:
2192 2193 dfh = self._datafp(b"a+")
2193 2194 ifh = self._indexfp(b"a+")
2194 2195 try:
2195 2196 return self._addrevision(
2196 2197 node,
2197 2198 rawtext,
2198 2199 transaction,
2199 2200 link,
2200 2201 p1,
2201 2202 p2,
2202 2203 flags,
2203 2204 cachedelta,
2204 2205 ifh,
2205 2206 dfh,
2206 2207 deltacomputer=deltacomputer,
2207 2208 sidedata=sidedata,
2208 2209 )
2209 2210 finally:
2210 2211 if dfh:
2211 2212 dfh.close()
2212 2213 ifh.close()
2213 2214
2214 2215 def compress(self, data):
2215 2216 """Generate a possibly-compressed representation of data."""
2216 2217 if not data:
2217 2218 return b'', data
2218 2219
2219 2220 compressed = self._compressor.compress(data)
2220 2221
2221 2222 if compressed:
2222 2223 # The revlog compressor added the header in the returned data.
2223 2224 return b'', compressed
2224 2225
2225 2226 if data[0:1] == b'\0':
2226 2227 return b'', data
2227 2228 return b'u', data
2228 2229
2229 2230 def decompress(self, data):
2230 2231 """Decompress a revlog chunk.
2231 2232
2232 2233 The chunk is expected to begin with a header identifying the
2233 2234 format type so it can be routed to an appropriate decompressor.
2234 2235 """
2235 2236 if not data:
2236 2237 return data
2237 2238
2238 2239 # Revlogs are read much more frequently than they are written and many
2239 2240 # chunks only take microseconds to decompress, so performance is
2240 2241 # important here.
2241 2242 #
2242 2243 # We can make a few assumptions about revlogs:
2243 2244 #
2244 2245 # 1) the majority of chunks will be compressed (as opposed to inline
2245 2246 # raw data).
2246 2247         # 2) decompressing *any* data will likely be at least 10x slower than
2247 2248 # returning raw inline data.
2248 2249 # 3) we want to prioritize common and officially supported compression
2249 2250 # engines
2250 2251 #
2251 2252 # It follows that we want to optimize for "decompress compressed data
2252 2253 # when encoded with common and officially supported compression engines"
2253 2254 # case over "raw data" and "data encoded by less common or non-official
2254 2255 # compression engines." That is why we have the inline lookup first
2255 2256 # followed by the compengines lookup.
2256 2257 #
2257 2258 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2258 2259 # compressed chunks. And this matters for changelog and manifest reads.
2259 2260 t = data[0:1]
2260 2261
2261 2262 if t == b'x':
2262 2263 try:
2263 2264 return _zlibdecompress(data)
2264 2265 except zlib.error as e:
2265 2266 raise error.RevlogError(
2266 2267 _(b'revlog decompress error: %s')
2267 2268 % stringutil.forcebytestr(e)
2268 2269 )
2269 2270 # '\0' is more common than 'u' so it goes first.
2270 2271 elif t == b'\0':
2271 2272 return data
2272 2273 elif t == b'u':
2273 2274 return util.buffer(data, 1)
2274 2275
2275 2276 try:
2276 2277 compressor = self._decompressors[t]
2277 2278 except KeyError:
2278 2279 try:
2279 2280 engine = util.compengines.forrevlogheader(t)
2280 2281 compressor = engine.revlogcompressor(self._compengineopts)
2281 2282 self._decompressors[t] = compressor
2282 2283 except KeyError:
2283 2284 raise error.RevlogError(_(b'unknown compression type %r') % t)
2284 2285
2285 2286 return compressor.decompress(data)
2286 2287
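
A minimal, standalone sketch of the chunk-header routing described in the comments above: b'x' is zlib, b'u' marks data stored uncompressed behind a marker byte, b'\0' is raw data that happens to start with NUL, and other headers are looked up in the compression engine registry (omitted here):

import zlib

def toy_decompress(data):
    if not data:
        return data
    t = data[0:1]
    if t == b'x':
        return zlib.decompress(data)
    if t == b'\0':
        return data
    if t == b'u':
        return data[1:]
    raise ValueError('unknown compression header %r' % t)

assert toy_decompress(zlib.compress(b'hello')) == b'hello'  # zlib streams start with b'x'
assert toy_decompress(b'uhello') == b'hello'
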
2287 2288 def _addrevision(
2288 2289 self,
2289 2290 node,
2290 2291 rawtext,
2291 2292 transaction,
2292 2293 link,
2293 2294 p1,
2294 2295 p2,
2295 2296 flags,
2296 2297 cachedelta,
2297 2298 ifh,
2298 2299 dfh,
2299 2300 alwayscache=False,
2300 2301 deltacomputer=None,
2301 2302 sidedata=None,
2302 2303 ):
2303 2304 """internal function to add revisions to the log
2304 2305
2305 2306 see addrevision for argument descriptions.
2306 2307
2307 2308 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2308 2309
2309 2310 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2310 2311 be used.
2311 2312
2312 2313 invariants:
2313 2314 - rawtext is optional (can be None); if not set, cachedelta must be set.
2314 2315 if both are set, they must correspond to each other.
2315 2316 """
2316 2317 if node == nullid:
2317 2318 raise error.RevlogError(
2318 2319 _(b"%s: attempt to add null revision") % self.indexfile
2319 2320 )
2320 2321 if node == wdirid or node in wdirfilenodeids:
2321 2322 raise error.RevlogError(
2322 2323 _(b"%s: attempt to add wdir revision") % self.indexfile
2323 2324 )
2324 2325
2325 2326 if self._inline:
2326 2327 fh = ifh
2327 2328 else:
2328 2329 fh = dfh
2329 2330
2330 2331 btext = [rawtext]
2331 2332
2332 2333 curr = len(self)
2333 2334 prev = curr - 1
2334 2335
2335 2336 offset = self._get_data_offset(prev)
2336 2337
2337 2338 if self._concurrencychecker:
2338 2339 if self._inline:
2339 2340 # offset is "as if" it were in the .d file, so we need to add on
2340 2341 # the size of the entry metadata.
2341 2342 self._concurrencychecker(
2342 2343 ifh, self.indexfile, offset + curr * self._io.size
2343 2344 )
2344 2345 else:
2345 2346 # Entries in the .i are a consistent size.
2346 2347 self._concurrencychecker(
2347 2348 ifh, self.indexfile, curr * self._io.size
2348 2349 )
2349 2350 self._concurrencychecker(dfh, self.datafile, offset)
2350 2351
2351 2352 p1r, p2r = self.rev(p1), self.rev(p2)
2352 2353
2353 2354 # full versions are inserted when the needed deltas
2354 2355 # become comparable to the uncompressed text
2355 2356 if rawtext is None:
2356 2357 # need rawtext size, before changed by flag processors, which is
2357 2358 # the non-raw size. use revlog explicitly to avoid filelog's extra
2358 2359 # logic that might remove metadata size.
2359 2360 textlen = mdiff.patchedsize(
2360 2361 revlog.size(self, cachedelta[0]), cachedelta[1]
2361 2362 )
2362 2363 else:
2363 2364 textlen = len(rawtext)
2364 2365
2365 2366 if deltacomputer is None:
2366 2367 deltacomputer = deltautil.deltacomputer(self)
2367 2368
2368 2369 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2369 2370
2370 2371 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2371 2372
2372 2373 if sidedata:
2373 2374 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2374 2375 sidedata_offset = offset + deltainfo.deltalen
2375 2376 else:
2376 2377 serialized_sidedata = b""
2377 2378 # Don't store the offset if the sidedata is empty, that way
2378 2379 # we can easily detect empty sidedata and they will be no different
2379 2380 # than ones we manually add.
2380 2381 sidedata_offset = 0
2381 2382
2382 2383 e = (
2383 2384 offset_type(offset, flags),
2384 2385 deltainfo.deltalen,
2385 2386 textlen,
2386 2387 deltainfo.base,
2387 2388 link,
2388 2389 p1r,
2389 2390 p2r,
2390 2391 node,
2391 2392 sidedata_offset,
2392 2393 len(serialized_sidedata),
2393 2394 )
2394 2395
2395 2396 if self.version & 0xFFFF != REVLOGV2:
2396 2397 e = e[:8]
2397 2398
2398 2399 self.index.append(e)
2399 2400 entry = self._io.packentry(e, self.node, self.version, curr)
2400 2401 self._writeentry(
2401 2402 transaction,
2402 2403 ifh,
2403 2404 dfh,
2404 2405 entry,
2405 2406 deltainfo.data,
2406 2407 link,
2407 2408 offset,
2408 2409 serialized_sidedata,
2409 2410 )
2410 2411
2411 2412 rawtext = btext[0]
2412 2413
2413 2414 if alwayscache and rawtext is None:
2414 2415 rawtext = deltacomputer.buildtext(revinfo, fh)
2415 2416
2416 2417 if type(rawtext) == bytes: # only accept immutable objects
2417 2418 self._revisioncache = (node, curr, rawtext)
2418 2419 self._chainbasecache[curr] = deltainfo.chainbase
2419 2420 return curr
2420 2421
2421 2422 def _get_data_offset(self, prev):
2422 2423 """Returns the current offset in the (in-transaction) data file.
2423 2424         Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2424 2425 file to store that information: since sidedata can be rewritten to the
2425 2426 end of the data file within a transaction, you can have cases where, for
2426 2427 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2427 2428 to `n - 1`'s sidedata being written after `n`'s data.
2428 2429
2429 2430 TODO cache this in a docket file before getting out of experimental."""
2430 2431 if self.version & 0xFFFF != REVLOGV2:
2431 2432 return self.end(prev)
2432 2433
2433 2434 offset = 0
2434 2435 for rev, entry in enumerate(self.index):
2435 2436 sidedata_end = entry[8] + entry[9]
2436 2437 # Sidedata for a previous rev has potentially been written after
2437 2438 # this rev's end, so take the max.
2438 2439 offset = max(self.end(rev), offset, sidedata_end)
2439 2440 return offset
2440 2441
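
A toy illustration of the revlog v2 rule above: because sidedata can be rewritten after a later revision's data, the next write offset is the maximum over both kinds of end offsets (all numbers below are made up):

ends = [100, 250, 400]                  # hypothetical self.end(rev) values
sidedata = [(0, 0), (420, 30), (0, 0)]  # hypothetical (offset, size) per rev
offset = 0
for rev_end, (sd_off, sd_size) in zip(ends, sidedata):
    offset = max(offset, rev_end, sd_off + sd_size)
assert offset == 450   # rev 1's sidedata ends after rev 2's data
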
2441 2442 def _writeentry(
2442 2443 self, transaction, ifh, dfh, entry, data, link, offset, sidedata
2443 2444 ):
2444 2445 # Files opened in a+ mode have inconsistent behavior on various
2445 2446 # platforms. Windows requires that a file positioning call be made
2446 2447 # when the file handle transitions between reads and writes. See
2447 2448 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2448 2449 # platforms, Python or the platform itself can be buggy. Some versions
2449 2450 # of Solaris have been observed to not append at the end of the file
2450 2451 # if the file was seeked to before the end. See issue4943 for more.
2451 2452 #
2452 2453 # We work around this issue by inserting a seek() before writing.
2453 2454 # Note: This is likely not necessary on Python 3. However, because
2454 2455 # the file handle is reused for reads and may be seeked there, we need
2455 2456 # to be careful before changing this.
2456 2457 ifh.seek(0, os.SEEK_END)
2457 2458 if dfh:
2458 2459 dfh.seek(0, os.SEEK_END)
2459 2460
2460 2461 curr = len(self) - 1
2461 2462 if not self._inline:
2462 2463 transaction.add(self.datafile, offset)
2463 2464 transaction.add(self.indexfile, curr * len(entry))
2464 2465 if data[0]:
2465 2466 dfh.write(data[0])
2466 2467 dfh.write(data[1])
2467 2468 if sidedata:
2468 2469 dfh.write(sidedata)
2469 2470 ifh.write(entry)
2470 2471 else:
2471 2472 offset += curr * self._io.size
2472 2473 transaction.add(self.indexfile, offset)
2473 2474 ifh.write(entry)
2474 2475 ifh.write(data[0])
2475 2476 ifh.write(data[1])
2476 2477 if sidedata:
2477 2478 ifh.write(sidedata)
2478 2479 self._enforceinlinesize(transaction, ifh)
2479 2480 nodemaputil.setup_persistent_nodemap(transaction, self)
2480 2481
2481 2482 def addgroup(
2482 2483 self,
2483 2484 deltas,
2484 2485 linkmapper,
2485 2486 transaction,
2486 2487 alwayscache=False,
2487 2488 addrevisioncb=None,
2488 2489 duplicaterevisioncb=None,
2489 2490 ):
2490 2491 """
2491 2492 add a delta group
2492 2493
2493 2494 given a set of deltas, add them to the revision log. the
2494 2495 first delta is against its parent, which should be in our
2495 2496 log, the rest are against the previous delta.
2496 2497
2497 2498 If ``addrevisioncb`` is defined, it will be called with arguments of
2498 2499 this revlog and the node that was added.
2499 2500 """
2500 2501
2501 2502 if self._writinghandles:
2502 2503 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2503 2504
2504 2505 r = len(self)
2505 2506 end = 0
2506 2507 if r:
2507 2508 end = self.end(r - 1)
2508 2509 ifh = self._indexfp(b"a+")
2509 2510 isize = r * self._io.size
2510 2511 if self._inline:
2511 2512 transaction.add(self.indexfile, end + isize)
2512 2513 dfh = None
2513 2514 else:
2514 2515 transaction.add(self.indexfile, isize)
2515 2516 transaction.add(self.datafile, end)
2516 2517 dfh = self._datafp(b"a+")
2517 2518
2518 2519 def flush():
2519 2520 if dfh:
2520 2521 dfh.flush()
2521 2522 ifh.flush()
2522 2523
2523 2524 self._writinghandles = (ifh, dfh)
2524 2525 empty = True
2525 2526
2526 2527 try:
2527 2528 deltacomputer = deltautil.deltacomputer(self)
2528 2529 # loop through our set of deltas
2529 2530 for data in deltas:
2530 2531 node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
2531 2532 link = linkmapper(linknode)
2532 2533 flags = flags or REVIDX_DEFAULT_FLAGS
2533 2534
2534 2535 rev = self.index.get_rev(node)
2535 2536 if rev is not None:
2536 2537 # this can happen if two branches make the same change
2537 2538 self._nodeduplicatecallback(transaction, rev)
2538 2539 if duplicaterevisioncb:
2539 2540 duplicaterevisioncb(self, rev)
2540 2541 empty = False
2541 2542 continue
2542 2543
2543 2544 for p in (p1, p2):
2544 2545 if not self.index.has_node(p):
2545 2546 raise error.LookupError(
2546 2547 p, self.indexfile, _(b'unknown parent')
2547 2548 )
2548 2549
2549 2550 if not self.index.has_node(deltabase):
2550 2551 raise error.LookupError(
2551 2552 deltabase, self.indexfile, _(b'unknown delta base')
2552 2553 )
2553 2554
2554 2555 baserev = self.rev(deltabase)
2555 2556
2556 2557 if baserev != nullrev and self.iscensored(baserev):
2557 2558 # if base is censored, delta must be full replacement in a
2558 2559 # single patch operation
2559 2560 hlen = struct.calcsize(b">lll")
2560 2561 oldlen = self.rawsize(baserev)
2561 2562 newlen = len(delta) - hlen
2562 2563 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2563 2564 raise error.CensoredBaseError(
2564 2565 self.indexfile, self.node(baserev)
2565 2566 )
2566 2567
2567 2568 if not flags and self._peek_iscensored(baserev, delta, flush):
2568 2569 flags |= REVIDX_ISCENSORED
2569 2570
2570 2571 # We assume consumers of addrevisioncb will want to retrieve
2571 2572 # the added revision, which will require a call to
2572 2573 # revision(). revision() will fast path if there is a cache
2573 2574 # hit. So, we tell _addrevision() to always cache in this case.
2574 2575 # We're only using addgroup() in the context of changegroup
2575 2576 # generation so the revision data can always be handled as raw
2576 2577 # by the flagprocessor.
2577 2578 rev = self._addrevision(
2578 2579 node,
2579 2580 None,
2580 2581 transaction,
2581 2582 link,
2582 2583 p1,
2583 2584 p2,
2584 2585 flags,
2585 2586 (baserev, delta),
2586 2587 ifh,
2587 2588 dfh,
2588 2589 alwayscache=alwayscache,
2589 2590 deltacomputer=deltacomputer,
2591 sidedata=sidedata,
2590 2592 )
2591 2593
2592 2594 if addrevisioncb:
2593 2595 addrevisioncb(self, rev)
2594 2596 empty = False
2595 2597
2596 2598 if not dfh and not self._inline:
2597 2599 # addrevision switched from inline to conventional
2598 2600 # reopen the index
2599 2601 ifh.close()
2600 2602 dfh = self._datafp(b"a+")
2601 2603 ifh = self._indexfp(b"a+")
2602 2604 self._writinghandles = (ifh, dfh)
2603 2605 finally:
2604 2606 self._writinghandles = None
2605 2607
2606 2608 if dfh:
2607 2609 dfh.close()
2608 2610 ifh.close()
2609 2611 return not empty
2610 2612
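
The docstring of addgroup() says the first delta applies against a parent already in the log and each later delta against the previous result; a toy chain (with a trivial stand-in delta format, not Mercurial's binary patch format) illustrates that ordering:

def apply_delta(base, delta):
    # trivial stand-in for a real binary delta: (start, end, replacement)
    start, end, data = delta
    return base[:start] + data + base[end:]

text = b'abcdef'
deltas = [(0, 3, b'XYZ'), (3, 6, b'123')]
for d in deltas:
    text = apply_delta(text, d)
assert text == b'XYZ123'
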
2611 2613 def iscensored(self, rev):
2612 2614 """Check if a file revision is censored."""
2613 2615 if not self._censorable:
2614 2616 return False
2615 2617
2616 2618 return self.flags(rev) & REVIDX_ISCENSORED
2617 2619
2618 2620 def _peek_iscensored(self, baserev, delta, flush):
2619 2621 """Quickly check if a delta produces a censored revision."""
2620 2622 if not self._censorable:
2621 2623 return False
2622 2624
2623 2625 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2624 2626
2625 2627 def getstrippoint(self, minlink):
2626 2628 """find the minimum rev that must be stripped to strip the linkrev
2627 2629
2628 2630 Returns a tuple containing the minimum rev and a set of all revs that
2629 2631 have linkrevs that will be broken by this strip.
2630 2632 """
2631 2633 return storageutil.resolvestripinfo(
2632 2634 minlink,
2633 2635 len(self) - 1,
2634 2636 self.headrevs(),
2635 2637 self.linkrev,
2636 2638 self.parentrevs,
2637 2639 )
2638 2640
2639 2641 def strip(self, minlink, transaction):
2640 2642 """truncate the revlog on the first revision with a linkrev >= minlink
2641 2643
2642 2644 This function is called when we're stripping revision minlink and
2643 2645 its descendants from the repository.
2644 2646
2645 2647 We have to remove all revisions with linkrev >= minlink, because
2646 2648 the equivalent changelog revisions will be renumbered after the
2647 2649 strip.
2648 2650
2649 2651 So we truncate the revlog on the first of these revisions, and
2650 2652 trust that the caller has saved the revisions that shouldn't be
2651 2653 removed and that it'll re-add them after this truncation.
2652 2654 """
2653 2655 if len(self) == 0:
2654 2656 return
2655 2657
2656 2658 rev, _ = self.getstrippoint(minlink)
2657 2659 if rev == len(self):
2658 2660 return
2659 2661
2660 2662 # first truncate the files on disk
2661 2663 end = self.start(rev)
2662 2664 if not self._inline:
2663 2665 transaction.add(self.datafile, end)
2664 2666 end = rev * self._io.size
2665 2667 else:
2666 2668 end += rev * self._io.size
2667 2669
2668 2670 transaction.add(self.indexfile, end)
2669 2671
2670 2672 # then reset internal state in memory to forget those revisions
2671 2673 self._revisioncache = None
2672 2674 self._chaininfocache = util.lrucachedict(500)
2673 2675 self._chunkclear()
2674 2676
2675 2677 del self.index[rev:-1]
2676 2678
2677 2679 def checksize(self):
2678 2680 """Check size of index and data files
2679 2681
2680 2682 return a (dd, di) tuple.
2681 2683 - dd: extra bytes for the "data" file
2682 2684 - di: extra bytes for the "index" file
2683 2685
2684 2686 A healthy revlog will return (0, 0).
2685 2687 """
2686 2688 expected = 0
2687 2689 if len(self):
2688 2690 expected = max(0, self.end(len(self) - 1))
2689 2691
2690 2692 try:
2691 2693 with self._datafp() as f:
2692 2694 f.seek(0, io.SEEK_END)
2693 2695 actual = f.tell()
2694 2696 dd = actual - expected
2695 2697 except IOError as inst:
2696 2698 if inst.errno != errno.ENOENT:
2697 2699 raise
2698 2700 dd = 0
2699 2701
2700 2702 try:
2701 2703 f = self.opener(self.indexfile)
2702 2704 f.seek(0, io.SEEK_END)
2703 2705 actual = f.tell()
2704 2706 f.close()
2705 2707 s = self._io.size
2706 2708 i = max(0, actual // s)
2707 2709 di = actual - (i * s)
2708 2710 if self._inline:
2709 2711 databytes = 0
2710 2712 for r in self:
2711 2713 databytes += max(0, self.length(r))
2712 2714 dd = 0
2713 2715 di = actual - len(self) * s - databytes
2714 2716 except IOError as inst:
2715 2717 if inst.errno != errno.ENOENT:
2716 2718 raise
2717 2719 di = 0
2718 2720
2719 2721 return (dd, di)
2720 2722
2721 2723 def files(self):
2722 2724 res = [self.indexfile]
2723 2725 if not self._inline:
2724 2726 res.append(self.datafile)
2725 2727 return res
2726 2728
2727 2729 def emitrevisions(
2728 2730 self,
2729 2731 nodes,
2730 2732 nodesorder=None,
2731 2733 revisiondata=False,
2732 2734 assumehaveparentrevisions=False,
2733 2735 deltamode=repository.CG_DELTAMODE_STD,
2734 2736 ):
2735 2737 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2736 2738 raise error.ProgrammingError(
2737 2739 b'unhandled value for nodesorder: %s' % nodesorder
2738 2740 )
2739 2741
2740 2742 if nodesorder is None and not self._generaldelta:
2741 2743 nodesorder = b'storage'
2742 2744
2743 2745 if (
2744 2746 not self._storedeltachains
2745 2747 and deltamode != repository.CG_DELTAMODE_PREV
2746 2748 ):
2747 2749 deltamode = repository.CG_DELTAMODE_FULL
2748 2750
2749 2751 return storageutil.emitrevisions(
2750 2752 self,
2751 2753 nodes,
2752 2754 nodesorder,
2753 2755 revlogrevisiondelta,
2754 2756 deltaparentfn=self.deltaparent,
2755 2757 candeltafn=self.candelta,
2756 2758 rawsizefn=self.rawsize,
2757 2759 revdifffn=self.revdiff,
2758 2760 flagsfn=self.flags,
2759 2761 deltamode=deltamode,
2760 2762 revisiondata=revisiondata,
2761 2763 assumehaveparentrevisions=assumehaveparentrevisions,
2762 2764 )
2763 2765
2764 2766 DELTAREUSEALWAYS = b'always'
2765 2767 DELTAREUSESAMEREVS = b'samerevs'
2766 2768 DELTAREUSENEVER = b'never'
2767 2769
2768 2770 DELTAREUSEFULLADD = b'fulladd'
2769 2771
2770 2772 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2771 2773
2772 2774 def clone(
2773 2775 self,
2774 2776 tr,
2775 2777 destrevlog,
2776 2778 addrevisioncb=None,
2777 2779 deltareuse=DELTAREUSESAMEREVS,
2778 2780 forcedeltabothparents=None,
2779 2781 sidedatacompanion=None,
2780 2782 ):
2781 2783 """Copy this revlog to another, possibly with format changes.
2782 2784
2783 2785 The destination revlog will contain the same revisions and nodes.
2784 2786 However, it may not be bit-for-bit identical due to e.g. delta encoding
2785 2787 differences.
2786 2788
2787 2789 The ``deltareuse`` argument control how deltas from the existing revlog
2788 2790 are preserved in the destination revlog. The argument can have the
2789 2791 following values:
2790 2792
2791 2793 DELTAREUSEALWAYS
2792 2794 Deltas will always be reused (if possible), even if the destination
2793 2795 revlog would not select the same revisions for the delta. This is the
2794 2796 fastest mode of operation.
2795 2797 DELTAREUSESAMEREVS
2796 2798 Deltas will be reused if the destination revlog would pick the same
2797 2799 revisions for the delta. This mode strikes a balance between speed
2798 2800 and optimization.
2799 2801 DELTAREUSENEVER
2800 2802 Deltas will never be reused. This is the slowest mode of execution.
2801 2803 This mode can be used to recompute deltas (e.g. if the diff/delta
2802 2804 algorithm changes).
2803 2805 DELTAREUSEFULLADD
2804 2806             Revisions will be re-added as if they were new content. This is
2805 2807             slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2806 2808             e.g. large file detection and handling.
2807 2809
2808 2810 Delta computation can be slow, so the choice of delta reuse policy can
2809 2811 significantly affect run time.
2810 2812
2811 2813 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2812 2814 two extremes. Deltas will be reused if they are appropriate. But if the
2813 2815 delta could choose a better revision, it will do so. This means if you
2814 2816 are converting a non-generaldelta revlog to a generaldelta revlog,
2815 2817 deltas will be recomputed if the delta's parent isn't a parent of the
2816 2818 revision.
2817 2819
2818 2820 In addition to the delta policy, the ``forcedeltabothparents``
2819 2821 argument controls whether to force compute deltas against both parents
2820 2822         for merges. If None, the destination revlog's current setting is kept.
2821 2823
2822 2824         If not None, `sidedatacompanion` is a callable that accepts two
2823 2825 arguments:
2824 2826
2825 2827 (srcrevlog, rev)
2826 2828
2827 2829         and returns a quintet that controls changes to sidedata content from the
2828 2830 old revision to the new clone result:
2829 2831
2830 2832 (dropall, filterout, update, new_flags, dropped_flags)
2831 2833
2832 2834 * if `dropall` is True, all sidedata should be dropped
2833 2835 * `filterout` is a set of sidedata keys that should be dropped
2834 2836         * `update` is a mapping of additional/new key -> value
2835 2837         * new_flags is a bitfield of new flags that the revision should get
2836 2838         * dropped_flags is a bitfield of flags that the revision should no longer have
2837 2839 """
2838 2840 if deltareuse not in self.DELTAREUSEALL:
2839 2841 raise ValueError(
2840 2842 _(b'value for deltareuse invalid: %s') % deltareuse
2841 2843 )
2842 2844
2843 2845 if len(destrevlog):
2844 2846 raise ValueError(_(b'destination revlog is not empty'))
2845 2847
2846 2848 if getattr(self, 'filteredrevs', None):
2847 2849 raise ValueError(_(b'source revlog has filtered revisions'))
2848 2850 if getattr(destrevlog, 'filteredrevs', None):
2849 2851 raise ValueError(_(b'destination revlog has filtered revisions'))
2850 2852
2851 2853 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2852 2854 # if possible.
2853 2855 oldlazydelta = destrevlog._lazydelta
2854 2856 oldlazydeltabase = destrevlog._lazydeltabase
2855 2857 oldamd = destrevlog._deltabothparents
2856 2858
2857 2859 try:
2858 2860 if deltareuse == self.DELTAREUSEALWAYS:
2859 2861 destrevlog._lazydeltabase = True
2860 2862 destrevlog._lazydelta = True
2861 2863 elif deltareuse == self.DELTAREUSESAMEREVS:
2862 2864 destrevlog._lazydeltabase = False
2863 2865 destrevlog._lazydelta = True
2864 2866 elif deltareuse == self.DELTAREUSENEVER:
2865 2867 destrevlog._lazydeltabase = False
2866 2868 destrevlog._lazydelta = False
2867 2869
2868 2870 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2869 2871
2870 2872 self._clone(
2871 2873 tr,
2872 2874 destrevlog,
2873 2875 addrevisioncb,
2874 2876 deltareuse,
2875 2877 forcedeltabothparents,
2876 2878 sidedatacompanion,
2877 2879 )
2878 2880
2879 2881 finally:
2880 2882 destrevlog._lazydelta = oldlazydelta
2881 2883 destrevlog._lazydeltabase = oldlazydeltabase
2882 2884 destrevlog._deltabothparents = oldamd
2883 2885
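
For reference, a hedged sketch of a sidedatacompanion callable with the shape documented in clone() above; the sidedata keys and the decision logic are invented for illustration:

def example_companion(srcrevlog, rev):
    dropall = False                        # keep sidedata in general
    filterout = {b'obsolete-key'}          # hypothetical key to strip
    update = {b'example-key': b'value'}    # hypothetical entry to add
    new_flags = 0                          # no revision flags added
    dropped_flags = 0                      # no revision flags removed
    return (dropall, filterout, update, new_flags, dropped_flags)
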
2884 2886 def _clone(
2885 2887 self,
2886 2888 tr,
2887 2889 destrevlog,
2888 2890 addrevisioncb,
2889 2891 deltareuse,
2890 2892 forcedeltabothparents,
2891 2893 sidedatacompanion,
2892 2894 ):
2893 2895 """perform the core duty of `revlog.clone` after parameter processing"""
2894 2896 deltacomputer = deltautil.deltacomputer(destrevlog)
2895 2897 index = self.index
2896 2898 for rev in self:
2897 2899 entry = index[rev]
2898 2900
2899 2901 # Some classes override linkrev to take filtered revs into
2900 2902 # account. Use raw entry from index.
2901 2903 flags = entry[0] & 0xFFFF
2902 2904 linkrev = entry[4]
2903 2905 p1 = index[entry[5]][7]
2904 2906 p2 = index[entry[6]][7]
2905 2907 node = entry[7]
2906 2908
2907 2909 sidedataactions = (False, [], {}, 0, 0)
2908 2910 if sidedatacompanion is not None:
2909 2911 sidedataactions = sidedatacompanion(self, rev)
2910 2912
2911 2913 # (Possibly) reuse the delta from the revlog if allowed and
2912 2914 # the revlog chunk is a delta.
2913 2915 cachedelta = None
2914 2916 rawtext = None
2915 2917 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2916 2918 dropall = sidedataactions[0]
2917 2919 filterout = sidedataactions[1]
2918 2920 update = sidedataactions[2]
2919 2921 new_flags = sidedataactions[3]
2920 2922 dropped_flags = sidedataactions[4]
2921 2923 text, sidedata = self._revisiondata(rev)
2922 2924 if dropall:
2923 2925 sidedata = {}
2924 2926 for key in filterout:
2925 2927 sidedata.pop(key, None)
2926 2928 sidedata.update(update)
2927 2929 if not sidedata:
2928 2930 sidedata = None
2929 2931
2930 2932 flags |= new_flags
2931 2933 flags &= ~dropped_flags
2932 2934
2933 2935 destrevlog.addrevision(
2934 2936 text,
2935 2937 tr,
2936 2938 linkrev,
2937 2939 p1,
2938 2940 p2,
2939 2941 cachedelta=cachedelta,
2940 2942 node=node,
2941 2943 flags=flags,
2942 2944 deltacomputer=deltacomputer,
2943 2945 sidedata=sidedata,
2944 2946 )
2945 2947 else:
2946 2948 if destrevlog._lazydelta:
2947 2949 dp = self.deltaparent(rev)
2948 2950 if dp != nullrev:
2949 2951 cachedelta = (dp, bytes(self._chunk(rev)))
2950 2952
2951 2953 if not cachedelta:
2952 2954 rawtext = self.rawdata(rev)
2953 2955
2954 2956 ifh = destrevlog.opener(
2955 2957 destrevlog.indexfile, b'a+', checkambig=False
2956 2958 )
2957 2959 dfh = None
2958 2960 if not destrevlog._inline:
2959 2961 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2960 2962 try:
2961 2963 destrevlog._addrevision(
2962 2964 node,
2963 2965 rawtext,
2964 2966 tr,
2965 2967 linkrev,
2966 2968 p1,
2967 2969 p2,
2968 2970 flags,
2969 2971 cachedelta,
2970 2972 ifh,
2971 2973 dfh,
2972 2974 deltacomputer=deltacomputer,
2973 2975 )
2974 2976 finally:
2975 2977 if dfh:
2976 2978 dfh.close()
2977 2979 ifh.close()
2978 2980
2979 2981 if addrevisioncb:
2980 2982 addrevisioncb(self, rev, node)
2981 2983
2982 2984 def censorrevision(self, tr, censornode, tombstone=b''):
2983 2985 if (self.version & 0xFFFF) == REVLOGV0:
2984 2986 raise error.RevlogError(
2985 2987 _(b'cannot censor with version %d revlogs') % self.version
2986 2988 )
2987 2989
2988 2990 censorrev = self.rev(censornode)
2989 2991 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2990 2992
2991 2993 if len(tombstone) > self.rawsize(censorrev):
2992 2994 raise error.Abort(
2993 2995 _(b'censor tombstone must be no longer than censored data')
2994 2996 )
2995 2997
2996 2998 # Rewriting the revlog in place is hard. Our strategy for censoring is
2997 2999 # to create a new revlog, copy all revisions to it, then replace the
2998 3000 # revlogs on transaction close.
2999 3001
3000 3002 newindexfile = self.indexfile + b'.tmpcensored'
3001 3003 newdatafile = self.datafile + b'.tmpcensored'
3002 3004
3003 3005 # This is a bit dangerous. We could easily have a mismatch of state.
3004 3006 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
3005 3007 newrl.version = self.version
3006 3008 newrl._generaldelta = self._generaldelta
3007 3009 newrl._io = self._io
3008 3010
3009 3011 for rev in self.revs():
3010 3012 node = self.node(rev)
3011 3013 p1, p2 = self.parents(node)
3012 3014
3013 3015 if rev == censorrev:
3014 3016 newrl.addrawrevision(
3015 3017 tombstone,
3016 3018 tr,
3017 3019 self.linkrev(censorrev),
3018 3020 p1,
3019 3021 p2,
3020 3022 censornode,
3021 3023 REVIDX_ISCENSORED,
3022 3024 )
3023 3025
3024 3026 if newrl.deltaparent(rev) != nullrev:
3025 3027 raise error.Abort(
3026 3028 _(
3027 3029 b'censored revision stored as delta; '
3028 3030 b'cannot censor'
3029 3031 ),
3030 3032 hint=_(
3031 3033 b'censoring of revlogs is not '
3032 3034 b'fully implemented; please report '
3033 3035 b'this bug'
3034 3036 ),
3035 3037 )
3036 3038 continue
3037 3039
3038 3040 if self.iscensored(rev):
3039 3041 if self.deltaparent(rev) != nullrev:
3040 3042 raise error.Abort(
3041 3043 _(
3042 3044 b'cannot censor due to censored '
3043 3045 b'revision having delta stored'
3044 3046 )
3045 3047 )
3046 3048 rawtext = self._chunk(rev)
3047 3049 else:
3048 3050 rawtext = self.rawdata(rev)
3049 3051
3050 3052 newrl.addrawrevision(
3051 3053 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3052 3054 )
3053 3055
3054 3056 tr.addbackup(self.indexfile, location=b'store')
3055 3057 if not self._inline:
3056 3058 tr.addbackup(self.datafile, location=b'store')
3057 3059
3058 3060 self.opener.rename(newrl.indexfile, self.indexfile)
3059 3061 if not self._inline:
3060 3062 self.opener.rename(newrl.datafile, self.datafile)
3061 3063
3062 3064 self.clearcaches()
3063 3065 self._loadindex()
3064 3066
3065 3067 def verifyintegrity(self, state):
3066 3068 """Verifies the integrity of the revlog.
3067 3069
3068 3070 Yields ``revlogproblem`` instances describing problems that are
3069 3071 found.
3070 3072 """
3071 3073 dd, di = self.checksize()
3072 3074 if dd:
3073 3075 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3074 3076 if di:
3075 3077 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3076 3078
3077 3079 version = self.version & 0xFFFF
3078 3080
3079 3081 # The verifier tells us what version revlog we should be.
3080 3082 if version != state[b'expectedversion']:
3081 3083 yield revlogproblem(
3082 3084 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3083 3085 % (self.indexfile, version, state[b'expectedversion'])
3084 3086 )
3085 3087
3086 3088 state[b'skipread'] = set()
3087 3089 state[b'safe_renamed'] = set()
3088 3090
3089 3091 for rev in self:
3090 3092 node = self.node(rev)
3091 3093
3092 3094 # Verify contents. 4 cases to care about:
3093 3095 #
3094 3096 # common: the most common case
3095 3097 # rename: with a rename
3096 3098 # meta: file content starts with b'\1\n', the metadata
3097 3099 # header defined in filelog.py, but without a rename
3098 3100 # ext: content stored externally
3099 3101 #
3100 3102 # More formally, their differences are shown below:
3101 3103 #
3102 3104 # | common | rename | meta | ext
3103 3105 # -------------------------------------------------------
3104 3106 # flags() | 0 | 0 | 0 | not 0
3105 3107 # renamed() | False | True | False | ?
3106 3108 # rawtext[0:2]=='\1\n'| False | True | True | ?
3107 3109 #
3108 3110 # "rawtext" means the raw text stored in revlog data, which
3109 3111 # could be retrieved by "rawdata(rev)". "text"
3110 3112 # mentioned below is "revision(rev)".
3111 3113 #
3112 3114 # There are 3 different lengths stored physically:
3113 3115 # 1. L1: rawsize, stored in revlog index
3114 3116 # 2. L2: len(rawtext), stored in revlog data
3115 3117 # 3. L3: len(text), stored in revlog data if flags==0, or
3116 3118 # possibly somewhere else if flags!=0
3117 3119 #
3118 3120 # L1 should be equal to L2. L3 could be different from them.
3119 3121 # "text" may or may not affect commit hash depending on flag
3120 3122 # processors (see flagutil.addflagprocessor).
3121 3123 #
3122 3124 # | common | rename | meta | ext
3123 3125 # -------------------------------------------------
3124 3126 # rawsize() | L1 | L1 | L1 | L1
3125 3127 # size() | L1 | L2-LM | L1(*) | L1 (?)
3126 3128 # len(rawtext) | L2 | L2 | L2 | L2
3127 3129 # len(text) | L2 | L2 | L2 | L3
3128 3130 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3129 3131 #
3130 3132 # LM: length of metadata, depending on rawtext
3131 3133 # (*): not ideal, see comment in filelog.size
3132 3134 # (?): could be "- len(meta)" if the resolved content has
3133 3135 # rename metadata
3134 3136 #
3135 3137 # Checks needed to be done:
3136 3138 # 1. length check: L1 == L2, in all cases.
3137 3139 # 2. hash check: depending on flag processor, we may need to
3138 3140 # use either "text" (external), or "rawtext" (in revlog).
3139 3141
3140 3142 try:
3141 3143 skipflags = state.get(b'skipflags', 0)
3142 3144 if skipflags:
3143 3145 skipflags &= self.flags(rev)
3144 3146
3145 3147 _verify_revision(self, skipflags, state, node)
3146 3148
3147 3149 l1 = self.rawsize(rev)
3148 3150 l2 = len(self.rawdata(node))
3149 3151
3150 3152 if l1 != l2:
3151 3153 yield revlogproblem(
3152 3154 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3153 3155 node=node,
3154 3156 )
3155 3157
3156 3158 except error.CensoredNodeError:
3157 3159 if state[b'erroroncensored']:
3158 3160 yield revlogproblem(
3159 3161 error=_(b'censored file data'), node=node
3160 3162 )
3161 3163 state[b'skipread'].add(node)
3162 3164 except Exception as e:
3163 3165 yield revlogproblem(
3164 3166 error=_(b'unpacking %s: %s')
3165 3167 % (short(node), stringutil.forcebytestr(e)),
3166 3168 node=node,
3167 3169 )
3168 3170 state[b'skipread'].add(node)
3169 3171
3170 3172 def storageinfo(
3171 3173 self,
3172 3174 exclusivefiles=False,
3173 3175 sharedfiles=False,
3174 3176 revisionscount=False,
3175 3177 trackedsize=False,
3176 3178 storedsize=False,
3177 3179 ):
3178 3180 d = {}
3179 3181
3180 3182 if exclusivefiles:
3181 3183 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3182 3184 if not self._inline:
3183 3185 d[b'exclusivefiles'].append((self.opener, self.datafile))
3184 3186
3185 3187 if sharedfiles:
3186 3188 d[b'sharedfiles'] = []
3187 3189
3188 3190 if revisionscount:
3189 3191 d[b'revisionscount'] = len(self)
3190 3192
3191 3193 if trackedsize:
3192 3194 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3193 3195
3194 3196 if storedsize:
3195 3197 d[b'storedsize'] = sum(
3196 3198 self.opener.stat(path).st_size for path in self.files()
3197 3199 )
3198 3200
3199 3201 return d
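Editor's note: the verification above boils down to two ideas, re-hash each revision and check that the size recorded in the index (L1) matches the length of the stored raw text (L2). A minimal sketch of that length check, assuming only the ``rawsize``/``rawdata``/``node`` accessors used in this file:

    def checklengths(rl):
        # Yield (rev, index_size, data_size) for revisions whose sizes disagree.
        for rev in rl:
            node = rl.node(rev)
            l1 = rl.rawsize(rev)        # L1: length recorded in the revlog index
            l2 = len(rl.rawdata(node))  # L2: length of the raw text actually stored
            if l1 != l2:
                yield rev, l1, l2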
@@ -1,1450 +1,1454 b''
1 1 # storage.py - Testing of storage primitives.
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import unittest
11 11
12 12 from ..node import (
13 13 hex,
14 14 nullid,
15 15 nullrev,
16 16 )
17 17 from ..pycompat import getattr
18 18 from .. import (
19 19 error,
20 20 mdiff,
21 21 )
22 22 from ..interfaces import repository
23 23 from ..utils import storageutil
24 24
25 25
26 26 class basetestcase(unittest.TestCase):
27 27 if not getattr(unittest.TestCase, 'assertRaisesRegex', False):
28 28 assertRaisesRegex = ( # camelcase-required
29 29 unittest.TestCase.assertRaisesRegexp
30 30 )
31 31
32 32
33 33 class ifileindextests(basetestcase):
34 34 """Generic tests for the ifileindex interface.
35 35
36 36 All file storage backends for index data should conform to the tests in this
37 37 class.
38 38
39 39 Use ``makeifileindextests()`` to create an instance of this type.
40 40 """
41 41
42 42 def testempty(self):
43 43 f = self._makefilefn()
44 44 self.assertEqual(len(f), 0, b'new file store has 0 length by default')
45 45 self.assertEqual(list(f), [], b'iter yields nothing by default')
46 46
47 47 gen = iter(f)
48 48 with self.assertRaises(StopIteration):
49 49 next(gen)
50 50
51 51 self.assertFalse(f.hasnode(None))
52 52 self.assertFalse(f.hasnode(0))
53 53 self.assertFalse(f.hasnode(nullrev))
54 54 self.assertFalse(f.hasnode(nullid))
55 55 self.assertFalse(f.hasnode(b'0'))
56 56 self.assertFalse(f.hasnode(b'a' * 20))
57 57
58 58 # revs() should evaluate to an empty list.
59 59 self.assertEqual(list(f.revs()), [])
60 60
61 61 revs = iter(f.revs())
62 62 with self.assertRaises(StopIteration):
63 63 next(revs)
64 64
65 65 self.assertEqual(list(f.revs(start=20)), [])
66 66
67 67 # parents() and parentrevs() work with nullid/nullrev.
68 68 self.assertEqual(f.parents(nullid), (nullid, nullid))
69 69 self.assertEqual(f.parentrevs(nullrev), (nullrev, nullrev))
70 70
71 71 with self.assertRaises(error.LookupError):
72 72 f.parents(b'\x01' * 20)
73 73
74 74 for i in range(-5, 5):
75 75 if i == nullrev:
76 76 continue
77 77
78 78 with self.assertRaises(IndexError):
79 79 f.parentrevs(i)
80 80
81 81 # nullid/nullrev lookup always works.
82 82 self.assertEqual(f.rev(nullid), nullrev)
83 83 self.assertEqual(f.node(nullrev), nullid)
84 84
85 85 with self.assertRaises(error.LookupError):
86 86 f.rev(b'\x01' * 20)
87 87
88 88 for i in range(-5, 5):
89 89 if i == nullrev:
90 90 continue
91 91
92 92 with self.assertRaises(IndexError):
93 93 f.node(i)
94 94
95 95 self.assertEqual(f.lookup(nullid), nullid)
96 96 self.assertEqual(f.lookup(nullrev), nullid)
97 97 self.assertEqual(f.lookup(hex(nullid)), nullid)
98 98 self.assertEqual(f.lookup(b'%d' % nullrev), nullid)
99 99
100 100 with self.assertRaises(error.LookupError):
101 101 f.lookup(b'badvalue')
102 102
103 103 with self.assertRaises(error.LookupError):
104 104 f.lookup(hex(nullid)[0:12])
105 105
106 106 with self.assertRaises(error.LookupError):
107 107 f.lookup(b'-2')
108 108
109 109 with self.assertRaises(error.LookupError):
110 110 f.lookup(b'0')
111 111
112 112 with self.assertRaises(error.LookupError):
113 113 f.lookup(b'1')
114 114
115 115 with self.assertRaises(error.LookupError):
116 116 f.lookup(b'11111111111111111111111111111111111111')
117 117
118 118 for i in range(-5, 5):
119 119 if i == nullrev:
120 120 continue
121 121
122 122 with self.assertRaises(LookupError):
123 123 f.lookup(i)
124 124
125 125 self.assertEqual(f.linkrev(nullrev), nullrev)
126 126
127 127 for i in range(-5, 5):
128 128 if i == nullrev:
129 129 continue
130 130
131 131 with self.assertRaises(IndexError):
132 132 f.linkrev(i)
133 133
134 134 self.assertFalse(f.iscensored(nullrev))
135 135
136 136 for i in range(-5, 5):
137 137 if i == nullrev:
138 138 continue
139 139
140 140 with self.assertRaises(IndexError):
141 141 f.iscensored(i)
142 142
143 143 self.assertEqual(list(f.commonancestorsheads(nullid, nullid)), [])
144 144
145 145 with self.assertRaises(ValueError):
146 146 self.assertEqual(list(f.descendants([])), [])
147 147
148 148 self.assertEqual(list(f.descendants([nullrev])), [])
149 149
150 150 self.assertEqual(f.heads(), [nullid])
151 151 self.assertEqual(f.heads(nullid), [nullid])
152 152 self.assertEqual(f.heads(None, [nullid]), [nullid])
153 153 self.assertEqual(f.heads(nullid, [nullid]), [nullid])
154 154
155 155 self.assertEqual(f.children(nullid), [])
156 156
157 157 with self.assertRaises(error.LookupError):
158 158 f.children(b'\x01' * 20)
159 159
160 160 def testsinglerevision(self):
161 161 f = self._makefilefn()
162 162 with self._maketransactionfn() as tr:
163 163 node = f.add(b'initial', None, tr, 0, nullid, nullid)
164 164
165 165 self.assertEqual(len(f), 1)
166 166 self.assertEqual(list(f), [0])
167 167
168 168 gen = iter(f)
169 169 self.assertEqual(next(gen), 0)
170 170
171 171 with self.assertRaises(StopIteration):
172 172 next(gen)
173 173
174 174 self.assertTrue(f.hasnode(node))
175 175 self.assertFalse(f.hasnode(hex(node)))
176 176 self.assertFalse(f.hasnode(nullrev))
177 177 self.assertFalse(f.hasnode(nullid))
178 178 self.assertFalse(f.hasnode(node[0:12]))
179 179 self.assertFalse(f.hasnode(hex(node)[0:20]))
180 180
181 181 self.assertEqual(list(f.revs()), [0])
182 182 self.assertEqual(list(f.revs(start=1)), [])
183 183 self.assertEqual(list(f.revs(start=0)), [0])
184 184 self.assertEqual(list(f.revs(stop=0)), [0])
185 185 self.assertEqual(list(f.revs(stop=1)), [0])
186 186 self.assertEqual(list(f.revs(1, 1)), [])
187 187 # TODO buggy
188 188 self.assertEqual(list(f.revs(1, 0)), [1, 0])
189 189 self.assertEqual(list(f.revs(2, 0)), [2, 1, 0])
190 190
191 191 self.assertEqual(f.parents(node), (nullid, nullid))
192 192 self.assertEqual(f.parentrevs(0), (nullrev, nullrev))
193 193
194 194 with self.assertRaises(error.LookupError):
195 195 f.parents(b'\x01' * 20)
196 196
197 197 with self.assertRaises(IndexError):
198 198 f.parentrevs(1)
199 199
200 200 self.assertEqual(f.rev(node), 0)
201 201
202 202 with self.assertRaises(error.LookupError):
203 203 f.rev(b'\x01' * 20)
204 204
205 205 self.assertEqual(f.node(0), node)
206 206
207 207 with self.assertRaises(IndexError):
208 208 f.node(1)
209 209
210 210 self.assertEqual(f.lookup(node), node)
211 211 self.assertEqual(f.lookup(0), node)
212 212 self.assertEqual(f.lookup(-1), nullid)
213 213 self.assertEqual(f.lookup(b'0'), node)
214 214 self.assertEqual(f.lookup(hex(node)), node)
215 215
216 216 with self.assertRaises(error.LookupError):
217 217 f.lookup(hex(node)[0:12])
218 218
219 219 with self.assertRaises(error.LookupError):
220 220 f.lookup(-2)
221 221
222 222 with self.assertRaises(error.LookupError):
223 223 f.lookup(b'-2')
224 224
225 225 with self.assertRaises(error.LookupError):
226 226 f.lookup(1)
227 227
228 228 with self.assertRaises(error.LookupError):
229 229 f.lookup(b'1')
230 230
231 231 self.assertEqual(f.linkrev(0), 0)
232 232
233 233 with self.assertRaises(IndexError):
234 234 f.linkrev(1)
235 235
236 236 self.assertFalse(f.iscensored(0))
237 237
238 238 with self.assertRaises(IndexError):
239 239 f.iscensored(1)
240 240
241 241 self.assertEqual(list(f.descendants([0])), [])
242 242
243 243 self.assertEqual(f.heads(), [node])
244 244 self.assertEqual(f.heads(node), [node])
245 245 self.assertEqual(f.heads(stop=[node]), [node])
246 246
247 247 with self.assertRaises(error.LookupError):
248 248 f.heads(stop=[b'\x01' * 20])
249 249
250 250 self.assertEqual(f.children(node), [])
251 251
252 252 def testmultiplerevisions(self):
253 253 fulltext0 = b'x' * 1024
254 254 fulltext1 = fulltext0 + b'y'
255 255 fulltext2 = b'y' + fulltext0 + b'z'
256 256
257 257 f = self._makefilefn()
258 258 with self._maketransactionfn() as tr:
259 259 node0 = f.add(fulltext0, None, tr, 0, nullid, nullid)
260 260 node1 = f.add(fulltext1, None, tr, 1, node0, nullid)
261 261 node2 = f.add(fulltext2, None, tr, 3, node1, nullid)
262 262
263 263 self.assertEqual(len(f), 3)
264 264 self.assertEqual(list(f), [0, 1, 2])
265 265
266 266 gen = iter(f)
267 267 self.assertEqual(next(gen), 0)
268 268 self.assertEqual(next(gen), 1)
269 269 self.assertEqual(next(gen), 2)
270 270
271 271 with self.assertRaises(StopIteration):
272 272 next(gen)
273 273
274 274 self.assertEqual(list(f.revs()), [0, 1, 2])
275 275 self.assertEqual(list(f.revs(0)), [0, 1, 2])
276 276 self.assertEqual(list(f.revs(1)), [1, 2])
277 277 self.assertEqual(list(f.revs(2)), [2])
278 278 self.assertEqual(list(f.revs(3)), [])
279 279 self.assertEqual(list(f.revs(stop=1)), [0, 1])
280 280 self.assertEqual(list(f.revs(stop=2)), [0, 1, 2])
281 281 self.assertEqual(list(f.revs(stop=3)), [0, 1, 2])
282 282 self.assertEqual(list(f.revs(2, 0)), [2, 1, 0])
283 283 self.assertEqual(list(f.revs(2, 1)), [2, 1])
284 284 # TODO this is wrong
285 285 self.assertEqual(list(f.revs(3, 2)), [3, 2])
286 286
287 287 self.assertEqual(f.parents(node0), (nullid, nullid))
288 288 self.assertEqual(f.parents(node1), (node0, nullid))
289 289 self.assertEqual(f.parents(node2), (node1, nullid))
290 290
291 291 self.assertEqual(f.parentrevs(0), (nullrev, nullrev))
292 292 self.assertEqual(f.parentrevs(1), (0, nullrev))
293 293 self.assertEqual(f.parentrevs(2), (1, nullrev))
294 294
295 295 self.assertEqual(f.rev(node0), 0)
296 296 self.assertEqual(f.rev(node1), 1)
297 297 self.assertEqual(f.rev(node2), 2)
298 298
299 299 with self.assertRaises(error.LookupError):
300 300 f.rev(b'\x01' * 20)
301 301
302 302 self.assertEqual(f.node(0), node0)
303 303 self.assertEqual(f.node(1), node1)
304 304 self.assertEqual(f.node(2), node2)
305 305
306 306 with self.assertRaises(IndexError):
307 307 f.node(3)
308 308
309 309 self.assertEqual(f.lookup(node0), node0)
310 310 self.assertEqual(f.lookup(0), node0)
311 311 self.assertEqual(f.lookup(b'0'), node0)
312 312 self.assertEqual(f.lookup(hex(node0)), node0)
313 313
314 314 self.assertEqual(f.lookup(node1), node1)
315 315 self.assertEqual(f.lookup(1), node1)
316 316 self.assertEqual(f.lookup(b'1'), node1)
317 317 self.assertEqual(f.lookup(hex(node1)), node1)
318 318
319 319 self.assertEqual(f.linkrev(0), 0)
320 320 self.assertEqual(f.linkrev(1), 1)
321 321 self.assertEqual(f.linkrev(2), 3)
322 322
323 323 with self.assertRaises(IndexError):
324 324 f.linkrev(3)
325 325
326 326 self.assertFalse(f.iscensored(0))
327 327 self.assertFalse(f.iscensored(1))
328 328 self.assertFalse(f.iscensored(2))
329 329
330 330 with self.assertRaises(IndexError):
331 331 f.iscensored(3)
332 332
333 333 self.assertEqual(f.commonancestorsheads(node1, nullid), [])
334 334 self.assertEqual(f.commonancestorsheads(node1, node0), [node0])
335 335 self.assertEqual(f.commonancestorsheads(node1, node1), [node1])
336 336 self.assertEqual(f.commonancestorsheads(node0, node1), [node0])
337 337 self.assertEqual(f.commonancestorsheads(node1, node2), [node1])
338 338 self.assertEqual(f.commonancestorsheads(node2, node1), [node1])
339 339
340 340 self.assertEqual(list(f.descendants([0])), [1, 2])
341 341 self.assertEqual(list(f.descendants([1])), [2])
342 342 self.assertEqual(list(f.descendants([0, 1])), [1, 2])
343 343
344 344 self.assertEqual(f.heads(), [node2])
345 345 self.assertEqual(f.heads(node0), [node2])
346 346 self.assertEqual(f.heads(node1), [node2])
347 347 self.assertEqual(f.heads(node2), [node2])
348 348
349 349 # TODO this behavior seems wonky. Is it correct? If so, the
350 350 # docstring for heads() should be updated to reflect desired
351 351 # behavior.
352 352 self.assertEqual(f.heads(stop=[node1]), [node1, node2])
353 353 self.assertEqual(f.heads(stop=[node0]), [node0, node2])
354 354 self.assertEqual(f.heads(stop=[node1, node2]), [node1, node2])
355 355
356 356 with self.assertRaises(error.LookupError):
357 357 f.heads(stop=[b'\x01' * 20])
358 358
359 359 self.assertEqual(f.children(node0), [node1])
360 360 self.assertEqual(f.children(node1), [node2])
361 361 self.assertEqual(f.children(node2), [])
362 362
363 363 def testmultipleheads(self):
364 364 f = self._makefilefn()
365 365
366 366 with self._maketransactionfn() as tr:
367 367 node0 = f.add(b'0', None, tr, 0, nullid, nullid)
368 368 node1 = f.add(b'1', None, tr, 1, node0, nullid)
369 369 node2 = f.add(b'2', None, tr, 2, node1, nullid)
370 370 node3 = f.add(b'3', None, tr, 3, node0, nullid)
371 371 node4 = f.add(b'4', None, tr, 4, node3, nullid)
372 372 node5 = f.add(b'5', None, tr, 5, node0, nullid)
373 373
374 374 self.assertEqual(len(f), 6)
375 375
376 376 self.assertEqual(list(f.descendants([0])), [1, 2, 3, 4, 5])
377 377 self.assertEqual(list(f.descendants([1])), [2])
378 378 self.assertEqual(list(f.descendants([2])), [])
379 379 self.assertEqual(list(f.descendants([3])), [4])
380 380 self.assertEqual(list(f.descendants([0, 1])), [1, 2, 3, 4, 5])
381 381 self.assertEqual(list(f.descendants([1, 3])), [2, 4])
382 382
383 383 self.assertEqual(f.heads(), [node2, node4, node5])
384 384 self.assertEqual(f.heads(node0), [node2, node4, node5])
385 385 self.assertEqual(f.heads(node1), [node2])
386 386 self.assertEqual(f.heads(node2), [node2])
387 387 self.assertEqual(f.heads(node3), [node4])
388 388 self.assertEqual(f.heads(node4), [node4])
389 389 self.assertEqual(f.heads(node5), [node5])
390 390
391 391 # TODO this seems wrong.
392 392 self.assertEqual(f.heads(stop=[node0]), [node0, node2, node4, node5])
393 393 self.assertEqual(f.heads(stop=[node1]), [node1, node2, node4, node5])
394 394
395 395 self.assertEqual(f.children(node0), [node1, node3, node5])
396 396 self.assertEqual(f.children(node1), [node2])
397 397 self.assertEqual(f.children(node2), [])
398 398 self.assertEqual(f.children(node3), [node4])
399 399 self.assertEqual(f.children(node4), [])
400 400 self.assertEqual(f.children(node5), [])
401 401
402 402
403 403 class ifiledatatests(basetestcase):
404 404 """Generic tests for the ifiledata interface.
405 405
406 406 All file storage backends for data should conform to the tests in this
407 407 class.
408 408
409 409 Use ``makeifiledatatests()`` to create an instance of this type.
410 410 """
411 411
412 412 def testempty(self):
413 413 f = self._makefilefn()
414 414
415 415 self.assertEqual(f.storageinfo(), {})
416 416 self.assertEqual(
417 417 f.storageinfo(revisionscount=True, trackedsize=True),
418 418 {b'revisionscount': 0, b'trackedsize': 0},
419 419 )
420 420
421 421 self.assertEqual(f.size(nullrev), 0)
422 422
423 423 for i in range(-5, 5):
424 424 if i == nullrev:
425 425 continue
426 426
427 427 with self.assertRaises(IndexError):
428 428 f.size(i)
429 429
430 430 self.assertEqual(f.revision(nullid), b'')
431 431 self.assertEqual(f.rawdata(nullid), b'')
432 432
433 433 with self.assertRaises(error.LookupError):
434 434 f.revision(b'\x01' * 20)
435 435
436 436 self.assertEqual(f.read(nullid), b'')
437 437
438 438 with self.assertRaises(error.LookupError):
439 439 f.read(b'\x01' * 20)
440 440
441 441 self.assertFalse(f.renamed(nullid))
442 442
443 443 with self.assertRaises(error.LookupError):
444 444 f.read(b'\x01' * 20)
445 445
446 446 self.assertTrue(f.cmp(nullid, b''))
447 447 self.assertTrue(f.cmp(nullid, b'foo'))
448 448
449 449 with self.assertRaises(error.LookupError):
450 450 f.cmp(b'\x01' * 20, b'irrelevant')
451 451
452 452 # Emitting empty list is an empty generator.
453 453 gen = f.emitrevisions([])
454 454 with self.assertRaises(StopIteration):
455 455 next(gen)
456 456
457 457 # Emitting null node yields nothing.
458 458 gen = f.emitrevisions([nullid])
459 459 with self.assertRaises(StopIteration):
460 460 next(gen)
461 461
462 462 # Requesting unknown node fails.
463 463 with self.assertRaises(error.LookupError):
464 464 list(f.emitrevisions([b'\x01' * 20]))
465 465
466 466 def testsinglerevision(self):
467 467 fulltext = b'initial'
468 468
469 469 f = self._makefilefn()
470 470 with self._maketransactionfn() as tr:
471 471 node = f.add(fulltext, None, tr, 0, nullid, nullid)
472 472
473 473 self.assertEqual(f.storageinfo(), {})
474 474 self.assertEqual(
475 475 f.storageinfo(revisionscount=True, trackedsize=True),
476 476 {b'revisionscount': 1, b'trackedsize': len(fulltext)},
477 477 )
478 478
479 479 self.assertEqual(f.size(0), len(fulltext))
480 480
481 481 with self.assertRaises(IndexError):
482 482 f.size(1)
483 483
484 484 self.assertEqual(f.revision(node), fulltext)
485 485 self.assertEqual(f.rawdata(node), fulltext)
486 486
487 487 self.assertEqual(f.read(node), fulltext)
488 488
489 489 self.assertFalse(f.renamed(node))
490 490
491 491 self.assertFalse(f.cmp(node, fulltext))
492 492 self.assertTrue(f.cmp(node, fulltext + b'extra'))
493 493
494 494 # Emitting a single revision works.
495 495 gen = f.emitrevisions([node])
496 496 rev = next(gen)
497 497
498 498 self.assertEqual(rev.node, node)
499 499 self.assertEqual(rev.p1node, nullid)
500 500 self.assertEqual(rev.p2node, nullid)
501 501 self.assertIsNone(rev.linknode)
502 502 self.assertEqual(rev.basenode, nullid)
503 503 self.assertIsNone(rev.baserevisionsize)
504 504 self.assertIsNone(rev.revision)
505 505 self.assertIsNone(rev.delta)
506 506
507 507 with self.assertRaises(StopIteration):
508 508 next(gen)
509 509
510 510 # Requesting revision data works.
511 511 gen = f.emitrevisions([node], revisiondata=True)
512 512 rev = next(gen)
513 513
514 514 self.assertEqual(rev.node, node)
515 515 self.assertEqual(rev.p1node, nullid)
516 516 self.assertEqual(rev.p2node, nullid)
517 517 self.assertIsNone(rev.linknode)
518 518 self.assertEqual(rev.basenode, nullid)
519 519 self.assertIsNone(rev.baserevisionsize)
520 520 self.assertEqual(rev.revision, fulltext)
521 521 self.assertIsNone(rev.delta)
522 522
523 523 with self.assertRaises(StopIteration):
524 524 next(gen)
525 525
526 526 # Emitting an unknown node after a known revision results in error.
527 527 with self.assertRaises(error.LookupError):
528 528 list(f.emitrevisions([node, b'\x01' * 20]))
529 529
530 530 def testmultiplerevisions(self):
531 531 fulltext0 = b'x' * 1024
532 532 fulltext1 = fulltext0 + b'y'
533 533 fulltext2 = b'y' + fulltext0 + b'z'
534 534
535 535 f = self._makefilefn()
536 536 with self._maketransactionfn() as tr:
537 537 node0 = f.add(fulltext0, None, tr, 0, nullid, nullid)
538 538 node1 = f.add(fulltext1, None, tr, 1, node0, nullid)
539 539 node2 = f.add(fulltext2, None, tr, 3, node1, nullid)
540 540
541 541 self.assertEqual(f.storageinfo(), {})
542 542 self.assertEqual(
543 543 f.storageinfo(revisionscount=True, trackedsize=True),
544 544 {
545 545 b'revisionscount': 3,
546 546 b'trackedsize': len(fulltext0)
547 547 + len(fulltext1)
548 548 + len(fulltext2),
549 549 },
550 550 )
551 551
552 552 self.assertEqual(f.size(0), len(fulltext0))
553 553 self.assertEqual(f.size(1), len(fulltext1))
554 554 self.assertEqual(f.size(2), len(fulltext2))
555 555
556 556 with self.assertRaises(IndexError):
557 557 f.size(3)
558 558
559 559 self.assertEqual(f.revision(node0), fulltext0)
560 560 self.assertEqual(f.rawdata(node0), fulltext0)
561 561 self.assertEqual(f.revision(node1), fulltext1)
562 562 self.assertEqual(f.rawdata(node1), fulltext1)
563 563 self.assertEqual(f.revision(node2), fulltext2)
564 564 self.assertEqual(f.rawdata(node2), fulltext2)
565 565
566 566 with self.assertRaises(error.LookupError):
567 567 f.revision(b'\x01' * 20)
568 568
569 569 self.assertEqual(f.read(node0), fulltext0)
570 570 self.assertEqual(f.read(node1), fulltext1)
571 571 self.assertEqual(f.read(node2), fulltext2)
572 572
573 573 with self.assertRaises(error.LookupError):
574 574 f.read(b'\x01' * 20)
575 575
576 576 self.assertFalse(f.renamed(node0))
577 577 self.assertFalse(f.renamed(node1))
578 578 self.assertFalse(f.renamed(node2))
579 579
580 580 with self.assertRaises(error.LookupError):
581 581 f.renamed(b'\x01' * 20)
582 582
583 583 self.assertFalse(f.cmp(node0, fulltext0))
584 584 self.assertFalse(f.cmp(node1, fulltext1))
585 585 self.assertFalse(f.cmp(node2, fulltext2))
586 586
587 587 self.assertTrue(f.cmp(node1, fulltext0))
588 588 self.assertTrue(f.cmp(node2, fulltext1))
589 589
590 590 with self.assertRaises(error.LookupError):
591 591 f.cmp(b'\x01' * 20, b'irrelevant')
592 592
593 593 # Nodes should be emitted in order.
594 594 gen = f.emitrevisions([node0, node1, node2], revisiondata=True)
595 595
596 596 rev = next(gen)
597 597
598 598 self.assertEqual(rev.node, node0)
599 599 self.assertEqual(rev.p1node, nullid)
600 600 self.assertEqual(rev.p2node, nullid)
601 601 self.assertIsNone(rev.linknode)
602 602 self.assertEqual(rev.basenode, nullid)
603 603 self.assertIsNone(rev.baserevisionsize)
604 604 self.assertEqual(rev.revision, fulltext0)
605 605 self.assertIsNone(rev.delta)
606 606
607 607 rev = next(gen)
608 608
609 609 self.assertEqual(rev.node, node1)
610 610 self.assertEqual(rev.p1node, node0)
611 611 self.assertEqual(rev.p2node, nullid)
612 612 self.assertIsNone(rev.linknode)
613 613 self.assertEqual(rev.basenode, node0)
614 614 self.assertIsNone(rev.baserevisionsize)
615 615 self.assertIsNone(rev.revision)
616 616 self.assertEqual(
617 617 rev.delta,
618 618 b'\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x04\x01' + fulltext1,
619 619 )
620 620
621 621 rev = next(gen)
622 622
623 623 self.assertEqual(rev.node, node2)
624 624 self.assertEqual(rev.p1node, node1)
625 625 self.assertEqual(rev.p2node, nullid)
626 626 self.assertIsNone(rev.linknode)
627 627 self.assertEqual(rev.basenode, node1)
628 628 self.assertIsNone(rev.baserevisionsize)
629 629 self.assertIsNone(rev.revision)
630 630 self.assertEqual(
631 631 rev.delta,
632 632 b'\x00\x00\x00\x00\x00\x00\x04\x01\x00\x00\x04\x02' + fulltext2,
633 633 )
634 634
635 635 with self.assertRaises(StopIteration):
636 636 next(gen)
637 637
638 638 # Request not in DAG order is reordered to be in DAG order.
639 639 gen = f.emitrevisions([node2, node1, node0], revisiondata=True)
640 640
641 641 rev = next(gen)
642 642
643 643 self.assertEqual(rev.node, node0)
644 644 self.assertEqual(rev.p1node, nullid)
645 645 self.assertEqual(rev.p2node, nullid)
646 646 self.assertIsNone(rev.linknode)
647 647 self.assertEqual(rev.basenode, nullid)
648 648 self.assertIsNone(rev.baserevisionsize)
649 649 self.assertEqual(rev.revision, fulltext0)
650 650 self.assertIsNone(rev.delta)
651 651
652 652 rev = next(gen)
653 653
654 654 self.assertEqual(rev.node, node1)
655 655 self.assertEqual(rev.p1node, node0)
656 656 self.assertEqual(rev.p2node, nullid)
657 657 self.assertIsNone(rev.linknode)
658 658 self.assertEqual(rev.basenode, node0)
659 659 self.assertIsNone(rev.baserevisionsize)
660 660 self.assertIsNone(rev.revision)
661 661 self.assertEqual(
662 662 rev.delta,
663 663 b'\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x04\x01' + fulltext1,
664 664 )
665 665
666 666 rev = next(gen)
667 667
668 668 self.assertEqual(rev.node, node2)
669 669 self.assertEqual(rev.p1node, node1)
670 670 self.assertEqual(rev.p2node, nullid)
671 671 self.assertIsNone(rev.linknode)
672 672 self.assertEqual(rev.basenode, node1)
673 673 self.assertIsNone(rev.baserevisionsize)
674 674 self.assertIsNone(rev.revision)
675 675 self.assertEqual(
676 676 rev.delta,
677 677 b'\x00\x00\x00\x00\x00\x00\x04\x01\x00\x00\x04\x02' + fulltext2,
678 678 )
679 679
680 680 with self.assertRaises(StopIteration):
681 681 next(gen)
682 682
683 683 # Unrecognized nodesorder value raises ProgrammingError.
684 684 with self.assertRaises(error.ProgrammingError):
685 685 list(f.emitrevisions([], nodesorder=b'bad'))
686 686
687 687 # nodesorder=storage is recognized. But we can't test it thoroughly
688 688 # because behavior is storage-dependent.
689 689 res = list(
690 690 f.emitrevisions([node2, node1, node0], nodesorder=b'storage')
691 691 )
692 692 self.assertEqual(len(res), 3)
693 693 self.assertEqual({o.node for o in res}, {node0, node1, node2})
694 694
695 695 # nodesorder=nodes forces the order.
696 696 gen = f.emitrevisions(
697 697 [node2, node0], nodesorder=b'nodes', revisiondata=True
698 698 )
699 699
700 700 rev = next(gen)
701 701 self.assertEqual(rev.node, node2)
702 702 self.assertEqual(rev.p1node, node1)
703 703 self.assertEqual(rev.p2node, nullid)
704 704 self.assertEqual(rev.basenode, nullid)
705 705 self.assertIsNone(rev.baserevisionsize)
706 706 self.assertEqual(rev.revision, fulltext2)
707 707 self.assertIsNone(rev.delta)
708 708
709 709 rev = next(gen)
710 710 self.assertEqual(rev.node, node0)
711 711 self.assertEqual(rev.p1node, nullid)
712 712 self.assertEqual(rev.p2node, nullid)
713 713 # Delta behavior is storage dependent, so we can't easily test it.
714 714
715 715 with self.assertRaises(StopIteration):
716 716 next(gen)
717 717
718 718 # assumehaveparentrevisions=False (the default) won't send a delta for
719 719 # the first revision.
720 720 gen = f.emitrevisions({node2, node1}, revisiondata=True)
721 721
722 722 rev = next(gen)
723 723 self.assertEqual(rev.node, node1)
724 724 self.assertEqual(rev.p1node, node0)
725 725 self.assertEqual(rev.p2node, nullid)
726 726 self.assertEqual(rev.basenode, nullid)
727 727 self.assertIsNone(rev.baserevisionsize)
728 728 self.assertEqual(rev.revision, fulltext1)
729 729 self.assertIsNone(rev.delta)
730 730
731 731 rev = next(gen)
732 732 self.assertEqual(rev.node, node2)
733 733 self.assertEqual(rev.p1node, node1)
734 734 self.assertEqual(rev.p2node, nullid)
735 735 self.assertEqual(rev.basenode, node1)
736 736 self.assertIsNone(rev.baserevisionsize)
737 737 self.assertIsNone(rev.revision)
738 738 self.assertEqual(
739 739 rev.delta,
740 740 b'\x00\x00\x00\x00\x00\x00\x04\x01\x00\x00\x04\x02' + fulltext2,
741 741 )
742 742
743 743 with self.assertRaises(StopIteration):
744 744 next(gen)
745 745
746 746 # assumehaveparentrevisions=True allows delta against initial revision.
747 747 gen = f.emitrevisions(
748 748 [node2, node1], revisiondata=True, assumehaveparentrevisions=True
749 749 )
750 750
751 751 rev = next(gen)
752 752 self.assertEqual(rev.node, node1)
753 753 self.assertEqual(rev.p1node, node0)
754 754 self.assertEqual(rev.p2node, nullid)
755 755 self.assertEqual(rev.basenode, node0)
756 756 self.assertIsNone(rev.baserevisionsize)
757 757 self.assertIsNone(rev.revision)
758 758 self.assertEqual(
759 759 rev.delta,
760 760 b'\x00\x00\x00\x00\x00\x00\x04\x00\x00\x00\x04\x01' + fulltext1,
761 761 )
762 762
763 763 # deltamode=CG_DELTAMODE_PREV forces a delta against the previous revision.
764 764 # Special case for initial revision.
765 765 gen = f.emitrevisions(
766 766 [node0], revisiondata=True, deltamode=repository.CG_DELTAMODE_PREV
767 767 )
768 768
769 769 rev = next(gen)
770 770 self.assertEqual(rev.node, node0)
771 771 self.assertEqual(rev.p1node, nullid)
772 772 self.assertEqual(rev.p2node, nullid)
773 773 self.assertEqual(rev.basenode, nullid)
774 774 self.assertIsNone(rev.baserevisionsize)
775 775 self.assertIsNone(rev.revision)
776 776 self.assertEqual(
777 777 rev.delta,
778 778 b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00' + fulltext0,
779 779 )
780 780
781 781 with self.assertRaises(StopIteration):
782 782 next(gen)
783 783
784 784 gen = f.emitrevisions(
785 785 [node0, node2],
786 786 revisiondata=True,
787 787 deltamode=repository.CG_DELTAMODE_PREV,
788 788 )
789 789
790 790 rev = next(gen)
791 791 self.assertEqual(rev.node, node0)
792 792 self.assertEqual(rev.p1node, nullid)
793 793 self.assertEqual(rev.p2node, nullid)
794 794 self.assertEqual(rev.basenode, nullid)
795 795 self.assertIsNone(rev.baserevisionsize)
796 796 self.assertIsNone(rev.revision)
797 797 self.assertEqual(
798 798 rev.delta,
799 799 b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x04\x00' + fulltext0,
800 800 )
801 801
802 802 rev = next(gen)
803 803 self.assertEqual(rev.node, node2)
804 804 self.assertEqual(rev.p1node, node1)
805 805 self.assertEqual(rev.p2node, nullid)
806 806 self.assertEqual(rev.basenode, node0)
807 807
808 808 with self.assertRaises(StopIteration):
809 809 next(gen)
810 810
811 811 def testrenamed(self):
812 812 fulltext0 = b'foo'
813 813 fulltext1 = b'bar'
814 814 fulltext2 = b'baz'
815 815
816 816 meta1 = {
817 817 b'copy': b'source0',
818 818 b'copyrev': b'a' * 40,
819 819 }
820 820
821 821 meta2 = {
822 822 b'copy': b'source1',
823 823 b'copyrev': b'b' * 40,
824 824 }
825 825
826 826 stored1 = b''.join(
827 827 [
828 828 b'\x01\ncopy: source0\n',
829 829 b'copyrev: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa\n\x01\n',
830 830 fulltext1,
831 831 ]
832 832 )
833 833
834 834 stored2 = b''.join(
835 835 [
836 836 b'\x01\ncopy: source1\n',
837 837 b'copyrev: bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb\n\x01\n',
838 838 fulltext2,
839 839 ]
840 840 )
841 841
842 842 f = self._makefilefn()
843 843 with self._maketransactionfn() as tr:
844 844 node0 = f.add(fulltext0, None, tr, 0, nullid, nullid)
845 845 node1 = f.add(fulltext1, meta1, tr, 1, node0, nullid)
846 846 node2 = f.add(fulltext2, meta2, tr, 2, nullid, nullid)
847 847
848 848 # Metadata header isn't recognized when parent isn't nullid.
849 849 self.assertEqual(f.size(1), len(stored1))
850 850 self.assertEqual(f.size(2), len(fulltext2))
851 851
852 852 self.assertEqual(f.revision(node1), stored1)
853 853 self.assertEqual(f.rawdata(node1), stored1)
854 854 self.assertEqual(f.revision(node2), stored2)
855 855 self.assertEqual(f.rawdata(node2), stored2)
856 856
857 857 self.assertEqual(f.read(node1), fulltext1)
858 858 self.assertEqual(f.read(node2), fulltext2)
859 859
860 860 # Returns False when first parent is set.
861 861 self.assertFalse(f.renamed(node1))
862 862 self.assertEqual(f.renamed(node2), (b'source1', b'\xbb' * 20))
863 863
864 864 self.assertTrue(f.cmp(node1, fulltext1))
865 865 self.assertTrue(f.cmp(node1, stored1))
866 866 self.assertFalse(f.cmp(node2, fulltext2))
867 867 self.assertTrue(f.cmp(node2, stored2))
868 868
869 869 def testmetadataprefix(self):
870 870 # Content with metadata prefix has extra prefix inserted in storage.
871 871 fulltext0 = b'\x01\nfoo'
872 872 stored0 = b'\x01\n\x01\n\x01\nfoo'
873 873
874 874 fulltext1 = b'\x01\nbar'
875 875 meta1 = {
876 876 b'copy': b'source0',
877 877 b'copyrev': b'b' * 40,
878 878 }
879 879 stored1 = b''.join(
880 880 [
881 881 b'\x01\ncopy: source0\n',
882 882 b'copyrev: %s\n' % (b'b' * 40),
883 883 b'\x01\n\x01\nbar',
884 884 ]
885 885 )
886 886
887 887 f = self._makefilefn()
888 888 with self._maketransactionfn() as tr:
889 889 node0 = f.add(fulltext0, {}, tr, 0, nullid, nullid)
890 890 node1 = f.add(fulltext1, meta1, tr, 1, nullid, nullid)
891 891
892 892 # TODO this is buggy.
893 893 self.assertEqual(f.size(0), len(fulltext0) + 4)
894 894
895 895 self.assertEqual(f.size(1), len(fulltext1))
896 896
897 897 self.assertEqual(f.revision(node0), stored0)
898 898 self.assertEqual(f.rawdata(node0), stored0)
899 899
900 900 self.assertEqual(f.revision(node1), stored1)
901 901 self.assertEqual(f.rawdata(node1), stored1)
902 902
903 903 self.assertEqual(f.read(node0), fulltext0)
904 904 self.assertEqual(f.read(node1), fulltext1)
905 905
906 906 self.assertFalse(f.cmp(node0, fulltext0))
907 907 self.assertTrue(f.cmp(node0, stored0))
908 908
909 909 self.assertFalse(f.cmp(node1, fulltext1))
910 910 self.assertTrue(f.cmp(node1, stored0))
911 911
912 912 def testbadnoderead(self):
913 913 f = self._makefilefn()
914 914
915 915 fulltext0 = b'foo\n' * 30
916 916 fulltext1 = fulltext0 + b'bar\n'
917 917
918 918 with self._maketransactionfn() as tr:
919 919 node0 = f.add(fulltext0, None, tr, 0, nullid, nullid)
920 920 node1 = b'\xaa' * 20
921 921
922 922 self._addrawrevisionfn(
923 923 f, tr, node1, node0, nullid, 1, rawtext=fulltext1
924 924 )
925 925
926 926 self.assertEqual(len(f), 2)
927 927 self.assertEqual(f.parents(node1), (node0, nullid))
928 928
929 929 # revision() raises since it performs hash verification.
930 930 with self.assertRaises(error.StorageError):
931 931 f.revision(node1)
932 932
933 933 # rawdata() still verifies because there are no special storage
934 934 # settings.
935 935 with self.assertRaises(error.StorageError):
936 936 f.rawdata(node1)
937 937
938 938 # read() behaves like revision().
939 939 with self.assertRaises(error.StorageError):
940 940 f.read(node1)
941 941
942 942 # We can't test renamed() here because some backends may not require
943 943 # reading/validating the fulltext to return rename metadata.
944 944
945 945 def testbadnoderevisionraw(self):
946 946 # Like above except we test rawdata() first to isolate
947 947 # revision caching behavior.
948 948 f = self._makefilefn()
949 949
950 950 fulltext0 = b'foo\n' * 30
951 951 fulltext1 = fulltext0 + b'bar\n'
952 952
953 953 with self._maketransactionfn() as tr:
954 954 node0 = f.add(fulltext0, None, tr, 0, nullid, nullid)
955 955 node1 = b'\xaa' * 20
956 956
957 957 self._addrawrevisionfn(
958 958 f, tr, node1, node0, nullid, 1, rawtext=fulltext1
959 959 )
960 960
961 961 with self.assertRaises(error.StorageError):
962 962 f.rawdata(node1)
963 963
964 964 with self.assertRaises(error.StorageError):
965 965 f.rawdata(node1)
966 966
967 967 def testbadnoderevision(self):
968 968 # Like above except we test read() first to isolate revision caching
969 969 # behavior.
970 970 f = self._makefilefn()
971 971
972 972 fulltext0 = b'foo\n' * 30
973 973 fulltext1 = fulltext0 + b'bar\n'
974 974
975 975 with self._maketransactionfn() as tr:
976 976 node0 = f.add(fulltext0, None, tr, 0, nullid, nullid)
977 977 node1 = b'\xaa' * 20
978 978
979 979 self._addrawrevisionfn(
980 980 f, tr, node1, node0, nullid, 1, rawtext=fulltext1
981 981 )
982 982
983 983 with self.assertRaises(error.StorageError):
984 984 f.read(node1)
985 985
986 986 with self.assertRaises(error.StorageError):
987 987 f.read(node1)
988 988
989 989 def testbadnodedelta(self):
990 990 f = self._makefilefn()
991 991
992 992 fulltext0 = b'foo\n' * 31
993 993 fulltext1 = fulltext0 + b'bar\n'
994 994 fulltext2 = fulltext1 + b'baz\n'
995 995
996 996 with self._maketransactionfn() as tr:
997 997 node0 = f.add(fulltext0, None, tr, 0, nullid, nullid)
998 998 node1 = b'\xaa' * 20
999 999
1000 1000 self._addrawrevisionfn(
1001 1001 f, tr, node1, node0, nullid, 1, rawtext=fulltext1
1002 1002 )
1003 1003
1004 1004 with self.assertRaises(error.StorageError):
1005 1005 f.read(node1)
1006 1006
1007 1007 node2 = storageutil.hashrevisionsha1(fulltext2, node1, nullid)
1008 1008
1009 1009 with self._maketransactionfn() as tr:
1010 1010 delta = mdiff.textdiff(fulltext1, fulltext2)
1011 1011 self._addrawrevisionfn(
1012 1012 f, tr, node2, node1, nullid, 2, delta=(1, delta)
1013 1013 )
1014 1014
1015 1015 self.assertEqual(len(f), 3)
1016 1016
1017 1017 # Assuming a delta is stored, we shouldn't need to validate node1 in
1018 1018 # order to retrieve node2.
1019 1019 self.assertEqual(f.read(node2), fulltext2)
1020 1020
1021 1021 def testcensored(self):
1022 1022 f = self._makefilefn()
1023 1023
1024 1024 stored1 = storageutil.packmeta(
1025 1025 {
1026 1026 b'censored': b'tombstone',
1027 1027 },
1028 1028 b'',
1029 1029 )
1030 1030
1031 1031 with self._maketransactionfn() as tr:
1032 1032 node0 = f.add(b'foo', None, tr, 0, nullid, nullid)
1033 1033
1034 1034 # The node value doesn't matter since we can't verify it.
1035 1035 node1 = b'\xbb' * 20
1036 1036
1037 1037 self._addrawrevisionfn(
1038 1038 f, tr, node1, node0, nullid, 1, stored1, censored=True
1039 1039 )
1040 1040
1041 1041 self.assertTrue(f.iscensored(1))
1042 1042
1043 1043 with self.assertRaises(error.CensoredNodeError):
1044 1044 f.revision(1)
1045 1045
1046 1046 with self.assertRaises(error.CensoredNodeError):
1047 1047 f.rawdata(1)
1048 1048
1049 1049 with self.assertRaises(error.CensoredNodeError):
1050 1050 f.read(1)
1051 1051
1052 1052 def testcensoredrawrevision(self):
1053 1053 # Like above, except we do the rawdata() request first to
1054 1054 # isolate revision caching behavior.
1055 1055
1056 1056 f = self._makefilefn()
1057 1057
1058 1058 stored1 = storageutil.packmeta(
1059 1059 {
1060 1060 b'censored': b'tombstone',
1061 1061 },
1062 1062 b'',
1063 1063 )
1064 1064
1065 1065 with self._maketransactionfn() as tr:
1066 1066 node0 = f.add(b'foo', None, tr, 0, nullid, nullid)
1067 1067
1068 1068 # The node value doesn't matter since we can't verify it.
1069 1069 node1 = b'\xbb' * 20
1070 1070
1071 1071 self._addrawrevisionfn(
1072 1072 f, tr, node1, node0, nullid, 1, stored1, censored=True
1073 1073 )
1074 1074
1075 1075 with self.assertRaises(error.CensoredNodeError):
1076 1076 f.rawdata(1)
1077 1077
1078 1078
1079 1079 class ifilemutationtests(basetestcase):
1080 1080 """Generic tests for the ifilemutation interface.
1081 1081
1082 1082 All file storage backends that support writing should conform to this
1083 1083 interface.
1084 1084
1085 1085 Use ``makeifilemutationtests()`` to create an instance of this type.
1086 1086 """
1087 1087
1088 1088 def testaddnoop(self):
1089 1089 f = self._makefilefn()
1090 1090 with self._maketransactionfn() as tr:
1091 1091 node0 = f.add(b'foo', None, tr, 0, nullid, nullid)
1092 1092 node1 = f.add(b'foo', None, tr, 0, nullid, nullid)
1093 1093 # Varying by linkrev shouldn't impact hash.
1094 1094 node2 = f.add(b'foo', None, tr, 1, nullid, nullid)
1095 1095
1096 1096 self.assertEqual(node1, node0)
1097 1097 self.assertEqual(node2, node0)
1098 1098 self.assertEqual(len(f), 1)
1099 1099
1100 1100 def testaddrevisionbadnode(self):
1101 1101 f = self._makefilefn()
1102 1102 with self._maketransactionfn() as tr:
1103 1103 # Adding a revision with bad node value fails.
1104 1104 with self.assertRaises(error.StorageError):
1105 1105 f.addrevision(b'foo', tr, 0, nullid, nullid, node=b'\x01' * 20)
1106 1106
1107 1107 def testaddrevisionunknownflag(self):
1108 1108 f = self._makefilefn()
1109 1109 with self._maketransactionfn() as tr:
1110 1110 for i in range(15, 0, -1):
1111 1111 if (1 << i) & ~repository.REVISION_FLAGS_KNOWN:
1112 1112 flags = 1 << i
1113 1113 break
1114 1114
1115 1115 with self.assertRaises(error.StorageError):
1116 1116 f.addrevision(b'foo', tr, 0, nullid, nullid, flags=flags)
1117 1117
1118 1118 def testaddgroupsimple(self):
1119 1119 f = self._makefilefn()
1120 1120
1121 1121 callbackargs = []
1122 1122
1123 1123 def cb(*args, **kwargs):
1124 1124 callbackargs.append((args, kwargs))
1125 1125
1126 1126 def linkmapper(node):
1127 1127 return 0
1128 1128
1129 1129 with self._maketransactionfn() as tr:
1130 1130 nodes = []
1131 1131
1132 1132 def onchangeset(cl, rev):
1133 1133 node = cl.node(rev)
1134 1134 nodes.append(node)
1135 1135 cb(cl, node)
1136 1136
1137 1137 def ondupchangeset(cl, rev):
1138 1138 nodes.append(cl.node(rev))
1139 1139
1140 1140 f.addgroup(
1141 1141 [],
1142 1142 None,
1143 1143 tr,
1144 1144 addrevisioncb=onchangeset,
1145 1145 duplicaterevisioncb=ondupchangeset,
1146 1146 )
1147 1147
1148 1148 self.assertEqual(nodes, [])
1149 1149 self.assertEqual(callbackargs, [])
1150 1150 self.assertEqual(len(f), 0)
1151 1151
1152 1152 fulltext0 = b'foo'
1153 1153 delta0 = mdiff.trivialdiffheader(len(fulltext0)) + fulltext0
1154 1154
1155 1155 with self._maketransactionfn() as tr:
1156 1156 node0 = f.add(fulltext0, None, tr, 0, nullid, nullid)
1157 1157
1158 1158 f = self._makefilefn()
1159 1159
1160 1160 deltas = [
1161 (node0, nullid, nullid, nullid, nullid, delta0, 0),
1161 (node0, nullid, nullid, nullid, nullid, delta0, 0, {}),
1162 1162 ]
1163 1163
1164 1164 with self._maketransactionfn() as tr:
1165 1165 nodes = []
1166 1166
1167 1167 def onchangeset(cl, rev):
1168 1168 node = cl.node(rev)
1169 1169 nodes.append(node)
1170 1170 cb(cl, node)
1171 1171
1172 1172 def ondupchangeset(cl, rev):
1173 1173 nodes.append(cl.node(rev))
1174 1174
1175 1175 f.addgroup(
1176 1176 deltas,
1177 1177 linkmapper,
1178 1178 tr,
1179 1179 addrevisioncb=onchangeset,
1180 1180 duplicaterevisioncb=ondupchangeset,
1181 1181 )
1182 1182
1183 1183 self.assertEqual(
1184 1184 nodes,
1185 1185 [
1186 1186 b'\x49\xd8\xcb\xb1\x5c\xe2\x57\x92\x04\x47'
1187 1187 b'\x00\x6b\x46\x97\x8b\x7a\xf9\x80\xa9\x79'
1188 1188 ],
1189 1189 )
1190 1190
1191 1191 self.assertEqual(len(callbackargs), 1)
1192 1192 self.assertEqual(callbackargs[0][0][1], nodes[0])
1193 1193
1194 1194 self.assertEqual(list(f.revs()), [0])
1195 1195 self.assertEqual(f.rev(nodes[0]), 0)
1196 1196 self.assertEqual(f.node(0), nodes[0])
1197 1197
1198 1198 def testaddgroupmultiple(self):
1199 1199 f = self._makefilefn()
1200 1200
1201 1201 fulltexts = [
1202 1202 b'foo',
1203 1203 b'bar',
1204 1204 b'x' * 1024,
1205 1205 ]
1206 1206
1207 1207 nodes = []
1208 1208 with self._maketransactionfn() as tr:
1209 1209 for fulltext in fulltexts:
1210 1210 nodes.append(f.add(fulltext, None, tr, 0, nullid, nullid))
1211 1211
1212 1212 f = self._makefilefn()
1213 1213 deltas = []
1214 1214 for i, fulltext in enumerate(fulltexts):
1215 1215 delta = mdiff.trivialdiffheader(len(fulltext)) + fulltext
1216 1216
1217 deltas.append((nodes[i], nullid, nullid, nullid, nullid, delta, 0))
1217 deltas.append(
1218 (nodes[i], nullid, nullid, nullid, nullid, delta, 0, {})
1219 )
1218 1220
1219 1221 with self._maketransactionfn() as tr:
1220 1222 newnodes = []
1221 1223
1222 1224 def onchangeset(cl, rev):
1223 1225 newnodes.append(cl.node(rev))
1224 1226
1225 1227 f.addgroup(
1226 1228 deltas,
1227 1229 lambda x: 0,
1228 1230 tr,
1229 1231 addrevisioncb=onchangeset,
1230 1232 duplicaterevisioncb=onchangeset,
1231 1233 )
1232 1234 self.assertEqual(newnodes, nodes)
1233 1235
1234 1236 self.assertEqual(len(f), len(deltas))
1235 1237 self.assertEqual(list(f.revs()), [0, 1, 2])
1236 1238 self.assertEqual(f.rev(nodes[0]), 0)
1237 1239 self.assertEqual(f.rev(nodes[1]), 1)
1238 1240 self.assertEqual(f.rev(nodes[2]), 2)
1239 1241 self.assertEqual(f.node(0), nodes[0])
1240 1242 self.assertEqual(f.node(1), nodes[1])
1241 1243 self.assertEqual(f.node(2), nodes[2])
1242 1244
1243 1245 def testdeltaagainstcensored(self):
1244 1246 # Attempt to apply a delta made against a censored revision.
1245 1247 f = self._makefilefn()
1246 1248
1247 1249 stored1 = storageutil.packmeta(
1248 1250 {
1249 1251 b'censored': b'tombstone',
1250 1252 },
1251 1253 b'',
1252 1254 )
1253 1255
1254 1256 with self._maketransactionfn() as tr:
1255 1257 node0 = f.add(b'foo\n' * 30, None, tr, 0, nullid, nullid)
1256 1258
1257 1259 # The node value doesn't matter since we can't verify it.
1258 1260 node1 = b'\xbb' * 20
1259 1261
1260 1262 self._addrawrevisionfn(
1261 1263 f, tr, node1, node0, nullid, 1, stored1, censored=True
1262 1264 )
1263 1265
1264 1266 delta = mdiff.textdiff(b'bar\n' * 30, (b'bar\n' * 30) + b'baz\n')
1265 deltas = [(b'\xcc' * 20, node1, nullid, b'\x01' * 20, node1, delta, 0)]
1267 deltas = [
1268 (b'\xcc' * 20, node1, nullid, b'\x01' * 20, node1, delta, 0, {})
1269 ]
1266 1270
1267 1271 with self._maketransactionfn() as tr:
1268 1272 with self.assertRaises(error.CensoredBaseError):
1269 1273 f.addgroup(deltas, lambda x: 0, tr)
1270 1274
1271 1275 def testcensorrevisionbasic(self):
1272 1276 f = self._makefilefn()
1273 1277
1274 1278 with self._maketransactionfn() as tr:
1275 1279 node0 = f.add(b'foo\n' * 30, None, tr, 0, nullid, nullid)
1276 1280 node1 = f.add(b'foo\n' * 31, None, tr, 1, node0, nullid)
1277 1281 node2 = f.add(b'foo\n' * 32, None, tr, 2, node1, nullid)
1278 1282
1279 1283 with self._maketransactionfn() as tr:
1280 1284 f.censorrevision(tr, node1)
1281 1285
1282 1286 self.assertEqual(len(f), 3)
1283 1287 self.assertEqual(list(f.revs()), [0, 1, 2])
1284 1288
1285 1289 self.assertEqual(f.read(node0), b'foo\n' * 30)
1286 1290 self.assertEqual(f.read(node2), b'foo\n' * 32)
1287 1291
1288 1292 with self.assertRaises(error.CensoredNodeError):
1289 1293 f.read(node1)
1290 1294
1291 1295 def testgetstrippointnoparents(self):
1292 1296 # N revisions where none have parents.
1293 1297 f = self._makefilefn()
1294 1298
1295 1299 with self._maketransactionfn() as tr:
1296 1300 for rev in range(10):
1297 1301 f.add(b'%d' % rev, None, tr, rev, nullid, nullid)
1298 1302
1299 1303 for rev in range(10):
1300 1304 self.assertEqual(f.getstrippoint(rev), (rev, set()))
1301 1305
1302 1306 def testgetstrippointlinear(self):
1303 1307 # N revisions in a linear chain.
1304 1308 f = self._makefilefn()
1305 1309
1306 1310 with self._maketransactionfn() as tr:
1307 1311 p1 = nullid
1308 1312
1309 1313 for rev in range(10):
1310 1314 f.add(b'%d' % rev, None, tr, rev, p1, nullid)
1311 1315
1312 1316 for rev in range(10):
1313 1317 self.assertEqual(f.getstrippoint(rev), (rev, set()))
1314 1318
1315 1319 def testgetstrippointmultipleheads(self):
1316 1320 f = self._makefilefn()
1317 1321
1318 1322 with self._maketransactionfn() as tr:
1319 1323 node0 = f.add(b'0', None, tr, 0, nullid, nullid)
1320 1324 node1 = f.add(b'1', None, tr, 1, node0, nullid)
1321 1325 f.add(b'2', None, tr, 2, node1, nullid)
1322 1326 f.add(b'3', None, tr, 3, node0, nullid)
1323 1327 f.add(b'4', None, tr, 4, node0, nullid)
1324 1328
1325 1329 for rev in range(5):
1326 1330 self.assertEqual(f.getstrippoint(rev), (rev, set()))
1327 1331
1328 1332 def testgetstrippointearlierlinkrevs(self):
1329 1333 f = self._makefilefn()
1330 1334
1331 1335 with self._maketransactionfn() as tr:
1332 1336 node0 = f.add(b'0', None, tr, 0, nullid, nullid)
1333 1337 f.add(b'1', None, tr, 10, node0, nullid)
1334 1338 f.add(b'2', None, tr, 5, node0, nullid)
1335 1339
1336 1340 self.assertEqual(f.getstrippoint(0), (0, set()))
1337 1341 self.assertEqual(f.getstrippoint(1), (1, set()))
1338 1342 self.assertEqual(f.getstrippoint(2), (1, set()))
1339 1343 self.assertEqual(f.getstrippoint(3), (1, set()))
1340 1344 self.assertEqual(f.getstrippoint(4), (1, set()))
1341 1345 self.assertEqual(f.getstrippoint(5), (1, set()))
1342 1346 self.assertEqual(f.getstrippoint(6), (1, {2}))
1343 1347 self.assertEqual(f.getstrippoint(7), (1, {2}))
1344 1348 self.assertEqual(f.getstrippoint(8), (1, {2}))
1345 1349 self.assertEqual(f.getstrippoint(9), (1, {2}))
1346 1350 self.assertEqual(f.getstrippoint(10), (1, {2}))
1347 1351 self.assertEqual(f.getstrippoint(11), (3, set()))
1348 1352
1349 1353 def teststripempty(self):
1350 1354 f = self._makefilefn()
1351 1355
1352 1356 with self._maketransactionfn() as tr:
1353 1357 f.strip(0, tr)
1354 1358
1355 1359 self.assertEqual(len(f), 0)
1356 1360
1357 1361 def teststripall(self):
1358 1362 f = self._makefilefn()
1359 1363
1360 1364 with self._maketransactionfn() as tr:
1361 1365 p1 = nullid
1362 1366 for rev in range(10):
1363 1367 p1 = f.add(b'%d' % rev, None, tr, rev, p1, nullid)
1364 1368
1365 1369 self.assertEqual(len(f), 10)
1366 1370
1367 1371 with self._maketransactionfn() as tr:
1368 1372 f.strip(0, tr)
1369 1373
1370 1374 self.assertEqual(len(f), 0)
1371 1375
1372 1376 def teststrippartial(self):
1373 1377 f = self._makefilefn()
1374 1378
1375 1379 with self._maketransactionfn() as tr:
1376 1380 f.add(b'0', None, tr, 0, nullid, nullid)
1377 1381 node1 = f.add(b'1', None, tr, 5, nullid, nullid)
1378 1382 node2 = f.add(b'2', None, tr, 10, nullid, nullid)
1379 1383
1380 1384 self.assertEqual(len(f), 3)
1381 1385
1382 1386 with self._maketransactionfn() as tr:
1383 1387 f.strip(11, tr)
1384 1388
1385 1389 self.assertEqual(len(f), 3)
1386 1390
1387 1391 with self._maketransactionfn() as tr:
1388 1392 f.strip(10, tr)
1389 1393
1390 1394 self.assertEqual(len(f), 2)
1391 1395
1392 1396 with self.assertRaises(error.LookupError):
1393 1397 f.rev(node2)
1394 1398
1395 1399 with self._maketransactionfn() as tr:
1396 1400 f.strip(6, tr)
1397 1401
1398 1402 self.assertEqual(len(f), 2)
1399 1403
1400 1404 with self._maketransactionfn() as tr:
1401 1405 f.strip(3, tr)
1402 1406
1403 1407 self.assertEqual(len(f), 1)
1404 1408
1405 1409 with self.assertRaises(error.LookupError):
1406 1410 f.rev(node1)
1407 1411
1408 1412
1409 1413 def makeifileindextests(makefilefn, maketransactionfn, addrawrevisionfn):
1410 1414 """Create a unittest.TestCase class suitable for testing file storage.
1411 1415
1412 1416 ``makefilefn`` is a callable which receives the test case as an
1413 1417 argument and returns an object implementing the ``ifilestorage`` interface.
1414 1418
1415 1419 ``maketransactionfn`` is a callable which receives the test case as an
1416 1420 argument and returns a transaction object.
1417 1421
1418 1422 ``addrawrevisionfn`` is a callable which receives arguments describing a
1419 1423 low-level revision to add. This callable allows the insertion of
1420 1424 potentially bad data into the store in order to facilitate testing.
1421 1425
1422 1426 Returns a type that is a ``unittest.TestCase`` that can be used for
1423 1427 testing the object implementing the file storage interface. Simply
1424 1428 assign the returned value to a module-level attribute and a test loader
1425 1429 should find and run it automatically.
1426 1430 """
1427 1431 d = {
1428 1432 '_makefilefn': makefilefn,
1429 1433 '_maketransactionfn': maketransactionfn,
1430 1434 '_addrawrevisionfn': addrawrevisionfn,
1431 1435 }
1432 1436 return type('ifileindextests', (ifileindextests,), d)
1433 1437
1434 1438
1435 1439 def makeifiledatatests(makefilefn, maketransactionfn, addrawrevisionfn):
1436 1440 d = {
1437 1441 '_makefilefn': makefilefn,
1438 1442 '_maketransactionfn': maketransactionfn,
1439 1443 '_addrawrevisionfn': addrawrevisionfn,
1440 1444 }
1441 1445 return type('ifiledatatests', (ifiledatatests,), d)
1442 1446
1443 1447
1444 1448 def makeifilemutationtests(makefilefn, maketransactionfn, addrawrevisionfn):
1445 1449 d = {
1446 1450 '_makefilefn': makefilefn,
1447 1451 '_maketransactionfn': maketransactionfn,
1448 1452 '_addrawrevisionfn': addrawrevisionfn,
1449 1453 }
1450 1454 return type('ifilemutationtests', (ifilemutationtests,), d)
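Editor's note: per the factory docstrings above, a storage backend's test module only has to assign the generated classes to module-level names for the unittest loader to pick them up. A minimal sketch, assuming this module is importable as ``mercurial.testing.storage`` (its relative imports suggest so) and that ``makefilefn``, ``maketransactionfn`` and ``addrawrevisionfn`` are backend-specific helpers supplied by the caller:

    from mercurial.testing import storage as storagetesting

    ifileindextests = storagetesting.makeifileindextests(
        makefilefn, maketransactionfn, addrawrevisionfn
    )
    ifiledatatests = storagetesting.makeifiledatatests(
        makefilefn, maketransactionfn, addrawrevisionfn
    )
    ifilemutationtests = storagetesting.makeifilemutationtests(
        makefilefn, maketransactionfn, addrawrevisionfn
    )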
@@ -1,512 +1,513 b''
1 1 # storageutil.py - Storage functionality agnostic of backend implementation.
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import re
11 11 import struct
12 12
13 13 from ..i18n import _
14 14 from ..node import (
15 15 bin,
16 16 nullid,
17 17 nullrev,
18 18 )
19 19 from .. import (
20 20 dagop,
21 21 error,
22 22 mdiff,
23 23 pycompat,
24 24 )
25 25 from ..interfaces import repository
26 26 from ..utils import hashutil
27 27
28 28 _nullhash = hashutil.sha1(nullid)
29 29
30 30
31 31 def hashrevisionsha1(text, p1, p2):
32 32 """Compute the SHA-1 for revision data and its parents.
33 33
34 34 This hash combines both the current file contents and its history
35 35 in a manner that makes it easy to distinguish nodes with the same
36 36 content in the revision graph.
37 37 """
38 38 # As of now, if one of the parent nodes is null, p2 is null
39 39 if p2 == nullid:
40 40 # deep copy of a hash is faster than creating one
41 41 s = _nullhash.copy()
42 42 s.update(p1)
43 43 else:
44 44 # none of the parent nodes are nullid
45 45 if p1 < p2:
46 46 a = p1
47 47 b = p2
48 48 else:
49 49 a = p2
50 50 b = p1
51 51 s = hashutil.sha1(a)
52 52 s.update(b)
53 53 s.update(text)
54 54 return s.digest()
55 55
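Editor's note: a self-contained sketch of the same hashing scheme using only ``hashlib``, making the parent ordering explicit; it is equivalent because ``nullid`` (twenty zero bytes) always sorts first:

    import hashlib

    nullid = b'\0' * 20

    def node_sha1(text, p1, p2):
        # Hash the two parents in sorted order, then the revision text,
        # mirroring hashrevisionsha1() above.
        a, b = sorted((p1, p2))
        s = hashlib.sha1(a)
        s.update(b)
        s.update(text)
        return s.digest()

    root = node_sha1(b'file contents\n', nullid, nullid)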
56 56
57 57 METADATA_RE = re.compile(b'\x01\n')
58 58
59 59
60 60 def parsemeta(text):
61 61 """Parse metadata header from revision data.
62 62
63 63 Returns a 2-tuple of (metadata, offset), where both can be None if there
64 64 is no metadata.
65 65 """
66 66 # text can be a buffer, so we can't use .startswith or .index
67 67 if text[:2] != b'\x01\n':
68 68 return None, None
69 69 s = METADATA_RE.search(text, 2).start()
70 70 mtext = text[2:s]
71 71 meta = {}
72 72 for l in mtext.splitlines():
73 73 k, v = l.split(b': ', 1)
74 74 meta[k] = v
75 75 return meta, s + 2
76 76
77 77
78 78 def packmeta(meta, text):
79 79 """Add metadata to fulltext to produce revision text."""
80 80 keys = sorted(meta)
81 81 metatext = b''.join(b'%s: %s\n' % (k, meta[k]) for k in keys)
82 82 return b'\x01\n%s\x01\n%s' % (metatext, text)
83 83
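Editor's note: a round trip through the two helpers above, assuming this module is importable as ``mercurial.utils.storageutil`` (as its relative imports suggest); the offset returned by ``parsemeta()`` points at the first byte after the closing ``\x01\n`` marker:

    from mercurial.utils import storageutil

    meta = {b'copy': b'source0', b'copyrev': b'a' * 40}
    packed = storageutil.packmeta(meta, b'file data\n')
    parsed, offset = storageutil.parsemeta(packed)
    assert parsed == meta
    assert packed[offset:] == b'file data\n'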
84 84
85 85 def iscensoredtext(text):
86 86 meta = parsemeta(text)[0]
87 87 return meta and b'censored' in meta
88 88
89 89
90 90 def filtermetadata(text):
91 91 """Extract just the revision data from source text.
92 92
93 93 Returns ``text`` unless it has a metadata header, in which case we return
94 94 a new buffer without the metadata.
95 95 """
96 96 if not text.startswith(b'\x01\n'):
97 97 return text
98 98
99 99 offset = text.index(b'\x01\n', 2)
100 100 return text[offset + 2 :]
101 101
102 102
103 103 def filerevisioncopied(store, node):
104 104 """Resolve file revision copy metadata.
105 105
106 106 Returns ``False`` if the file has no copy metadata. Otherwise a
107 107 2-tuple of the source filename and node.
108 108 """
109 109 if store.parents(node)[0] != nullid:
110 110 return False
111 111
112 112 meta = parsemeta(store.revision(node))[0]
113 113
114 114 # copy and copyrev occur in pairs. In rare cases due to old bugs,
115 115 # one can occur without the other. So ensure both are present to flag
116 116 # as a copy.
117 117 if meta and b'copy' in meta and b'copyrev' in meta:
118 118 return meta[b'copy'], bin(meta[b'copyrev'])
119 119
120 120 return False
121 121
122 122
123 123 def filedataequivalent(store, node, filedata):
124 124 """Determines whether file data is equivalent to a stored node.
125 125
126 126 Returns True if the passed file data would hash to the same value
127 127 as a stored revision and False otherwise.
128 128
129 129 When a stored revision is censored, filedata must be empty to have
130 130 equivalence.
131 131
132 132 When a stored revision has copy metadata, it is ignored as part
133 133 of the compare.
134 134 """
135 135
136 136 if filedata.startswith(b'\x01\n'):
137 137 revisiontext = b'\x01\n\x01\n' + filedata
138 138 else:
139 139 revisiontext = filedata
140 140
141 141 p1, p2 = store.parents(node)
142 142
143 143 computednode = hashrevisionsha1(revisiontext, p1, p2)
144 144
145 145 if computednode == node:
146 146 return True
147 147
148 148 # Censored files compare against the empty file.
149 149 if store.iscensored(store.rev(node)):
150 150 return filedata == b''
151 151
152 152 # Renaming a file produces a different hash, even if the data
153 153 # remains unchanged. Check if that's the case.
154 154 if store.renamed(node):
155 155 return store.read(node) == filedata
156 156
157 157 return False
158 158
159 159
160 160 def iterrevs(storelen, start=0, stop=None):
161 161 """Iterate over revision numbers in a store."""
162 162 step = 1
163 163
164 164 if stop is not None:
165 165 if start > stop:
166 166 step = -1
167 167 stop += step
168 168 if stop > storelen:
169 169 stop = storelen
170 170 else:
171 171 stop = storelen
172 172
173 173 return pycompat.xrange(start, stop, step)
174 174
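Editor's note: a few worked calls showing how ``iterrevs()`` treats its bounds: the ``stop`` value is inclusive and clamped to the store length, and iteration runs backwards when ``start`` exceeds ``stop``:

    assert list(iterrevs(5)) == [0, 1, 2, 3, 4]
    assert list(iterrevs(5, start=1, stop=3)) == [1, 2, 3]
    assert list(iterrevs(5, start=3, stop=1)) == [3, 2, 1]
    assert list(iterrevs(5, start=1, stop=10)) == [1, 2, 3, 4]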
175 175
176 176 def fileidlookup(store, fileid, identifier):
177 177 """Resolve the file node for a value.
178 178
179 179 ``store`` is an object implementing the ``ifileindex`` interface.
180 180
181 181 ``fileid`` can be:
182 182
183 183 * A 20 or 32 byte binary node.
184 184 * An integer revision number.
185 185 * A 40 or 64 byte hex node.
186 186 * A bytes that can be parsed as an integer representing a revision number.
187 187
188 188 ``identifier`` is used to populate ``error.LookupError`` with an identifier
189 189 for the store.
190 190
191 191 Raises ``error.LookupError`` on failure.
192 192 """
193 193 if isinstance(fileid, int):
194 194 try:
195 195 return store.node(fileid)
196 196 except IndexError:
197 197 raise error.LookupError(
198 198 b'%d' % fileid, identifier, _(b'no match found')
199 199 )
200 200
201 201 if len(fileid) in (20, 32):
202 202 try:
203 203 store.rev(fileid)
204 204 return fileid
205 205 except error.LookupError:
206 206 pass
207 207
208 208 if len(fileid) in (40, 64):
209 209 try:
210 210 rawnode = bin(fileid)
211 211 store.rev(rawnode)
212 212 return rawnode
213 213 except TypeError:
214 214 pass
215 215
216 216 try:
217 217 rev = int(fileid)
218 218
219 219 if b'%d' % rev != fileid:
220 220 raise ValueError
221 221
222 222 try:
223 223 return store.node(rev)
224 224 except (IndexError, TypeError):
225 225 pass
226 226 except (ValueError, OverflowError):
227 227 pass
228 228
229 229 raise error.LookupError(fileid, identifier, _(b'no match found'))
230 230
231 231
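Illustrative lookups using the resolver above; ``fl`` stands for a file store and the path is a placeholder. All four forms resolve to the same binary node.

    from mercurial.node import hex

    fileidlookup(fl, 0, b'path/to/file')                  # integer revision number
    fileidlookup(fl, fl.node(0), b'path/to/file')         # 20-byte binary node
    fileidlookup(fl, hex(fl.node(0)), b'path/to/file')    # 40-character hex node
    fileidlookup(fl, b'0', b'path/to/file')               # bytes parsed as a revnum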
232 232 def resolvestripinfo(minlinkrev, tiprev, headrevs, linkrevfn, parentrevsfn):
233 233 """Resolve information needed to strip revisions.
234 234
235 235 Finds the minimum revision number that must be stripped in order to
236 236 strip ``minlinkrev``.
237 237
238 238 Returns a 2-tuple of the minimum revision number to do that and a set
239 239 of all revision numbers that have linkrevs that would be broken
240 240 by that strip.
241 241
242 242 ``tiprev`` is the current tip-most revision. It is ``len(store) - 1``.
243 243 ``headrevs`` is an iterable of head revisions.
244 244 ``linkrevfn`` is a callable that receives a revision and returns a linked
245 245 revision.
246 246 ``parentrevsfn`` is a callable that receives a revision number and returns
247 247 an iterable of its parent revision numbers.
248 248 """
249 249 brokenrevs = set()
250 250 strippoint = tiprev + 1
251 251
252 252 heads = {}
253 253 futurelargelinkrevs = set()
254 254 for head in headrevs:
255 255 headlinkrev = linkrevfn(head)
256 256 heads[head] = headlinkrev
257 257 if headlinkrev >= minlinkrev:
258 258 futurelargelinkrevs.add(headlinkrev)
259 259
260 260 # This algorithm involves walking down the rev graph, starting at the
261 261 # heads. Since the revs are topologically sorted according to linkrev,
262 262 # once all head linkrevs are below the minlink, we know there are
263 263 # no more revs that could have a linkrev greater than minlink.
264 264 # So we can stop walking.
265 265 while futurelargelinkrevs:
266 266 strippoint -= 1
267 267 linkrev = heads.pop(strippoint)
268 268
269 269 if linkrev < minlinkrev:
270 270 brokenrevs.add(strippoint)
271 271 else:
272 272 futurelargelinkrevs.remove(linkrev)
273 273
274 274 for p in parentrevsfn(strippoint):
275 275 if p != nullrev:
276 276 plinkrev = linkrevfn(p)
277 277 heads[p] = plinkrev
278 278 if plinkrev >= minlinkrev:
279 279 futurelargelinkrevs.add(plinkrev)
280 280
281 281 return strippoint, brokenrevs
282 282
283 283
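A sketch of how a revlog-style store might wire its own methods into this helper; the attribute names are assumptions for illustration, not part of this change.

    strippoint, brokenrevs = resolvestripinfo(
        minlinkrev,
        len(store) - 1,       # tiprev
        store.headrevs(),
        store.linkrev,
        store.parentrevs,
    )
    # strippoint is the first revision to strip; brokenrevs have linkrevs
    # that the strip would break (per the docstring above).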
284 284 def emitrevisions(
285 285 store,
286 286 nodes,
287 287 nodesorder,
288 288 resultcls,
289 289 deltaparentfn=None,
290 290 candeltafn=None,
291 291 rawsizefn=None,
292 292 revdifffn=None,
293 293 flagsfn=None,
294 294 deltamode=repository.CG_DELTAMODE_STD,
295 295 revisiondata=False,
296 296 assumehaveparentrevisions=False,
297 297 ):
298 298 """Generic implementation of ifiledata.emitrevisions().
299 299
300 300 Emitting revision data is subtly complex. This function attempts to
301 301 encapsulate all the logic for doing so in a backend-agnostic way.
302 302
303 303 ``store``
304 304 Object conforming to ``ifilestorage`` interface.
305 305
306 306 ``nodes``
307 307 List of revision nodes whose data to emit.
308 308
309 309 ``resultcls``
310 310 A type implementing the ``irevisiondelta`` interface that will be
311 311 constructed and returned.
312 312
313 313 ``deltaparentfn`` (optional)
314 314 Callable receiving a revision number and returning the revision number
315 315 of a revision that the internal delta is stored against. This delta
316 316 will be preferred over computing a new arbitrary delta.
317 317
318 318 If not defined, a delta will always be computed from raw revision
319 319 data.
320 320
321 321 ``candeltafn`` (optional)
322 322 Callable receiving a pair of revision numbers that returns a bool
323 323 indicating whether a delta between them can be produced.
324 324
325 325 If not defined, it is assumed that any two revisions can delta with
326 326 each other.
327 327
328 328 ``rawsizefn`` (optional)
329 329 Callable receiving a revision number and returning the length of the
330 330 ``store.rawdata(rev)``.
331 331
332 332 If not defined, ``len(store.rawdata(rev))`` will be called.
333 333
334 334 ``revdifffn`` (optional)
335 335 Callable receiving a pair of revision numbers that returns a delta
336 336 between them.
337 337
338 338 If not defined, a delta will be computed by invoking mdiff code
339 339 on ``store.revision()`` results.
340 340
341 341 Defining this function allows a precomputed or stored delta to be
342 342 used without having to compute one.
343 343
344 344 ``flagsfn`` (optional)
345 345 Callable receiving a revision number and returning the integer flags
346 346 value for it. If not defined, the flags value will be 0.
347 347
348 348 ``deltamode``
349 349 constraint on the delta to be sent:
350 350 * CG_DELTAMODE_STD - normal mode, try to reuse storage deltas,
351 351 * CG_DELTAMODE_PREV - only delta against "prev",
352 352 * CG_DELTAMODE_FULL - only issue full snapshot.
353 353
354 354 The mode determines whether fulltext revisions are sent instead of deltas, where allowed.
355 355
356 356 ``nodesorder`` - emission order: ``b'nodes'`` (as given), ``b'linear'``, or storage order otherwise.
357 357 ``revisiondata`` - whether to populate revision fulltext/delta data on the emitted results.
358 358 ``assumehaveparentrevisions`` - whether the receiver can be assumed to already have parent revisions, allowing deltas against parents not emitted in this group.
359 359 """
360 360
361 361 fnode = store.node
362 362 frev = store.rev
363 363
364 364 if nodesorder == b'nodes':
365 365 revs = [frev(n) for n in nodes]
366 366 elif nodesorder == b'linear':
367 367 revs = {frev(n) for n in nodes}
368 368 revs = dagop.linearize(revs, store.parentrevs)
369 369 else: # storage and default
370 370 revs = sorted(frev(n) for n in nodes)
371 371
372 372 prevrev = None
373 373
374 374 if deltamode == repository.CG_DELTAMODE_PREV or assumehaveparentrevisions:
375 375 prevrev = store.parentrevs(revs[0])[0]
376 376
377 377 # Set of revs available to delta against.
378 378 available = set()
379 379
380 380 for rev in revs:
381 381 if rev == nullrev:
382 382 continue
383 383
384 384 node = fnode(rev)
385 385 p1rev, p2rev = store.parentrevs(rev)
386 386
387 387 if deltaparentfn:
388 388 deltaparentrev = deltaparentfn(rev)
389 389 else:
390 390 deltaparentrev = nullrev
391 391
392 392 # Forced delta against previous mode.
393 393 if deltamode == repository.CG_DELTAMODE_PREV:
394 394 baserev = prevrev
395 395
396 396 # We're instructed to send fulltext. Honor that.
397 397 elif deltamode == repository.CG_DELTAMODE_FULL:
398 398 baserev = nullrev
399 399 # We're instructed to use p1. Honor that
400 400 elif deltamode == repository.CG_DELTAMODE_P1:
401 401 baserev = p1rev
402 402
403 403 # There is a delta in storage. We try to use that because it
404 404 # amounts to effectively copying data from storage and is
405 405 # therefore the fastest.
406 406 elif deltaparentrev != nullrev:
407 407 # Base revision was already emitted in this group. We can
408 408 # always safely use the delta.
409 409 if deltaparentrev in available:
410 410 baserev = deltaparentrev
411 411
412 412 # Base revision is a parent that hasn't been emitted already.
413 413 # Use it if we can assume the receiver has the parent revision.
414 414 elif assumehaveparentrevisions and deltaparentrev in (p1rev, p2rev):
415 415 baserev = deltaparentrev
416 416
417 417 # No guarantee the receiver has the delta parent. Send delta
418 418 # against last revision (if possible), which in the common case
419 419 # should be similar enough to this revision that the delta is
420 420 # reasonable.
421 421 elif prevrev is not None:
422 422 baserev = prevrev
423 423 else:
424 424 baserev = nullrev
425 425
426 426 # Storage has a fulltext revision.
427 427
428 428 # Let's use the previous revision, which is as good a guess as any.
429 429 # There is definitely room to improve this logic.
430 430 elif prevrev is not None:
431 431 baserev = prevrev
432 432 else:
433 433 baserev = nullrev
434 434
435 435 # But we can't actually use our chosen delta base for whatever
436 436 # reason. Reset to fulltext.
437 437 if baserev != nullrev and (candeltafn and not candeltafn(baserev, rev)):
438 438 baserev = nullrev
439 439
440 440 revision = None
441 441 delta = None
442 442 baserevisionsize = None
443 443
444 444 if revisiondata:
445 445 if store.iscensored(baserev) or store.iscensored(rev):
446 446 try:
447 447 revision = store.rawdata(node)
448 448 except error.CensoredNodeError as e:
449 449 revision = e.tombstone
450 450
451 451 if baserev != nullrev:
452 452 if rawsizefn:
453 453 baserevisionsize = rawsizefn(baserev)
454 454 else:
455 455 baserevisionsize = len(store.rawdata(baserev))
456 456
457 457 elif (
458 458 baserev == nullrev and deltamode != repository.CG_DELTAMODE_PREV
459 459 ):
460 460 revision = store.rawdata(node)
461 461 available.add(rev)
462 462 else:
463 463 if revdifffn:
464 464 delta = revdifffn(baserev, rev)
465 465 else:
466 466 delta = mdiff.textdiff(
467 467 store.rawdata(baserev), store.rawdata(rev)
468 468 )
469 469
470 470 available.add(rev)
471 471
472 472 yield resultcls(
473 473 node=node,
474 474 p1node=fnode(p1rev),
475 475 p2node=fnode(p2rev),
476 476 basenode=fnode(baserev),
477 477 flags=flagsfn(rev) if flagsfn else 0,
478 478 baserevisionsize=baserevisionsize,
479 479 revision=revision,
480 480 delta=delta,
481 sidedata=sidedata,
481 482 )
482 483
483 484 prevrev = rev
484 485
485 486
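For context, a storage backend's ``emitrevisions()`` can delegate to the generic implementation above roughly as follows. This is a sketch only: ``revisiondelta`` and ``self._deltaparentrev`` are illustrative names, and the imports of ``storageutil`` and ``repository`` are assumed.

    def emitrevisions(
        self,
        nodes,
        nodesorder=None,
        revisiondata=False,
        assumehaveparentrevisions=False,
        deltamode=repository.CG_DELTAMODE_STD,
    ):
        return storageutil.emitrevisions(
            self,
            nodes,
            nodesorder,
            revisiondelta,                        # the backend's irevisiondelta class
            deltaparentfn=self._deltaparentrev,   # optional: lets stored deltas be reused
            revisiondata=revisiondata,
            assumehaveparentrevisions=assumehaveparentrevisions,
            deltamode=deltamode,
        )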
486 487 def deltaiscensored(delta, baserev, baselenfn):
487 488 """Determine if a delta represents censored revision data.
488 489
489 490 ``baserev`` is the base revision this delta is encoded against.
490 491 ``baselenfn`` is a callable receiving a revision number that resolves the
491 492 length of the revision fulltext.
492 493
493 494 Returns a bool indicating if the result of the delta represents a censored
494 495 revision.
495 496 """
496 497 # Fragile heuristic: unless new file meta keys are added alphabetically
497 498 # preceding "censored", all censored revisions are prefixed by
498 499 # "\1\ncensored:". A delta producing such a censored revision must be a
499 500 # full-replacement delta, so we inspect the first and only patch in the
500 501 # delta for this prefix.
501 502 hlen = struct.calcsize(b">lll")
502 503 if len(delta) <= hlen:
503 504 return False
504 505
505 506 oldlen = baselenfn(baserev)
506 507 newlen = len(delta) - hlen
507 508 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
508 509 return False
509 510
510 511 add = b"\1\ncensored:"
511 512 addlen = len(add)
512 513 return newlen >= addlen and delta[hlen : hlen + addlen] == add
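A small self-check of the heuristic above, assuming ``mdiff`` is imported as in this module; the tombstone text and base length are made up for the example.

    base_len = 40
    censored = b'\x01\ncensored: tombstone text\n\x01\n'
    delta = mdiff.replacediffheader(base_len, len(censored)) + censored
    assert deltaiscensored(delta, baserev=0, baselenfn=lambda rev: base_len)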