changegroupv4: add sidedata helpers...
Raphaël Gomès - r47449:45f0d529 default
@@ -1,1315 +1,1317 b''
1 1 # sqlitestore.py - Storage backend that uses SQLite
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """store repository data in SQLite (EXPERIMENTAL)
9 9
10 10 The sqlitestore extension enables the storage of repository data in SQLite.
11 11
12 12 This extension is HIGHLY EXPERIMENTAL. There are NO BACKWARDS COMPATIBILITY
13 13 GUARANTEES. This means that repositories created with this extension may
14 14 only be usable with the exact version of this extension/Mercurial that was
15 15 used. The extension attempts to enforce this in order to prevent repository
16 16 corruption.
17 17
18 18 In addition, several features are not yet supported or have known bugs:
19 19
20 20 * Only some data is stored in SQLite. Changeset, manifest, and other repository
21 21 data is not yet stored in SQLite.
22 22 * Transactions are not robust. If the process is aborted at the right time
23 23 during transaction close/rollback, the repository could be in an inconsistent
24 24 state. This problem will diminish once all repository data is tracked by
25 25 SQLite.
26 26 * Bundle repositories do not work (the ability to use e.g.
27 27 `hg -R <bundle-file> log` to automatically overlay a bundle on top of the
28 28 existing repository).
29 29 * Various other features don't work.
30 30
31 31 This extension should work for basic clone/pull, update, and commit workflows.
32 32 Some history rewriting operations may fail due to lack of support for bundle
33 33 repositories.
34 34
35 35 To use, activate the extension and set the ``storage.new-repo-backend`` config
36 36 option to ``sqlite`` to enable new repositories to use SQLite for storage.
37 37 """
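# Example configuration (an illustrative hgrc sketch; the option names come
# from the docstring above and the config registration below, but the exact
# layout is an assumption):
#
#   [extensions]
#   sqlitestore =
#
#   [storage]
#   new-repo-backend = sqlite
#   # optional and experimental; zstd is the default when available,
#   # otherwise zlib. Valid values: zstd, zlib, none.
#   sqlite.compression = zstd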
38 38
39 39 # To run the test suite with repos using SQLite by default, execute the
40 40 # following:
41 41 #
42 42 # HGREPOFEATURES="sqlitestore" run-tests.py \
43 43 # --extra-config-opt extensions.sqlitestore= \
44 44 # --extra-config-opt storage.new-repo-backend=sqlite
45 45
46 46 from __future__ import absolute_import
47 47
48 48 import sqlite3
49 49 import struct
50 50 import threading
51 51 import zlib
52 52
53 53 from mercurial.i18n import _
54 54 from mercurial.node import (
55 55 nullid,
56 56 nullrev,
57 57 short,
58 58 )
59 59 from mercurial.thirdparty import attr
60 60 from mercurial import (
61 61 ancestor,
62 62 dagop,
63 63 encoding,
64 64 error,
65 65 extensions,
66 66 localrepo,
67 67 mdiff,
68 68 pycompat,
69 69 registrar,
70 70 requirements,
71 71 util,
72 72 verify,
73 73 )
74 74 from mercurial.interfaces import (
75 75 repository,
76 76 util as interfaceutil,
77 77 )
78 78 from mercurial.utils import (
79 79 hashutil,
80 80 storageutil,
81 81 )
82 82
83 83 try:
84 84 from mercurial import zstd
85 85
86 86 zstd.__version__
87 87 except ImportError:
88 88 zstd = None
89 89
90 90 configtable = {}
91 91 configitem = registrar.configitem(configtable)
92 92
93 93 # experimental config: storage.sqlite.compression
94 94 configitem(
95 95 b'storage',
96 96 b'sqlite.compression',
97 97 default=b'zstd' if zstd else b'zlib',
98 98 experimental=True,
99 99 )
100 100
101 101 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
102 102 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
103 103 # be specifying the version(s) of Mercurial they are tested with, or
104 104 # leave the attribute unspecified.
105 105 testedwith = b'ships-with-hg-core'
106 106
107 107 REQUIREMENT = b'exp-sqlite-001'
108 108 REQUIREMENT_ZSTD = b'exp-sqlite-comp-001=zstd'
109 109 REQUIREMENT_ZLIB = b'exp-sqlite-comp-001=zlib'
110 110 REQUIREMENT_NONE = b'exp-sqlite-comp-001=none'
111 111 REQUIREMENT_SHALLOW_FILES = b'exp-sqlite-shallow-files'
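# For illustration, a repository created with this extension and zstd
# compression would carry requirement strings like the following in its
# .hg/requires, in addition to the usual revlogv1-backend requirements
# (this listing is a sketch, not an exhaustive set):
#
#   exp-sqlite-001
#   exp-sqlite-comp-001=zstd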
112 112
113 113 CURRENT_SCHEMA_VERSION = 1
114 114
115 115 COMPRESSION_NONE = 1
116 116 COMPRESSION_ZSTD = 2
117 117 COMPRESSION_ZLIB = 3
118 118
119 119 FLAG_CENSORED = 1
120 120 FLAG_MISSING_P1 = 2
121 121 FLAG_MISSING_P2 = 4
122 122
123 123 CREATE_SCHEMA = [
124 124 # Deltas are stored as content-indexed blobs.
125 125 # compression column holds COMPRESSION_* constant for how the
126 126 # delta is encoded.
127 127 'CREATE TABLE delta ('
128 128 ' id INTEGER PRIMARY KEY, '
129 129 ' compression INTEGER NOT NULL, '
130 130 ' hash BLOB UNIQUE ON CONFLICT ABORT, '
131 131 ' delta BLOB NOT NULL '
132 132 ')',
133 133 # Tracked paths are normalized to integer ids to avoid redundant
134 134 # storage of the path name.
135 135 'CREATE TABLE filepath ('
136 136 ' id INTEGER PRIMARY KEY, '
137 137 ' path BLOB NOT NULL '
138 138 ')',
139 139 'CREATE UNIQUE INDEX filepath_path ON filepath (path)',
140 140 # We have a single table for all file revision data.
141 141 # Each file revision is uniquely described by a (path, rev) and
142 142 # (path, node).
143 143 #
144 144 # Revision data is stored as a pointer to the delta producing this
145 145 # revision and the file revision whose delta should be applied before
146 146 # that one. One can reconstruct the delta chain by recursively following
147 147 # the delta base revision pointers until one encounters NULL.
148 148 #
149 149 # flags column holds bitwise integer flags controlling storage options.
150 150 # These flags are defined by the FLAG_* constants.
151 151 'CREATE TABLE fileindex ('
152 152 ' id INTEGER PRIMARY KEY, '
153 153 ' pathid INTEGER REFERENCES filepath(id), '
154 154 ' revnum INTEGER NOT NULL, '
155 155 ' p1rev INTEGER NOT NULL, '
156 156 ' p2rev INTEGER NOT NULL, '
157 157 ' linkrev INTEGER NOT NULL, '
158 158 ' flags INTEGER NOT NULL, '
159 159 ' deltaid INTEGER REFERENCES delta(id), '
160 160 ' deltabaseid INTEGER REFERENCES fileindex(id), '
161 161 ' node BLOB NOT NULL '
162 162 ')',
163 163 'CREATE UNIQUE INDEX fileindex_pathrevnum '
164 164 ' ON fileindex (pathid, revnum)',
165 165 'CREATE UNIQUE INDEX fileindex_pathnode ON fileindex (pathid, node)',
166 166 # Provide a view over all file data for convenience.
167 167 'CREATE VIEW filedata AS '
168 168 'SELECT '
169 169 ' fileindex.id AS id, '
170 170 ' filepath.id AS pathid, '
171 171 ' filepath.path AS path, '
172 172 ' fileindex.revnum AS revnum, '
173 173 ' fileindex.node AS node, '
174 174 ' fileindex.p1rev AS p1rev, '
175 175 ' fileindex.p2rev AS p2rev, '
176 176 ' fileindex.linkrev AS linkrev, '
177 177 ' fileindex.flags AS flags, '
178 178 ' fileindex.deltaid AS deltaid, '
179 179 ' fileindex.deltabaseid AS deltabaseid '
180 180 'FROM filepath, fileindex '
181 181 'WHERE fileindex.pathid=filepath.id',
182 182 'PRAGMA user_version=%d' % CURRENT_SCHEMA_VERSION,
183 183 ]
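# Illustrative sketch of the delta-chain reconstruction described in the
# fileindex comments above. This is pure-Python pseudocode, not part of the
# schema; lookup_fileindex_row() and apply_delta() are hypothetical helpers,
# and the real implementation is the recursive SQL in resolvedeltachain()
# below. The chain-root row stores a full text as its "delta" (see
# _addrawrevision()), so it seeds the reconstruction.
#
#   def rebuild_fulltext(rowid):
#       deltas = []
#       while rowid is not None:
#           row = lookup_fileindex_row(rowid)   # fileindex row + its delta blob
#           deltas.append(row.delta)
#           rowid = row.deltabaseid             # NULL terminates the chain
#       fulltext = deltas.pop()                 # chain root is a full snapshot
#       for delta in reversed(deltas):          # apply oldest-to-newest
#           fulltext = apply_delta(fulltext, delta)
#       return fulltext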
184 184
185 185
186 186 def resolvedeltachain(db, pathid, node, revisioncache, stoprids, zstddctx=None):
187 187 """Resolve a delta chain for a file node."""
188 188
189 189 # TODO the "not in ({stops})" here is possibly slowing down the query
190 190 # because it needs to perform the lookup on every recursive invocation.
191 191 # This could possibly be faster if we created a temporary query with
192 192 # baseid "poisoned" to null and limited the recursive filter to
193 193 # "is not null".
194 194 res = db.execute(
195 195 'WITH RECURSIVE '
196 196 ' deltachain(deltaid, baseid) AS ('
197 197 ' SELECT deltaid, deltabaseid FROM fileindex '
198 198 ' WHERE pathid=? AND node=? '
199 199 ' UNION ALL '
200 200 ' SELECT fileindex.deltaid, deltabaseid '
201 201 ' FROM fileindex, deltachain '
202 202 ' WHERE '
203 203 ' fileindex.id=deltachain.baseid '
204 204 ' AND deltachain.baseid IS NOT NULL '
205 205 ' AND fileindex.id NOT IN ({stops}) '
206 206 ' ) '
207 207 'SELECT deltachain.baseid, compression, delta '
208 208 'FROM deltachain, delta '
209 209 'WHERE delta.id=deltachain.deltaid'.format(
210 210 stops=','.join(['?'] * len(stoprids))
211 211 ),
212 212 tuple([pathid, node] + list(stoprids.keys())),
213 213 )
214 214
215 215 deltas = []
216 216 lastdeltabaseid = None
217 217
218 218 for deltabaseid, compression, delta in res:
219 219 lastdeltabaseid = deltabaseid
220 220
221 221 if compression == COMPRESSION_ZSTD:
222 222 delta = zstddctx.decompress(delta)
223 223 elif compression == COMPRESSION_NONE:
224 224 delta = delta
225 225 elif compression == COMPRESSION_ZLIB:
226 226 delta = zlib.decompress(delta)
227 227 else:
228 228 raise SQLiteStoreError(
229 229 b'unhandled compression type: %d' % compression
230 230 )
231 231
232 232 deltas.append(delta)
233 233
234 234 if lastdeltabaseid in stoprids:
235 235 basetext = revisioncache[stoprids[lastdeltabaseid]]
236 236 else:
237 237 basetext = deltas.pop()
238 238
239 239 deltas.reverse()
240 240 fulltext = mdiff.patches(basetext, deltas)
241 241
242 242 # SQLite returns buffer instances for blob columns on Python 2. This
243 243 # type can propagate through the delta application layer. Because
244 244 # downstream callers assume revisions are bytes, cast as needed.
245 245 if not isinstance(fulltext, bytes):
246 246 fulltext = bytes(fulltext)
247 247
248 248 return fulltext
249 249
250 250
251 251 def insertdelta(db, compression, hash, delta):
252 252 try:
253 253 return db.execute(
254 254 'INSERT INTO delta (compression, hash, delta) VALUES (?, ?, ?)',
255 255 (compression, hash, delta),
256 256 ).lastrowid
257 257 except sqlite3.IntegrityError:
258 258 return db.execute(
259 259 'SELECT id FROM delta WHERE hash=?', (hash,)
260 260 ).fetchone()[0]
261 261
262 262
263 263 class SQLiteStoreError(error.StorageError):
264 264 pass
265 265
266 266
267 267 @attr.s
268 268 class revisionentry(object):
269 269 rid = attr.ib()
270 270 rev = attr.ib()
271 271 node = attr.ib()
272 272 p1rev = attr.ib()
273 273 p2rev = attr.ib()
274 274 p1node = attr.ib()
275 275 p2node = attr.ib()
276 276 linkrev = attr.ib()
277 277 flags = attr.ib()
278 278
279 279
280 280 @interfaceutil.implementer(repository.irevisiondelta)
281 281 @attr.s(slots=True)
282 282 class sqliterevisiondelta(object):
283 283 node = attr.ib()
284 284 p1node = attr.ib()
285 285 p2node = attr.ib()
286 286 basenode = attr.ib()
287 287 flags = attr.ib()
288 288 baserevisionsize = attr.ib()
289 289 revision = attr.ib()
290 290 delta = attr.ib()
291 291 sidedata = attr.ib()
292 292 linknode = attr.ib(default=None)
293 293
294 294
295 295 @interfaceutil.implementer(repository.iverifyproblem)
296 296 @attr.s(frozen=True)
297 297 class sqliteproblem(object):
298 298 warning = attr.ib(default=None)
299 299 error = attr.ib(default=None)
300 300 node = attr.ib(default=None)
301 301
302 302
303 303 @interfaceutil.implementer(repository.ifilestorage)
304 304 class sqlitefilestore(object):
305 305 """Implements storage for an individual tracked path."""
306 306
307 307 def __init__(self, db, path, compression):
308 308 self._db = db
309 309 self._path = path
310 310
311 311 self._pathid = None
312 312
313 313 # revnum -> node
314 314 self._revtonode = {}
315 315 # node -> revnum
316 316 self._nodetorev = {}
317 317 # node -> data structure
318 318 self._revisions = {}
319 319
320 320 self._revisioncache = util.lrucachedict(10)
321 321
322 322 self._compengine = compression
323 323
324 324 if compression == b'zstd':
325 325 self._cctx = zstd.ZstdCompressor(level=3)
326 326 self._dctx = zstd.ZstdDecompressor()
327 327 else:
328 328 self._cctx = None
329 329 self._dctx = None
330 330
331 331 self._refreshindex()
332 332
333 333 def _refreshindex(self):
334 334 self._revtonode = {}
335 335 self._nodetorev = {}
336 336 self._revisions = {}
337 337
338 338 res = list(
339 339 self._db.execute(
340 340 'SELECT id FROM filepath WHERE path=?', (self._path,)
341 341 )
342 342 )
343 343
344 344 if not res:
345 345 self._pathid = None
346 346 return
347 347
348 348 self._pathid = res[0][0]
349 349
350 350 res = self._db.execute(
351 351 'SELECT id, revnum, node, p1rev, p2rev, linkrev, flags '
352 352 'FROM fileindex '
353 353 'WHERE pathid=? '
354 354 'ORDER BY revnum ASC',
355 355 (self._pathid,),
356 356 )
357 357
358 358 for i, row in enumerate(res):
359 359 rid, rev, node, p1rev, p2rev, linkrev, flags = row
360 360
361 361 if i != rev:
362 362 raise SQLiteStoreError(
363 363 _(b'sqlite database has inconsistent revision numbers')
364 364 )
365 365
366 366 if p1rev == nullrev:
367 367 p1node = nullid
368 368 else:
369 369 p1node = self._revtonode[p1rev]
370 370
371 371 if p2rev == nullrev:
372 372 p2node = nullid
373 373 else:
374 374 p2node = self._revtonode[p2rev]
375 375
376 376 entry = revisionentry(
377 377 rid=rid,
378 378 rev=rev,
379 379 node=node,
380 380 p1rev=p1rev,
381 381 p2rev=p2rev,
382 382 p1node=p1node,
383 383 p2node=p2node,
384 384 linkrev=linkrev,
385 385 flags=flags,
386 386 )
387 387
388 388 self._revtonode[rev] = node
389 389 self._nodetorev[node] = rev
390 390 self._revisions[node] = entry
391 391
392 392 # Start of ifileindex interface.
393 393
394 394 def __len__(self):
395 395 return len(self._revisions)
396 396
397 397 def __iter__(self):
398 398 return iter(pycompat.xrange(len(self._revisions)))
399 399
400 400 def hasnode(self, node):
401 401 if node == nullid:
402 402 return False
403 403
404 404 return node in self._nodetorev
405 405
406 406 def revs(self, start=0, stop=None):
407 407 return storageutil.iterrevs(
408 408 len(self._revisions), start=start, stop=stop
409 409 )
410 410
411 411 def parents(self, node):
412 412 if node == nullid:
413 413 return nullid, nullid
414 414
415 415 if node not in self._revisions:
416 416 raise error.LookupError(node, self._path, _(b'no node'))
417 417
418 418 entry = self._revisions[node]
419 419 return entry.p1node, entry.p2node
420 420
421 421 def parentrevs(self, rev):
422 422 if rev == nullrev:
423 423 return nullrev, nullrev
424 424
425 425 if rev not in self._revtonode:
426 426 raise IndexError(rev)
427 427
428 428 entry = self._revisions[self._revtonode[rev]]
429 429 return entry.p1rev, entry.p2rev
430 430
431 431 def rev(self, node):
432 432 if node == nullid:
433 433 return nullrev
434 434
435 435 if node not in self._nodetorev:
436 436 raise error.LookupError(node, self._path, _(b'no node'))
437 437
438 438 return self._nodetorev[node]
439 439
440 440 def node(self, rev):
441 441 if rev == nullrev:
442 442 return nullid
443 443
444 444 if rev not in self._revtonode:
445 445 raise IndexError(rev)
446 446
447 447 return self._revtonode[rev]
448 448
449 449 def lookup(self, node):
450 450 return storageutil.fileidlookup(self, node, self._path)
451 451
452 452 def linkrev(self, rev):
453 453 if rev == nullrev:
454 454 return nullrev
455 455
456 456 if rev not in self._revtonode:
457 457 raise IndexError(rev)
458 458
459 459 entry = self._revisions[self._revtonode[rev]]
460 460 return entry.linkrev
461 461
462 462 def iscensored(self, rev):
463 463 if rev == nullrev:
464 464 return False
465 465
466 466 if rev not in self._revtonode:
467 467 raise IndexError(rev)
468 468
469 469 return self._revisions[self._revtonode[rev]].flags & FLAG_CENSORED
470 470
471 471 def commonancestorsheads(self, node1, node2):
472 472 rev1 = self.rev(node1)
473 473 rev2 = self.rev(node2)
474 474
475 475 ancestors = ancestor.commonancestorsheads(self.parentrevs, rev1, rev2)
476 476 return pycompat.maplist(self.node, ancestors)
477 477
478 478 def descendants(self, revs):
479 479 # TODO we could implement this using a recursive SQL query, which
480 480 # might be faster.
481 481 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
482 482
483 483 def heads(self, start=None, stop=None):
484 484 if start is None and stop is None:
485 485 if not len(self):
486 486 return [nullid]
487 487
488 488 startrev = self.rev(start) if start is not None else nullrev
489 489 stoprevs = {self.rev(n) for n in stop or []}
490 490
491 491 revs = dagop.headrevssubset(
492 492 self.revs, self.parentrevs, startrev=startrev, stoprevs=stoprevs
493 493 )
494 494
495 495 return [self.node(rev) for rev in revs]
496 496
497 497 def children(self, node):
498 498 rev = self.rev(node)
499 499
500 500 res = self._db.execute(
501 501 'SELECT'
502 502 ' node '
503 503 ' FROM filedata '
504 504 ' WHERE path=? AND (p1rev=? OR p2rev=?) '
505 505 ' ORDER BY revnum ASC',
506 506 (self._path, rev, rev),
507 507 )
508 508
509 509 return [row[0] for row in res]
510 510
511 511 # End of ifileindex interface.
512 512
513 513 # Start of ifiledata interface.
514 514
515 515 def size(self, rev):
516 516 if rev == nullrev:
517 517 return 0
518 518
519 519 if rev not in self._revtonode:
520 520 raise IndexError(rev)
521 521
522 522 node = self._revtonode[rev]
523 523
524 524 if self.renamed(node):
525 525 return len(self.read(node))
526 526
527 527 return len(self.revision(node))
528 528
529 529 def revision(self, node, raw=False, _verifyhash=True):
530 530 if node in (nullid, nullrev):
531 531 return b''
532 532
533 533 if isinstance(node, int):
534 534 node = self.node(node)
535 535
536 536 if node not in self._nodetorev:
537 537 raise error.LookupError(node, self._path, _(b'no node'))
538 538
539 539 if node in self._revisioncache:
540 540 return self._revisioncache[node]
541 541
542 542 # Because we have a fulltext revision cache, we are able to
543 543 # short-circuit delta chain traversal and decompression as soon as
544 544 # we encounter a revision in the cache.
545 545
546 546 stoprids = {self._revisions[n].rid: n for n in self._revisioncache}
547 547
548 548 if not stoprids:
549 549 stoprids[-1] = None
550 550
551 551 fulltext = resolvedeltachain(
552 552 self._db,
553 553 self._pathid,
554 554 node,
555 555 self._revisioncache,
556 556 stoprids,
557 557 zstddctx=self._dctx,
558 558 )
559 559
560 560 # Don't verify hashes if parent nodes were rewritten, as the hash
561 561 # wouldn't verify.
562 562 if self._revisions[node].flags & (FLAG_MISSING_P1 | FLAG_MISSING_P2):
563 563 _verifyhash = False
564 564
565 565 if _verifyhash:
566 566 self._checkhash(fulltext, node)
567 567 self._revisioncache[node] = fulltext
568 568
569 569 return fulltext
570 570
571 571 def rawdata(self, *args, **kwargs):
572 572 return self.revision(*args, **kwargs)
573 573
574 574 def read(self, node):
575 575 return storageutil.filtermetadata(self.revision(node))
576 576
577 577 def renamed(self, node):
578 578 return storageutil.filerevisioncopied(self, node)
579 579
580 580 def cmp(self, node, fulltext):
581 581 return not storageutil.filedataequivalent(self, node, fulltext)
582 582
583 583 def emitrevisions(
584 584 self,
585 585 nodes,
586 586 nodesorder=None,
587 587 revisiondata=False,
588 588 assumehaveparentrevisions=False,
589 589 deltamode=repository.CG_DELTAMODE_STD,
590 sidedata_helpers=None,
590 591 ):
591 592 if nodesorder not in (b'nodes', b'storage', b'linear', None):
592 593 raise error.ProgrammingError(
593 594 b'unhandled value for nodesorder: %s' % nodesorder
594 595 )
595 596
596 597 nodes = [n for n in nodes if n != nullid]
597 598
598 599 if not nodes:
599 600 return
600 601
601 602 # TODO perform in a single query.
602 603 res = self._db.execute(
603 604 'SELECT revnum, deltaid FROM fileindex '
604 605 'WHERE pathid=? '
605 606 ' AND node in (%s)' % (','.join(['?'] * len(nodes))),
606 607 tuple([self._pathid] + nodes),
607 608 )
608 609
609 610 deltabases = {}
610 611
611 612 for rev, deltaid in res:
612 613 res = self._db.execute(
613 614 'SELECT revnum from fileindex WHERE pathid=? AND deltaid=?',
614 615 (self._pathid, deltaid),
615 616 )
616 617 deltabases[rev] = res.fetchone()[0]
617 618
618 619 # TODO define revdifffn so we can use delta from storage.
619 620 for delta in storageutil.emitrevisions(
620 621 self,
621 622 nodes,
622 623 nodesorder,
623 624 sqliterevisiondelta,
624 625 deltaparentfn=deltabases.__getitem__,
625 626 revisiondata=revisiondata,
626 627 assumehaveparentrevisions=assumehaveparentrevisions,
627 628 deltamode=deltamode,
629 sidedata_helpers=sidedata_helpers,
628 630 ):
629 631
630 632 yield delta
631 633
632 634 # End of ifiledata interface.
633 635
634 636 # Start of ifilemutation interface.
635 637
636 638 def add(self, filedata, meta, transaction, linkrev, p1, p2):
637 639 if meta or filedata.startswith(b'\x01\n'):
638 640 filedata = storageutil.packmeta(meta, filedata)
639 641
640 642 rev = self.addrevision(filedata, transaction, linkrev, p1, p2)
641 643 return self.node(rev)
642 644
643 645 def addrevision(
644 646 self,
645 647 revisiondata,
646 648 transaction,
647 649 linkrev,
648 650 p1,
649 651 p2,
650 652 node=None,
651 653 flags=0,
652 654 cachedelta=None,
653 655 ):
654 656 if flags:
655 657 raise SQLiteStoreError(_(b'flags not supported on revisions'))
656 658
657 659 validatehash = node is not None
658 660 node = node or storageutil.hashrevisionsha1(revisiondata, p1, p2)
659 661
660 662 if validatehash:
661 663 self._checkhash(revisiondata, node, p1, p2)
662 664
663 665 rev = self._nodetorev.get(node)
664 666 if rev is not None:
665 667 return rev
666 668
667 669 rev = self._addrawrevision(
668 670 node, revisiondata, transaction, linkrev, p1, p2
669 671 )
670 672
671 673 self._revisioncache[node] = revisiondata
672 674 return rev
673 675
674 676 def addgroup(
675 677 self,
676 678 deltas,
677 679 linkmapper,
678 680 transaction,
679 681 addrevisioncb=None,
680 682 duplicaterevisioncb=None,
681 683 maybemissingparents=False,
682 684 ):
683 685 empty = True
684 686
685 687 for (
686 688 node,
687 689 p1,
688 690 p2,
689 691 linknode,
690 692 deltabase,
691 693 delta,
692 694 wireflags,
693 695 sidedata,
694 696 ) in deltas:
695 697 storeflags = 0
696 698
697 699 if wireflags & repository.REVISION_FLAG_CENSORED:
698 700 storeflags |= FLAG_CENSORED
699 701
700 702 if wireflags & ~repository.REVISION_FLAG_CENSORED:
701 703 raise SQLiteStoreError(b'unhandled revision flag')
702 704
703 705 if maybemissingparents:
704 706 if p1 != nullid and not self.hasnode(p1):
705 707 p1 = nullid
706 708 storeflags |= FLAG_MISSING_P1
707 709
708 710 if p2 != nullid and not self.hasnode(p2):
709 711 p2 = nullid
710 712 storeflags |= FLAG_MISSING_P2
711 713
712 714 baserev = self.rev(deltabase)
713 715
714 716 # If base is censored, delta must be full replacement in a single
715 717 # patch operation.
716 718 if baserev != nullrev and self.iscensored(baserev):
717 719 hlen = struct.calcsize(b'>lll')
718 720 oldlen = len(self.rawdata(deltabase, _verifyhash=False))
719 721 newlen = len(delta) - hlen
720 722
721 723 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
722 724 raise error.CensoredBaseError(self._path, deltabase)
723 725
724 726 if not (storeflags & FLAG_CENSORED) and storageutil.deltaiscensored(
725 727 delta, baserev, lambda x: len(self.rawdata(x))
726 728 ):
727 729 storeflags |= FLAG_CENSORED
728 730
729 731 linkrev = linkmapper(linknode)
730 732
731 733 if node in self._revisions:
732 734 # Possibly reset parents to make them proper.
733 735 entry = self._revisions[node]
734 736
735 737 if entry.flags & FLAG_MISSING_P1 and p1 != nullid:
736 738 entry.p1node = p1
737 739 entry.p1rev = self._nodetorev[p1]
738 740 entry.flags &= ~FLAG_MISSING_P1
739 741
740 742 self._db.execute(
741 743 'UPDATE fileindex SET p1rev=?, flags=? WHERE id=?',
742 744 (self._nodetorev[p1], entry.flags, entry.rid),
743 745 )
744 746
745 747 if entry.flags & FLAG_MISSING_P2 and p2 != nullid:
746 748 entry.p2node = p2
747 749 entry.p2rev = self._nodetorev[p2]
748 750 entry.flags &= ~FLAG_MISSING_P2
749 751
750 752 self._db.execute(
751 753 'UPDATE fileindex SET p2rev=?, flags=? WHERE id=?',
752 754 (self._nodetorev[p2], entry.flags, entry.rid),
753 755 )
754 756
755 757 if duplicaterevisioncb:
756 758 duplicaterevisioncb(self, self.rev(node))
757 759 empty = False
758 760 continue
759 761
760 762 if deltabase == nullid:
761 763 text = mdiff.patch(b'', delta)
762 764 storedelta = None
763 765 else:
764 766 text = None
765 767 storedelta = (deltabase, delta)
766 768
767 769 rev = self._addrawrevision(
768 770 node,
769 771 text,
770 772 transaction,
771 773 linkrev,
772 774 p1,
773 775 p2,
774 776 storedelta=storedelta,
775 777 flags=storeflags,
776 778 )
777 779
778 780 if addrevisioncb:
779 781 addrevisioncb(self, rev)
780 782 empty = False
781 783
782 784 return not empty
783 785
784 786 def censorrevision(self, tr, censornode, tombstone=b''):
785 787 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
786 788
787 789 # This restriction is cargo culted from revlogs and makes no sense for
788 790 # SQLite, since columns can be resized at will.
789 791 if len(tombstone) > len(self.rawdata(censornode)):
790 792 raise error.Abort(
791 793 _(b'censor tombstone must be no longer than censored data')
792 794 )
793 795
794 796 # We need to replace the censored revision's data with the tombstone.
795 797 # But replacing that data will have implications for delta chains that
796 798 # reference it.
797 799 #
798 800 # While "better," more complex strategies are possible, we do something
799 801 # simple: we find delta chain children of the censored revision and we
800 802 # replace those incremental deltas with fulltexts of their corresponding
801 803 # revision. Then we delete the now-unreferenced delta and original
802 804 # revision and insert a replacement.
803 805
804 806 # Find the delta to be censored.
805 807 censoreddeltaid = self._db.execute(
806 808 'SELECT deltaid FROM fileindex WHERE id=?',
807 809 (self._revisions[censornode].rid,),
808 810 ).fetchone()[0]
809 811
810 812 # Find all its delta chain children.
811 813 # TODO once we support storing deltas for !files, we'll need to look
812 814 # for those delta chains too.
813 815 rows = list(
814 816 self._db.execute(
815 817 'SELECT id, pathid, node FROM fileindex '
816 818 'WHERE deltabaseid=? OR deltaid=?',
817 819 (censoreddeltaid, censoreddeltaid),
818 820 )
819 821 )
820 822
821 823 for row in rows:
822 824 rid, pathid, node = row
823 825
824 826 fulltext = resolvedeltachain(
825 827 self._db, pathid, node, {}, {-1: None}, zstddctx=self._dctx
826 828 )
827 829
828 830 deltahash = hashutil.sha1(fulltext).digest()
829 831
830 832 if self._compengine == b'zstd':
831 833 deltablob = self._cctx.compress(fulltext)
832 834 compression = COMPRESSION_ZSTD
833 835 elif self._compengine == b'zlib':
834 836 deltablob = zlib.compress(fulltext)
835 837 compression = COMPRESSION_ZLIB
836 838 elif self._compengine == b'none':
837 839 deltablob = fulltext
838 840 compression = COMPRESSION_NONE
839 841 else:
840 842 raise error.ProgrammingError(
841 843 b'unhandled compression engine: %s' % self._compengine
842 844 )
843 845
844 846 if len(deltablob) >= len(fulltext):
845 847 deltablob = fulltext
846 848 compression = COMPRESSION_NONE
847 849
848 850 deltaid = insertdelta(self._db, compression, deltahash, deltablob)
849 851
850 852 self._db.execute(
851 853 'UPDATE fileindex SET deltaid=?, deltabaseid=NULL '
852 854 'WHERE id=?',
853 855 (deltaid, rid),
854 856 )
855 857
856 858 # Now create the tombstone delta and replace the delta on the censored
857 859 # node.
858 860 deltahash = hashutil.sha1(tombstone).digest()
859 861 tombstonedeltaid = insertdelta(
860 862 self._db, COMPRESSION_NONE, deltahash, tombstone
861 863 )
862 864
863 865 flags = self._revisions[censornode].flags
864 866 flags |= FLAG_CENSORED
865 867
866 868 self._db.execute(
867 869 'UPDATE fileindex SET flags=?, deltaid=?, deltabaseid=NULL '
868 870 'WHERE pathid=? AND node=?',
869 871 (flags, tombstonedeltaid, self._pathid, censornode),
870 872 )
871 873
872 874 self._db.execute('DELETE FROM delta WHERE id=?', (censoreddeltaid,))
873 875
874 876 self._refreshindex()
875 877 self._revisioncache.clear()
876 878
877 879 def getstrippoint(self, minlink):
878 880 return storageutil.resolvestripinfo(
879 881 minlink,
880 882 len(self) - 1,
881 883 [self.rev(n) for n in self.heads()],
882 884 self.linkrev,
883 885 self.parentrevs,
884 886 )
885 887
886 888 def strip(self, minlink, transaction):
887 889 if not len(self):
888 890 return
889 891
890 892 rev, _ignored = self.getstrippoint(minlink)
891 893
892 894 if rev == len(self):
893 895 return
894 896
895 897 for rev in self.revs(rev):
896 898 self._db.execute(
897 899 'DELETE FROM fileindex WHERE pathid=? AND node=?',
898 900 (self._pathid, self.node(rev)),
899 901 )
900 902
901 903 # TODO how should we garbage collect data in delta table?
902 904
903 905 self._refreshindex()
904 906
905 907 # End of ifilemutation interface.
906 908
907 909 # Start of ifilestorage interface.
908 910
909 911 def files(self):
910 912 return []
911 913
912 914 def sidedata(self, nodeorrev, _df=None):
913 915 # Not supported for now
914 916 return {}
915 917
916 918 def storageinfo(
917 919 self,
918 920 exclusivefiles=False,
919 921 sharedfiles=False,
920 922 revisionscount=False,
921 923 trackedsize=False,
922 924 storedsize=False,
923 925 ):
924 926 d = {}
925 927
926 928 if exclusivefiles:
927 929 d[b'exclusivefiles'] = []
928 930
929 931 if sharedfiles:
930 932 # TODO list sqlite file(s) here.
931 933 d[b'sharedfiles'] = []
932 934
933 935 if revisionscount:
934 936 d[b'revisionscount'] = len(self)
935 937
936 938 if trackedsize:
937 939 d[b'trackedsize'] = sum(
938 940 len(self.revision(node)) for node in self._nodetorev
939 941 )
940 942
941 943 if storedsize:
942 944 # TODO implement this?
943 945 d[b'storedsize'] = None
944 946
945 947 return d
946 948
947 949 def verifyintegrity(self, state):
948 950 state[b'skipread'] = set()
949 951
950 952 for rev in self:
951 953 node = self.node(rev)
952 954
953 955 try:
954 956 self.revision(node)
955 957 except Exception as e:
956 958 yield sqliteproblem(
957 959 error=_(b'unpacking %s: %s') % (short(node), e), node=node
958 960 )
959 961
960 962 state[b'skipread'].add(node)
961 963
962 964 # End of ifilestorage interface.
963 965
964 966 def _checkhash(self, fulltext, node, p1=None, p2=None):
965 967 if p1 is None and p2 is None:
966 968 p1, p2 = self.parents(node)
967 969
968 970 if node == storageutil.hashrevisionsha1(fulltext, p1, p2):
969 971 return
970 972
971 973 try:
972 974 del self._revisioncache[node]
973 975 except KeyError:
974 976 pass
975 977
976 978 if storageutil.iscensoredtext(fulltext):
977 979 raise error.CensoredNodeError(self._path, node, fulltext)
978 980
979 981 raise SQLiteStoreError(_(b'integrity check failed on %s') % self._path)
980 982
981 983 def _addrawrevision(
982 984 self,
983 985 node,
984 986 revisiondata,
985 987 transaction,
986 988 linkrev,
987 989 p1,
988 990 p2,
989 991 storedelta=None,
990 992 flags=0,
991 993 ):
992 994 if self._pathid is None:
993 995 res = self._db.execute(
994 996 'INSERT INTO filepath (path) VALUES (?)', (self._path,)
995 997 )
996 998 self._pathid = res.lastrowid
997 999
998 1000 # For simplicity, always store a delta against p1.
999 1001 # TODO we need a lot more logic here to make behavior reasonable.
1000 1002
1001 1003 if storedelta:
1002 1004 deltabase, delta = storedelta
1003 1005
1004 1006 if isinstance(deltabase, int):
1005 1007 deltabase = self.node(deltabase)
1006 1008
1007 1009 else:
1008 1010 assert revisiondata is not None
1009 1011 deltabase = p1
1010 1012
1011 1013 if deltabase == nullid:
1012 1014 delta = revisiondata
1013 1015 else:
1014 1016 delta = mdiff.textdiff(
1015 1017 self.revision(self.rev(deltabase)), revisiondata
1016 1018 )
1017 1019
1018 1020 # File index stores a pointer to its delta and the parent delta.
1019 1021 # The parent delta is stored via a pointer to the fileindex PK.
1020 1022 if deltabase == nullid:
1021 1023 baseid = None
1022 1024 else:
1023 1025 baseid = self._revisions[deltabase].rid
1024 1026
1025 1027 # Deltas are stored with a hash of their content. This allows
1026 1028 # us to de-duplicate. The table is configured to ignore conflicts
1027 1029 # and it is faster to just insert and silently noop than to look
1028 1030 # first.
1029 1031 deltahash = hashutil.sha1(delta).digest()
1030 1032
1031 1033 if self._compengine == b'zstd':
1032 1034 deltablob = self._cctx.compress(delta)
1033 1035 compression = COMPRESSION_ZSTD
1034 1036 elif self._compengine == b'zlib':
1035 1037 deltablob = zlib.compress(delta)
1036 1038 compression = COMPRESSION_ZLIB
1037 1039 elif self._compengine == b'none':
1038 1040 deltablob = delta
1039 1041 compression = COMPRESSION_NONE
1040 1042 else:
1041 1043 raise error.ProgrammingError(
1042 1044 b'unhandled compression engine: %s' % self._compengine
1043 1045 )
1044 1046
1045 1047 # Don't store compressed data if it isn't practical.
1046 1048 if len(deltablob) >= len(delta):
1047 1049 deltablob = delta
1048 1050 compression = COMPRESSION_NONE
1049 1051
1050 1052 deltaid = insertdelta(self._db, compression, deltahash, deltablob)
1051 1053
1052 1054 rev = len(self)
1053 1055
1054 1056 if p1 == nullid:
1055 1057 p1rev = nullrev
1056 1058 else:
1057 1059 p1rev = self._nodetorev[p1]
1058 1060
1059 1061 if p2 == nullid:
1060 1062 p2rev = nullrev
1061 1063 else:
1062 1064 p2rev = self._nodetorev[p2]
1063 1065
1064 1066 rid = self._db.execute(
1065 1067 'INSERT INTO fileindex ('
1066 1068 ' pathid, revnum, node, p1rev, p2rev, linkrev, flags, '
1067 1069 ' deltaid, deltabaseid) '
1068 1070 ' VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)',
1069 1071 (
1070 1072 self._pathid,
1071 1073 rev,
1072 1074 node,
1073 1075 p1rev,
1074 1076 p2rev,
1075 1077 linkrev,
1076 1078 flags,
1077 1079 deltaid,
1078 1080 baseid,
1079 1081 ),
1080 1082 ).lastrowid
1081 1083
1082 1084 entry = revisionentry(
1083 1085 rid=rid,
1084 1086 rev=rev,
1085 1087 node=node,
1086 1088 p1rev=p1rev,
1087 1089 p2rev=p2rev,
1088 1090 p1node=p1,
1089 1091 p2node=p2,
1090 1092 linkrev=linkrev,
1091 1093 flags=flags,
1092 1094 )
1093 1095
1094 1096 self._nodetorev[node] = rev
1095 1097 self._revtonode[rev] = node
1096 1098 self._revisions[node] = entry
1097 1099
1098 1100 return rev
1099 1101
1100 1102
1101 1103 class sqliterepository(localrepo.localrepository):
1102 1104 def cancopy(self):
1103 1105 return False
1104 1106
1105 1107 def transaction(self, *args, **kwargs):
1106 1108 current = self.currenttransaction()
1107 1109
1108 1110 tr = super(sqliterepository, self).transaction(*args, **kwargs)
1109 1111
1110 1112 if current:
1111 1113 return tr
1112 1114
1113 1115 self._dbconn.execute('BEGIN TRANSACTION')
1114 1116
1115 1117 def committransaction(_):
1116 1118 self._dbconn.commit()
1117 1119
1118 1120 tr.addfinalize(b'sqlitestore', committransaction)
1119 1121
1120 1122 return tr
1121 1123
1122 1124 @property
1123 1125 def _dbconn(self):
1124 1126 # SQLite connections can only be used on the thread that created
1125 1127 # them. In most cases, this "just works." However, hgweb uses
1126 1128 # multiple threads.
1127 1129 tid = threading.current_thread().ident
1128 1130
1129 1131 if self._db:
1130 1132 if self._db[0] == tid:
1131 1133 return self._db[1]
1132 1134
1133 1135 db = makedb(self.svfs.join(b'db.sqlite'))
1134 1136 self._db = (tid, db)
1135 1137
1136 1138 return db
1137 1139
1138 1140
1139 1141 def makedb(path):
1140 1142 """Construct a database handle for a database at path."""
1141 1143
1142 1144 db = sqlite3.connect(encoding.strfromlocal(path))
1143 1145 db.text_factory = bytes
1144 1146
1145 1147 res = db.execute('PRAGMA user_version').fetchone()[0]
1146 1148
1147 1149 # New database.
1148 1150 if res == 0:
1149 1151 for statement in CREATE_SCHEMA:
1150 1152 db.execute(statement)
1151 1153
1152 1154 db.commit()
1153 1155
1154 1156 elif res == CURRENT_SCHEMA_VERSION:
1155 1157 pass
1156 1158
1157 1159 else:
1158 1160 raise error.Abort(_(b'sqlite database has unrecognized version'))
1159 1161
1160 1162 db.execute('PRAGMA journal_mode=WAL')
1161 1163
1162 1164 return db
1163 1165
1164 1166
1165 1167 def featuresetup(ui, supported):
1166 1168 supported.add(REQUIREMENT)
1167 1169
1168 1170 if zstd:
1169 1171 supported.add(REQUIREMENT_ZSTD)
1170 1172
1171 1173 supported.add(REQUIREMENT_ZLIB)
1172 1174 supported.add(REQUIREMENT_NONE)
1173 1175 supported.add(REQUIREMENT_SHALLOW_FILES)
1174 1176 supported.add(requirements.NARROW_REQUIREMENT)
1175 1177
1176 1178
1177 1179 def newreporequirements(orig, ui, createopts):
1178 1180 if createopts[b'backend'] != b'sqlite':
1179 1181 return orig(ui, createopts)
1180 1182
1181 1183 # This restriction can be lifted once we have more confidence.
1182 1184 if b'sharedrepo' in createopts:
1183 1185 raise error.Abort(
1184 1186 _(b'shared repositories not supported with SQLite store')
1185 1187 )
1186 1188
1187 1189 # This filtering is out of an abundance of caution: we want to ensure
1188 1190 # we honor creation options and we do that by annotating exactly the
1189 1191 # creation options we recognize.
1190 1192 known = {
1191 1193 b'narrowfiles',
1192 1194 b'backend',
1193 1195 b'shallowfilestore',
1194 1196 }
1195 1197
1196 1198 unsupported = set(createopts) - known
1197 1199 if unsupported:
1198 1200 raise error.Abort(
1199 1201 _(b'SQLite store does not support repo creation option: %s')
1200 1202 % b', '.join(sorted(unsupported))
1201 1203 )
1202 1204
1203 1205 # Since we're a hybrid store that still relies on revlogs, we fall back
1204 1206 # to using the revlogv1 backend's storage requirements and then add our
1205 1207 # own requirement.
1206 1208 createopts[b'backend'] = b'revlogv1'
1207 1209 requirements = orig(ui, createopts)
1208 1210 requirements.add(REQUIREMENT)
1209 1211
1210 1212 compression = ui.config(b'storage', b'sqlite.compression')
1211 1213
1212 1214 if compression == b'zstd' and not zstd:
1213 1215 raise error.Abort(
1214 1216 _(
1215 1217 b'storage.sqlite.compression set to "zstd" but '
1216 1218 b'zstandard compression not available to this '
1217 1219 b'Mercurial install'
1218 1220 )
1219 1221 )
1220 1222
1221 1223 if compression == b'zstd':
1222 1224 requirements.add(REQUIREMENT_ZSTD)
1223 1225 elif compression == b'zlib':
1224 1226 requirements.add(REQUIREMENT_ZLIB)
1225 1227 elif compression == b'none':
1226 1228 requirements.add(REQUIREMENT_NONE)
1227 1229 else:
1228 1230 raise error.Abort(
1229 1231 _(
1230 1232 b'unknown compression engine defined in '
1231 1233 b'storage.sqlite.compression: %s'
1232 1234 )
1233 1235 % compression
1234 1236 )
1235 1237
1236 1238 if createopts.get(b'shallowfilestore'):
1237 1239 requirements.add(REQUIREMENT_SHALLOW_FILES)
1238 1240
1239 1241 return requirements
1240 1242
1241 1243
1242 1244 @interfaceutil.implementer(repository.ilocalrepositoryfilestorage)
1243 1245 class sqlitefilestorage(object):
1244 1246 """Repository file storage backed by SQLite."""
1245 1247
1246 1248 def file(self, path):
1247 1249 if path[0] == b'/':
1248 1250 path = path[1:]
1249 1251
1250 1252 if REQUIREMENT_ZSTD in self.requirements:
1251 1253 compression = b'zstd'
1252 1254 elif REQUIREMENT_ZLIB in self.requirements:
1253 1255 compression = b'zlib'
1254 1256 elif REQUIREMENT_NONE in self.requirements:
1255 1257 compression = b'none'
1256 1258 else:
1257 1259 raise error.Abort(
1258 1260 _(
1259 1261 b'unable to determine what compression engine '
1260 1262 b'to use for SQLite storage'
1261 1263 )
1262 1264 )
1263 1265
1264 1266 return sqlitefilestore(self._dbconn, path, compression)
1265 1267
1266 1268
1267 1269 def makefilestorage(orig, requirements, features, **kwargs):
1268 1270 """Produce a type conforming to ``ilocalrepositoryfilestorage``."""
1269 1271 if REQUIREMENT in requirements:
1270 1272 if REQUIREMENT_SHALLOW_FILES in requirements:
1271 1273 features.add(repository.REPO_FEATURE_SHALLOW_FILE_STORAGE)
1272 1274
1273 1275 return sqlitefilestorage
1274 1276 else:
1275 1277 return orig(requirements=requirements, features=features, **kwargs)
1276 1278
1277 1279
1278 1280 def makemain(orig, ui, requirements, **kwargs):
1279 1281 if REQUIREMENT in requirements:
1280 1282 if REQUIREMENT_ZSTD in requirements and not zstd:
1281 1283 raise error.Abort(
1282 1284 _(
1283 1285 b'repository uses zstandard compression, which '
1284 1286 b'is not available to this Mercurial install'
1285 1287 )
1286 1288 )
1287 1289
1288 1290 return sqliterepository
1289 1291
1290 1292 return orig(requirements=requirements, **kwargs)
1291 1293
1292 1294
1293 1295 def verifierinit(orig, self, *args, **kwargs):
1294 1296 orig(self, *args, **kwargs)
1295 1297
1296 1298 # We don't care that files in the store don't align with what is
1297 1299 # advertised. So suppress these warnings.
1298 1300 self.warnorphanstorefiles = False
1299 1301
1300 1302
1301 1303 def extsetup(ui):
1302 1304 localrepo.featuresetupfuncs.add(featuresetup)
1303 1305 extensions.wrapfunction(
1304 1306 localrepo, b'newreporequirements', newreporequirements
1305 1307 )
1306 1308 extensions.wrapfunction(localrepo, b'makefilestorage', makefilestorage)
1307 1309 extensions.wrapfunction(localrepo, b'makemain', makemain)
1308 1310 extensions.wrapfunction(verify.verifier, b'__init__', verifierinit)
1309 1311
1310 1312
1311 1313 def reposetup(ui, repo):
1312 1314 if isinstance(repo, sqliterepository):
1313 1315 repo._db = None
1314 1316
1315 1317 # TODO check for bundlerepository?
@@ -1,1794 +1,1861 b''
1 1 # changegroup.py - Mercurial changegroup manipulation functions
2 2 #
3 3 # Copyright 2006 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11 import struct
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullid,
18 18 nullrev,
19 19 short,
20 20 )
21 21 from .pycompat import open
22 22
23 23 from . import (
24 24 error,
25 25 match as matchmod,
26 26 mdiff,
27 27 phases,
28 28 pycompat,
29 29 requirements,
30 30 scmutil,
31 31 util,
32 32 )
33 33
34 34 from .interfaces import repository
35 35 from .revlogutils import sidedata as sidedatamod
36 36
37 37 _CHANGEGROUPV1_DELTA_HEADER = struct.Struct(b"20s20s20s20s")
38 38 _CHANGEGROUPV2_DELTA_HEADER = struct.Struct(b"20s20s20s20s20s")
39 39 _CHANGEGROUPV3_DELTA_HEADER = struct.Struct(b">20s20s20s20s20sH")
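# Per-version delta header layout, summarizing the structs above together
# with the _deltaheader() implementations further down:
#   cg1: node, p1, p2, cs                     (delta base implicit: prev node, else p1)
#   cg2: node, p1, p2, deltabase, cs          (explicit delta base / general delta)
#   cg3: node, p1, p2, deltabase, cs, flags   (adds 16-bit revlog flags)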
40 40
41 41 LFS_REQUIREMENT = b'lfs'
42 42
43 43 readexactly = util.readexactly
44 44
45 45
46 46 def getchunk(stream):
47 47 """return the next chunk from stream as a string"""
48 48 d = readexactly(stream, 4)
49 49 l = struct.unpack(b">l", d)[0]
50 50 if l <= 4:
51 51 if l:
52 52 raise error.Abort(_(b"invalid chunk length %d") % l)
53 53 return b""
54 54 return readexactly(stream, l - 4)
55 55
56 56
57 57 def chunkheader(length):
58 58 """return a changegroup chunk header (string)"""
59 59 return struct.pack(b">l", length + 4)
60 60
61 61
62 62 def closechunk():
63 63 """return a changegroup chunk header (string) for a zero-length chunk"""
64 64 return struct.pack(b">l", 0)
65 65
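# Framing sketch (illustrative, derived from getchunk()/chunkheader() above):
# each chunk is a 4-byte big-endian length that counts the length field
# itself, followed by the payload; a zero length terminates a group.
#
#   chunkheader(4) + b'data'   ->  b'\x00\x00\x00\x08data'
#   closechunk()               ->  b'\x00\x00\x00\x00'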
66 66
67 67 def _fileheader(path):
68 68 """Obtain a changegroup chunk header for a named path."""
69 69 return chunkheader(len(path)) + path
70 70
71 71
72 72 def writechunks(ui, chunks, filename, vfs=None):
73 73 """Write chunks to a file and return its filename.
74 74
75 75 The stream is assumed to be a bundle file.
76 76 Existing files will not be overwritten.
77 77 If no filename is specified, a temporary file is created.
78 78 """
79 79 fh = None
80 80 cleanup = None
81 81 try:
82 82 if filename:
83 83 if vfs:
84 84 fh = vfs.open(filename, b"wb")
85 85 else:
86 86 # Increase default buffer size because default is usually
87 87 # small (4k is common on Linux).
88 88 fh = open(filename, b"wb", 131072)
89 89 else:
90 90 fd, filename = pycompat.mkstemp(prefix=b"hg-bundle-", suffix=b".hg")
91 91 fh = os.fdopen(fd, "wb")
92 92 cleanup = filename
93 93 for c in chunks:
94 94 fh.write(c)
95 95 cleanup = None
96 96 return filename
97 97 finally:
98 98 if fh is not None:
99 99 fh.close()
100 100 if cleanup is not None:
101 101 if filename and vfs:
102 102 vfs.unlink(cleanup)
103 103 else:
104 104 os.unlink(cleanup)
105 105
106 106
107 107 class cg1unpacker(object):
108 108 """Unpacker for cg1 changegroup streams.
109 109
110 110 A changegroup unpacker handles the framing of the revision data in
111 111 the wire format. Most consumers will want to use the apply()
112 112 method to add the changes from the changegroup to a repository.
113 113
114 114 If you're forwarding a changegroup unmodified to another consumer,
115 115 use getchunks(), which returns an iterator of changegroup
116 116 chunks. This is mostly useful for cases where you need to know the
117 117 data stream has ended by observing the end of the changegroup.
118 118
119 119 deltachunk() is useful only if you're applying delta data. Most
120 120 consumers should prefer apply() instead.
121 121
122 122 A few other public methods exist. Those are used only for
123 123 bundlerepo and some debug commands - their use is discouraged.
124 124 """
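# Typical usage sketch (illustrative only; `fh`, `repo`, `tr`, and `url` are
# assumed to be supplied by the caller, and this is not a complete recipe):
#
#   unpacker = cg1unpacker(fh, b'UN')
#   with repo.transaction(b'unbundle') as tr:
#       unpacker.apply(repo, tr, b'unbundle', url)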
125 125
126 126 deltaheader = _CHANGEGROUPV1_DELTA_HEADER
127 127 deltaheadersize = deltaheader.size
128 128 version = b'01'
129 129 _grouplistcount = 1 # One list of files after the manifests
130 130
131 131 def __init__(self, fh, alg, extras=None):
132 132 if alg is None:
133 133 alg = b'UN'
134 134 if alg not in util.compengines.supportedbundletypes:
135 135 raise error.Abort(_(b'unknown stream compression type: %s') % alg)
136 136 if alg == b'BZ':
137 137 alg = b'_truncatedBZ'
138 138
139 139 compengine = util.compengines.forbundletype(alg)
140 140 self._stream = compengine.decompressorreader(fh)
141 141 self._type = alg
142 142 self.extras = extras or {}
143 143 self.callback = None
144 144
145 145 # These methods (compressed, read, seek, tell) all appear to only
146 146 # be used by bundlerepo, but it's a little hard to tell.
147 147 def compressed(self):
148 148 return self._type is not None and self._type != b'UN'
149 149
150 150 def read(self, l):
151 151 return self._stream.read(l)
152 152
153 153 def seek(self, pos):
154 154 return self._stream.seek(pos)
155 155
156 156 def tell(self):
157 157 return self._stream.tell()
158 158
159 159 def close(self):
160 160 return self._stream.close()
161 161
162 162 def _chunklength(self):
163 163 d = readexactly(self._stream, 4)
164 164 l = struct.unpack(b">l", d)[0]
165 165 if l <= 4:
166 166 if l:
167 167 raise error.Abort(_(b"invalid chunk length %d") % l)
168 168 return 0
169 169 if self.callback:
170 170 self.callback()
171 171 return l - 4
172 172
173 173 def changelogheader(self):
174 174 """v10 does not have a changelog header chunk"""
175 175 return {}
176 176
177 177 def manifestheader(self):
178 178 """v10 does not have a manifest header chunk"""
179 179 return {}
180 180
181 181 def filelogheader(self):
182 182 """return the header of the filelogs chunk, v10 only has the filename"""
183 183 l = self._chunklength()
184 184 if not l:
185 185 return {}
186 186 fname = readexactly(self._stream, l)
187 187 return {b'filename': fname}
188 188
189 189 def _deltaheader(self, headertuple, prevnode):
190 190 node, p1, p2, cs = headertuple
191 191 if prevnode is None:
192 192 deltabase = p1
193 193 else:
194 194 deltabase = prevnode
195 195 flags = 0
196 196 return node, p1, p2, deltabase, cs, flags
197 197
198 198 def deltachunk(self, prevnode):
199 199 l = self._chunklength()
200 200 if not l:
201 201 return {}
202 202 headerdata = readexactly(self._stream, self.deltaheadersize)
203 203 header = self.deltaheader.unpack(headerdata)
204 204 delta = readexactly(self._stream, l - self.deltaheadersize)
205 205 node, p1, p2, deltabase, cs, flags = self._deltaheader(header, prevnode)
206 206 # cg4 forward-compat
207 207 sidedata = {}
208 208 return (node, p1, p2, cs, deltabase, delta, flags, sidedata)
209 209
210 210 def getchunks(self):
211 211 """returns all the chunks contains in the bundle
212 212
213 213 Used when you need to forward the binary stream to a file or another
214 214 network API. To do so, it parses the changegroup data; otherwise it would
215 215 block in the case of sshrepo because it doesn't know the end of the stream.
216 216 """
217 217 # For changegroup 1 and 2, we expect 3 parts: changelog, manifestlog,
218 218 # and a list of filelogs. For changegroup 3, we expect 4 parts:
219 219 # changelog, manifestlog, a list of tree manifestlogs, and a list of
220 220 # filelogs.
221 221 #
222 222 # Changelog and manifestlog parts are terminated with empty chunks. The
223 223 # tree and file parts are a list of entry sections. Each entry section
224 224 # is a series of chunks terminating in an empty chunk. The list of these
225 225 # entry sections is terminated in yet another empty chunk, so we know
226 226 # we've reached the end of the tree/file list when we reach an empty
227 227 # chunk that was preceded by no non-empty chunks.
228 228
229 229 parts = 0
230 230 while parts < 2 + self._grouplistcount:
231 231 noentries = True
232 232 while True:
233 233 chunk = getchunk(self)
234 234 if not chunk:
235 235 # The first two empty chunks represent the end of the
236 236 # changelog and the manifestlog portions. The remaining
237 237 # empty chunks represent either A) the end of individual
238 238 # tree or file entries in the file list, or B) the end of
239 239 # the entire list. It's the end of the entire list if there
240 240 # were no entries (i.e. noentries is True).
241 241 if parts < 2:
242 242 parts += 1
243 243 elif noentries:
244 244 parts += 1
245 245 break
246 246 noentries = False
247 247 yield chunkheader(len(chunk))
248 248 pos = 0
249 249 while pos < len(chunk):
250 250 next = pos + 2 ** 20
251 251 yield chunk[pos:next]
252 252 pos = next
253 253 yield closechunk()
254 254
255 255 def _unpackmanifests(self, repo, revmap, trp, prog):
256 256 self.callback = prog.increment
257 257 # no need to check for empty manifest group here:
258 258 # if the result of the merge of 1 and 2 is the same in 3 and 4,
259 259 # no new manifest will be created and the manifest group will
260 260 # be empty during the pull
261 261 self.manifestheader()
262 262 deltas = self.deltaiter()
263 263 repo.manifestlog.getstorage(b'').addgroup(deltas, revmap, trp)
264 264 prog.complete()
265 265 self.callback = None
266 266
267 267 def apply(
268 268 self,
269 269 repo,
270 270 tr,
271 271 srctype,
272 272 url,
273 273 targetphase=phases.draft,
274 274 expectedtotal=None,
275 sidedata_categories=None,
275 276 ):
276 277 """Add the changegroup returned by source.read() to this repo.
277 278 srctype is a string like 'push', 'pull', or 'unbundle'. url is
278 279 the URL of the repo where this changegroup is coming from.
279 280
280 281 Return an integer summarizing the change to this repo:
281 282 - nothing changed or no source: 0
282 283 - more heads than before: 1+added heads (2..n)
283 284 - fewer heads than before: -1-removed heads (-2..-n)
284 285 - number of heads stays the same: 1
286
287 `sidedata_categories` is an optional set of the sidedata categories
288 wanted by the remote.
285 289 """
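# For example, per the return-value description above: a pull that adds one
# new head returns 2, one that removes a head returns -2, and one that
# leaves the head count unchanged returns 1.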
286 290 repo = repo.unfiltered()
287 291
292 # Only useful if we're adding sidedata categories. If both peers have
293 # the same categories, then we simply don't do anything.
294 if self.version == b'04' and srctype == b'pull':
295 sidedata_helpers = get_sidedata_helpers(
296 repo,
297 sidedata_categories or set(),
298 pull=True,
299 )
300 else:
301 sidedata_helpers = None
302
288 303 def csmap(x):
289 304 repo.ui.debug(b"add changeset %s\n" % short(x))
290 305 return len(cl)
291 306
292 307 def revmap(x):
293 308 return cl.rev(x)
294 309
295 310 try:
296 311 # The transaction may already carry source information. In this
297 312 # case we use the top level data. We overwrite the argument
298 313 # because we need to use the top level value (if they exist)
299 314 # in this function.
300 315 srctype = tr.hookargs.setdefault(b'source', srctype)
301 316 tr.hookargs.setdefault(b'url', url)
302 317 repo.hook(
303 318 b'prechangegroup', throw=True, **pycompat.strkwargs(tr.hookargs)
304 319 )
305 320
306 321 # write changelog data to temp files so concurrent readers
307 322 # will not see an inconsistent view
308 323 cl = repo.changelog
309 324 cl.delayupdate(tr)
310 325 oldheads = set(cl.heads())
311 326
312 327 trp = weakref.proxy(tr)
313 328 # pull off the changeset group
314 329 repo.ui.status(_(b"adding changesets\n"))
315 330 clstart = len(cl)
316 331 progress = repo.ui.makeprogress(
317 332 _(b'changesets'), unit=_(b'chunks'), total=expectedtotal
318 333 )
319 334 self.callback = progress.increment
320 335
321 336 efilesset = set()
322 337 duprevs = []
323 338
324 339 def ondupchangelog(cl, rev):
325 340 if rev < clstart:
326 341 duprevs.append(rev)
327 342
328 343 def onchangelog(cl, rev):
329 344 ctx = cl.changelogrevision(rev)
330 345 efilesset.update(ctx.files)
331 346 repo.register_changeset(rev, ctx)
332 347
333 348 self.changelogheader()
334 349 deltas = self.deltaiter()
335 350 if not cl.addgroup(
336 351 deltas,
337 352 csmap,
338 353 trp,
339 354 alwayscache=True,
340 355 addrevisioncb=onchangelog,
341 356 duplicaterevisioncb=ondupchangelog,
342 357 ):
343 358 repo.ui.develwarn(
344 359 b'applied empty changelog from changegroup',
345 360 config=b'warn-empty-changegroup',
346 361 )
347 362 efiles = len(efilesset)
348 363 clend = len(cl)
349 364 changesets = clend - clstart
350 365 progress.complete()
351 366 del deltas
352 367 # TODO Python 2.7 removal
353 368 # del efilesset
354 369 efilesset = None
355 370 self.callback = None
356 371
357 372 # pull off the manifest group
358 373 repo.ui.status(_(b"adding manifests\n"))
359 374 # We know that we'll never have more manifests than we had
360 375 # changesets.
361 376 progress = repo.ui.makeprogress(
362 377 _(b'manifests'), unit=_(b'chunks'), total=changesets
363 378 )
364 379 self._unpackmanifests(repo, revmap, trp, progress)
365 380
366 381 needfiles = {}
367 382 if repo.ui.configbool(b'server', b'validate'):
368 383 cl = repo.changelog
369 384 ml = repo.manifestlog
370 385 # validate incoming csets have their manifests
371 386 for cset in pycompat.xrange(clstart, clend):
372 387 mfnode = cl.changelogrevision(cset).manifest
373 388 mfest = ml[mfnode].readdelta()
374 389 # store file nodes we must see
375 390 for f, n in pycompat.iteritems(mfest):
376 391 needfiles.setdefault(f, set()).add(n)
377 392
378 393 # process the files
379 394 repo.ui.status(_(b"adding file changes\n"))
380 395 newrevs, newfiles = _addchangegroupfiles(
381 396 repo, self, revmap, trp, efiles, needfiles
382 397 )
383 398
384 399 # making sure the value exists
385 400 tr.changes.setdefault(b'changegroup-count-changesets', 0)
386 401 tr.changes.setdefault(b'changegroup-count-revisions', 0)
387 402 tr.changes.setdefault(b'changegroup-count-files', 0)
388 403 tr.changes.setdefault(b'changegroup-count-heads', 0)
389 404
390 405 # Some code uses bundle operations for internal purposes. They usually
391 406 # set `ui.quiet` to do this outside of user sight. Since the report
392 407 # of such operations now happens at the end of the transaction,
393 408 # ui.quiet has no direct effect on the output.
394 409 #
395 410 # To preserve this intent we use an inelegant hack: we fail to report
396 411 # the change if `quiet` is set. We should probably move to
397 412 # something better, but this is a good first step to allow the "end
398 413 # of transaction report" to pass tests.
399 414 if not repo.ui.quiet:
400 415 tr.changes[b'changegroup-count-changesets'] += changesets
401 416 tr.changes[b'changegroup-count-revisions'] += newrevs
402 417 tr.changes[b'changegroup-count-files'] += newfiles
403 418
404 419 deltaheads = 0
405 420 if oldheads:
406 421 heads = cl.heads()
407 422 deltaheads += len(heads) - len(oldheads)
408 423 for h in heads:
409 424 if h not in oldheads and repo[h].closesbranch():
410 425 deltaheads -= 1
411 426
412 427 # see previous comment about checking ui.quiet
413 428 if not repo.ui.quiet:
414 429 tr.changes[b'changegroup-count-heads'] += deltaheads
415 430 repo.invalidatevolatilesets()
416 431
417 432 if changesets > 0:
418 433 if b'node' not in tr.hookargs:
419 434 tr.hookargs[b'node'] = hex(cl.node(clstart))
420 435 tr.hookargs[b'node_last'] = hex(cl.node(clend - 1))
421 436 hookargs = dict(tr.hookargs)
422 437 else:
423 438 hookargs = dict(tr.hookargs)
424 439 hookargs[b'node'] = hex(cl.node(clstart))
425 440 hookargs[b'node_last'] = hex(cl.node(clend - 1))
426 441 repo.hook(
427 442 b'pretxnchangegroup',
428 443 throw=True,
429 444 **pycompat.strkwargs(hookargs)
430 445 )
431 446
432 447 added = pycompat.xrange(clstart, clend)
433 448 phaseall = None
434 449 if srctype in (b'push', b'serve'):
435 450 # Old servers can not push the boundary themselves.
436 451 # New servers won't push the boundary if changeset already
437 452 # exists locally as secret
438 453 #
439 454 # We should not use added here but the list of all change in
440 455 # the bundle
441 456 if repo.publishing():
442 457 targetphase = phaseall = phases.public
443 458 else:
444 459 # closer target phase computation
445 460
446 461 # Those changesets have been pushed from the
447 462 # outside, their phases are going to be pushed
448 463 # alongside. Therefore `targetphase` is
449 464 # ignored.
450 465 targetphase = phaseall = phases.draft
451 466 if added:
452 467 phases.registernew(repo, tr, targetphase, added)
453 468 if phaseall is not None:
454 469 if duprevs:
455 470 duprevs.extend(added)
456 471 else:
457 472 duprevs = added
458 473 phases.advanceboundary(repo, tr, phaseall, [], revs=duprevs)
459 474 duprevs = []
460 475
461 476 if changesets > 0:
462 477
463 478 def runhooks(unused_success):
464 479 # These hooks run when the lock releases, not when the
465 480 # transaction closes. So it's possible for the changelog
466 481 # to have changed since we last saw it.
467 482 if clstart >= len(repo):
468 483 return
469 484
470 485 repo.hook(b"changegroup", **pycompat.strkwargs(hookargs))
471 486
472 487 for rev in added:
473 488 args = hookargs.copy()
474 489 args[b'node'] = hex(cl.node(rev))
475 490 del args[b'node_last']
476 491 repo.hook(b"incoming", **pycompat.strkwargs(args))
477 492
478 493 newheads = [h for h in repo.heads() if h not in oldheads]
479 494 repo.ui.log(
480 495 b"incoming",
481 496 b"%d incoming changes - new heads: %s\n",
482 497 len(added),
483 498 b', '.join([hex(c[:6]) for c in newheads]),
484 499 )
485 500
486 501 tr.addpostclose(
487 502 b'changegroup-runhooks-%020i' % clstart,
488 503 lambda tr: repo._afterlock(runhooks),
489 504 )
490 505 finally:
491 506 repo.ui.flush()
492 507 # never return 0 here:
493 508 if deltaheads < 0:
494 509 ret = deltaheads - 1
495 510 else:
496 511 ret = deltaheads + 1
497 512 return ret
498 513
499 514 def deltaiter(self):
500 515 """
501 516 returns an iterator of the deltas in this changegroup
502 517
503 518 Useful for passing to the underlying storage system to be stored.
504 519 """
505 520 chain = None
506 521 for chunkdata in iter(lambda: self.deltachunk(chain), {}):
507 522 # Chunkdata: (node, p1, p2, cs, deltabase, delta, flags)
508 523 yield chunkdata
509 524 chain = chunkdata[0]
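A minimal, self-contained sketch of the `iter(callable, sentinel)` pattern that `deltaiter` relies on: the callable is invoked repeatedly until it returns the empty-dict sentinel, just as `deltachunk` does at the end of a group (the chunk tuples below are made-up placeholders, not real delta chunks):

def make_source(chunks):
    it = iter(chunks)

    def next_chunk():
        # returns {} once exhausted, mirroring deltachunk()'s end-of-group marker
        return next(it, {})

    return next_chunk

source = make_source([(b'node1', b'delta1'), (b'node2', b'delta2')])
# iter(callable, sentinel) keeps calling until the return value equals {}
assert list(iter(source, {})) == [(b'node1', b'delta1'), (b'node2', b'delta2')]

Callers such as `_addchangegroupfiles()` further down hand the resulting iterator straight to `addgroup()` on the target storage.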
510 525
511 526
512 527 class cg2unpacker(cg1unpacker):
513 528 """Unpacker for cg2 streams.
514 529
515 530 cg2 streams add support for generaldelta, so the delta header
516 531 format is slightly different. All other features about the data
517 532 remain the same.
518 533 """
519 534
520 535 deltaheader = _CHANGEGROUPV2_DELTA_HEADER
521 536 deltaheadersize = deltaheader.size
522 537 version = b'02'
523 538
524 539 def _deltaheader(self, headertuple, prevnode):
525 540 node, p1, p2, deltabase, cs = headertuple
526 541 flags = 0
527 542 return node, p1, p2, deltabase, cs, flags
528 543
529 544
530 545 class cg3unpacker(cg2unpacker):
531 546 """Unpacker for cg3 streams.
532 547
533 548 cg3 streams add support for exchanging treemanifests and revlog
534 549 flags. It adds the revlog flags to the delta header and an empty chunk
535 550 separating manifests and files.
536 551 """
537 552
538 553 deltaheader = _CHANGEGROUPV3_DELTA_HEADER
539 554 deltaheadersize = deltaheader.size
540 555 version = b'03'
541 556 _grouplistcount = 2 # One list of manifests and one list of files
542 557
543 558 def _deltaheader(self, headertuple, prevnode):
544 559 node, p1, p2, deltabase, cs, flags = headertuple
545 560 return node, p1, p2, deltabase, cs, flags
546 561
547 562 def _unpackmanifests(self, repo, revmap, trp, prog):
548 563 super(cg3unpacker, self)._unpackmanifests(repo, revmap, trp, prog)
549 564 for chunkdata in iter(self.filelogheader, {}):
550 565 # If we get here, there are directory manifests in the changegroup
551 566 d = chunkdata[b"filename"]
552 567 repo.ui.debug(b"adding %s revisions\n" % d)
553 568 deltas = self.deltaiter()
554 569 if not repo.manifestlog.getstorage(d).addgroup(deltas, revmap, trp):
555 570 raise error.Abort(_(b"received dir revlog group is empty"))
556 571
557 572
558 573 class cg4unpacker(cg3unpacker):
559 574 """Unpacker for cg4 streams.
560 575
561 576 cg4 streams add support for exchanging sidedata.
562 577 """
563 578
564 579 version = b'04'
565 580
566 581 def deltachunk(self, prevnode):
567 582 res = super(cg4unpacker, self).deltachunk(prevnode)
568 583 if not res:
569 584 return res
570 585
571 586 (node, p1, p2, cs, deltabase, delta, flags, _sidedata) = res
572 587
573 588 sidedata_raw = getchunk(self._stream)
574 589 sidedata = {}
575 590 if len(sidedata_raw) > 0:
576 591 sidedata = sidedatamod.deserialize_sidedata(sidedata_raw)
577 592
578 593 return node, p1, p2, cs, deltabase, delta, flags, sidedata
579 594
580 595
581 596 class headerlessfixup(object):
582 597 def __init__(self, fh, h):
583 598 self._h = h
584 599 self._fh = fh
585 600
586 601 def read(self, n):
587 602 if self._h:
588 603 d, self._h = self._h[:n], self._h[n:]
589 604 if len(d) < n:
590 605 d += readexactly(self._fh, n - len(d))
591 606 return d
592 607 return readexactly(self._fh, n)
593 608
594 609
595 610 def _revisiondeltatochunks(delta, headerfn):
596 611 """Serialize a revisiondelta to changegroup chunks."""
597 612
598 613 # The captured revision delta may be encoded as a delta against
599 614 # a base revision or as a full revision. The changegroup format
600 615 # requires that everything on the wire be deltas. So for full
601 616 # revisions, we need to invent a header that says to rewrite
602 617 # data.
603 618
604 619 if delta.delta is not None:
605 620 prefix, data = b'', delta.delta
606 621 elif delta.basenode == nullid:
607 622 data = delta.revision
608 623 prefix = mdiff.trivialdiffheader(len(data))
609 624 else:
610 625 data = delta.revision
611 626 prefix = mdiff.replacediffheader(delta.baserevisionsize, len(data))
612 627
613 628 meta = headerfn(delta)
614 629
615 630 yield chunkheader(len(meta) + len(prefix) + len(data))
616 631 yield meta
617 632 if prefix:
618 633 yield prefix
619 634 yield data
620 635
621 636 sidedata = delta.sidedata
622 637 if sidedata is not None:
623 638 # Need a separate chunk for sidedata to be able to differentiate
624 639 # "raw delta" length and sidedata length
625 640 yield chunkheader(len(sidedata))
626 641 yield sidedata
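A rough, self-contained sketch of the chunk framing produced above, assuming `chunkheader()`/`closechunk()` behave as they are defined elsewhere in this module (a 4-byte big-endian length that counts its own header bytes, with a zero-length chunk closing a group); the payload strings are placeholders:

import struct

def frame_chunk(payload):
    # 4-byte big-endian length prefix that counts its own 4 bytes
    return struct.pack(b">l", len(payload) + 4) + payload

def close_group():
    # a zero-length chunk marks the end of a group (see closechunk())
    return struct.pack(b">l", 0)

meta, prefix, data = b'<header>', b'<diffhead>', b'<delta body>'
delta_chunk = frame_chunk(meta + prefix + data)
sidedata_chunk = frame_chunk(b'<sidedata blob>')   # cg4 only: a separate chunk
assert close_group() == b'\x00\x00\x00\x00'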
627 642
628 643
629 644 def _sortnodesellipsis(store, nodes, cl, lookup):
630 645 """Sort nodes for changegroup generation."""
631 646 # Ellipses serving mode.
632 647 #
633 648 # In a perfect world, we'd generate better ellipsis-ified graphs
634 649 # for non-changelog revlogs. In practice, we haven't started doing
635 650 # that yet, so the resulting DAGs for the manifestlog and filelogs
636 651 # are actually full of bogus parentage on all the ellipsis
637 652 # nodes. This has the side effect that, while the contents are
638 653 # correct, the individual DAGs might be completely out of whack in
639 654 # a case like 882681bc3166 and its ancestors (back about 10
640 655 # revisions or so) in the main hg repo.
641 656 #
642 657 # The one invariant we *know* holds is that the new (potentially
643 658 # bogus) DAG shape will be valid if we order the nodes in the
644 659 # order that they're introduced in dramatis personae by the
645 660 # changelog, so what we do is we sort the non-changelog histories
646 661 # by the order in which they are used by the changelog.
647 662 key = lambda n: cl.rev(lookup(n))
648 663 return sorted(nodes, key=key)
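A tiny illustration of the ordering rule above, with made-up node names: non-changelog nodes come out in the order their linked changesets appear in the changelog.

cl_rev = {b'c0': 0, b'c1': 1, b'c2': 2}                  # changelog node -> rev
linknode = {b'fA': b'c2', b'fB': b'c0', b'fC': b'c1'}    # filelog node -> cl node

ordered = sorted([b'fA', b'fB', b'fC'], key=lambda n: cl_rev[linknode[n]])
assert ordered == [b'fB', b'fC', b'fA']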
649 664
650 665
651 666 def _resolvenarrowrevisioninfo(
652 667 cl,
653 668 store,
654 669 ischangelog,
655 670 rev,
656 671 linkrev,
657 672 linknode,
658 673 clrevtolocalrev,
659 674 fullclnodes,
660 675 precomputedellipsis,
661 676 ):
662 677 linkparents = precomputedellipsis[linkrev]
663 678
664 679 def local(clrev):
665 680 """Turn a changelog revnum into a local revnum.
666 681
667 682 The ellipsis dag is stored as revnums on the changelog,
668 683 but when we're producing ellipsis entries for
669 684 non-changelog revlogs, we need to turn those numbers into
670 685 something local. This does that for us, and during the
671 686 changelog sending phase will also expand the stored
672 687 mappings as needed.
673 688 """
674 689 if clrev == nullrev:
675 690 return nullrev
676 691
677 692 if ischangelog:
678 693 return clrev
679 694
680 695 # Walk the ellipsis-ized changelog breadth-first looking for a
681 696 # change that has been linked from the current revlog.
682 697 #
683 698 # For a flat manifest revlog only a single step should be necessary
684 699 # as all relevant changelog entries are relevant to the flat
685 700 # manifest.
686 701 #
687 702 # For a filelog or tree manifest dirlog however not every changelog
688 703 # entry will have been relevant, so we need to skip some changelog
689 704 # nodes even after ellipsis-izing.
690 705 walk = [clrev]
691 706 while walk:
692 707 p = walk[0]
693 708 walk = walk[1:]
694 709 if p in clrevtolocalrev:
695 710 return clrevtolocalrev[p]
696 711 elif p in fullclnodes:
697 712 walk.extend([pp for pp in cl.parentrevs(p) if pp != nullrev])
698 713 elif p in precomputedellipsis:
699 714 walk.extend(
700 715 [pp for pp in precomputedellipsis[p] if pp != nullrev]
701 716 )
702 717 else:
703 718 # In this case, we've got an ellipsis with parents
704 719 # outside the current bundle (likely an
705 720 # incremental pull). We "know" that we can use the
706 721 # value of this same revlog at whatever revision
707 722 # is pointed to by linknode. "Know" is in scare
708 723 # quotes because I haven't done enough examination
709 724 # of edge cases to convince myself this is really
710 725 # a fact - it works for all the (admittedly
711 726 # thorough) cases in our testsuite, but I would be
712 727 # somewhat unsurprised to find a case in the wild
713 728 # where this breaks down a bit. That said, I don't
714 729 # know if it would hurt anything.
715 730 for i in pycompat.xrange(rev, 0, -1):
716 731 if store.linkrev(i) == clrev:
717 732 return i
718 733 # We failed to resolve a parent for this node, so
719 734 # we crash the changegroup construction.
720 735 raise error.Abort(
721 736 b"unable to resolve parent while packing '%s' %r"
722 737 b' for changeset %r' % (store.indexfile, rev, clrev)
723 738 )
724 739
725 740 return nullrev
726 741
727 742 if not linkparents or (store.parentrevs(rev) == (nullrev, nullrev)):
728 743 p1, p2 = nullrev, nullrev
729 744 elif len(linkparents) == 1:
730 745 (p1,) = sorted(local(p) for p in linkparents)
731 746 p2 = nullrev
732 747 else:
733 748 p1, p2 = sorted(local(p) for p in linkparents)
734 749
735 750 p1node, p2node = store.node(p1), store.node(p2)
736 751
737 752 return p1node, p2node, linknode
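The `local()` helper above is essentially a breadth-first walk from a changelog rev towards its (possibly ellipsed) ancestors until one maps into the current revlog. A self-contained sketch of that walk, with a hypothetical `parents_of` callback standing in for the `fullclnodes`/`precomputedellipsis` lookups:

from collections import deque

def find_local_rev(clrev, clrevtolocalrev, parents_of, nullrev=-1):
    walk = deque([clrev])
    while walk:
        p = walk.popleft()
        if p in clrevtolocalrev:
            return clrevtolocalrev[p]
        walk.extend(pp for pp in parents_of(p) if pp != nullrev)
    return None   # the real code then scans linkrevs and aborts if that fails

parents = {5: [3], 3: [2], 2: []}
assert find_local_rev(5, {2: 7}, parents.__getitem__) == 7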
738 753
739 754
740 755 def deltagroup(
741 756 repo,
742 757 store,
743 758 nodes,
744 759 ischangelog,
745 760 lookup,
746 761 forcedeltaparentprev,
747 762 topic=None,
748 763 ellipses=False,
749 764 clrevtolocalrev=None,
750 765 fullclnodes=None,
751 766 precomputedellipsis=None,
767 sidedata_helpers=None,
752 768 ):
753 769 """Calculate deltas for a set of revisions.
754 770
755 771 Is a generator of ``revisiondelta`` instances.
756 772
757 773 If topic is not None, progress detail will be generated using this
758 774 topic name (e.g. changesets, manifests, etc).
775
776 See `storageutil.emitrevisions` for the doc on `sidedata_helpers`.
759 777 """
760 778 if not nodes:
761 779 return
762 780
763 781 cl = repo.changelog
764 782
765 783 if ischangelog:
766 784 # `hg log` shows changesets in storage order. To preserve order
767 785 # across clones, send out changesets in storage order.
768 786 nodesorder = b'storage'
769 787 elif ellipses:
770 788 nodes = _sortnodesellipsis(store, nodes, cl, lookup)
771 789 nodesorder = b'nodes'
772 790 else:
773 791 nodesorder = None
774 792
775 793 # Perform ellipses filtering and revision massaging. We do this before
776 794 # emitrevisions() because a) filtering out revisions creates less work
777 795 # for emitrevisions() b) dropping revisions would break emitrevisions()'s
778 796 # assumptions about delta choices and we would possibly send a delta
779 797 # referencing a missing base revision.
780 798 #
781 799 # Also, calling lookup() has side-effects with regards to populating
782 800 # data structures. If we don't call lookup() for each node or if we call
783 801 # lookup() after the first pass through each node, things can break -
784 802 # possibly intermittently depending on the python hash seed! For that
785 803 # reason, we store a mapping of all linknodes during the initial node
786 804 # pass rather than use lookup() on the output side.
787 805 if ellipses:
788 806 filtered = []
789 807 adjustedparents = {}
790 808 linknodes = {}
791 809
792 810 for node in nodes:
793 811 rev = store.rev(node)
794 812 linknode = lookup(node)
795 813 linkrev = cl.rev(linknode)
796 814 clrevtolocalrev[linkrev] = rev
797 815
798 816 # If linknode is in fullclnodes, it means the corresponding
799 817 # changeset was a full changeset and is being sent unaltered.
800 818 if linknode in fullclnodes:
801 819 linknodes[node] = linknode
802 820
803 821 # If the corresponding changeset wasn't in the set computed
804 822 # as relevant to us, it should be dropped outright.
805 823 elif linkrev not in precomputedellipsis:
806 824 continue
807 825
808 826 else:
809 827 # We could probably do this later and avoid the dict
810 828 # holding state. But it likely doesn't matter.
811 829 p1node, p2node, linknode = _resolvenarrowrevisioninfo(
812 830 cl,
813 831 store,
814 832 ischangelog,
815 833 rev,
816 834 linkrev,
817 835 linknode,
818 836 clrevtolocalrev,
819 837 fullclnodes,
820 838 precomputedellipsis,
821 839 )
822 840
823 841 adjustedparents[node] = (p1node, p2node)
824 842 linknodes[node] = linknode
825 843
826 844 filtered.append(node)
827 845
828 846 nodes = filtered
829 847
830 848 # We expect the first pass to be fast, so we only engage the progress
831 849 # meter for constructing the revision deltas.
832 850 progress = None
833 851 if topic is not None:
834 852 progress = repo.ui.makeprogress(
835 853 topic, unit=_(b'chunks'), total=len(nodes)
836 854 )
837 855
838 856 configtarget = repo.ui.config(b'devel', b'bundle.delta')
839 857 if configtarget not in (b'', b'p1', b'full'):
840 858 msg = _("""config "devel.bundle.delta" has unknown value: %s""")
841 859 repo.ui.warn(msg % configtarget)
842 860
843 861 deltamode = repository.CG_DELTAMODE_STD
844 862 if forcedeltaparentprev:
845 863 deltamode = repository.CG_DELTAMODE_PREV
846 864 elif configtarget == b'p1':
847 865 deltamode = repository.CG_DELTAMODE_P1
848 866 elif configtarget == b'full':
849 867 deltamode = repository.CG_DELTAMODE_FULL
850 868
851 869 revisions = store.emitrevisions(
852 870 nodes,
853 871 nodesorder=nodesorder,
854 872 revisiondata=True,
855 873 assumehaveparentrevisions=not ellipses,
856 874 deltamode=deltamode,
875 sidedata_helpers=sidedata_helpers,
857 876 )
858 877
859 878 for i, revision in enumerate(revisions):
860 879 if progress:
861 880 progress.update(i + 1)
862 881
863 882 if ellipses:
864 883 linknode = linknodes[revision.node]
865 884
866 885 if revision.node in adjustedparents:
867 886 p1node, p2node = adjustedparents[revision.node]
868 887 revision.p1node = p1node
869 888 revision.p2node = p2node
870 889 revision.flags |= repository.REVISION_FLAG_ELLIPSIS
871 890
872 891 else:
873 892 linknode = lookup(revision.node)
874 893
875 894 revision.linknode = linknode
876 895 yield revision
877 896
878 897 if progress:
879 898 progress.complete()
880 899
881 900
882 901 class cgpacker(object):
883 902 def __init__(
884 903 self,
885 904 repo,
886 905 oldmatcher,
887 906 matcher,
888 907 version,
889 908 builddeltaheader,
890 909 manifestsend,
891 910 forcedeltaparentprev=False,
892 911 bundlecaps=None,
893 912 ellipses=False,
894 913 shallow=False,
895 914 ellipsisroots=None,
896 915 fullnodes=None,
897 916 remote_sidedata=None,
898 917 ):
899 918 """Given a source repo, construct a bundler.
900 919
901 920 oldmatcher is a matcher that matches on files the client already has.
902 921 These will not be included in the changegroup.
903 922
904 923 matcher is a matcher that matches on files to include in the
905 924 changegroup. Used to facilitate sparse changegroups.
906 925
907 926 forcedeltaparentprev indicates whether delta parents must be against
908 927 the previous revision in a delta group. This should only be used for
909 928 compatibility with changegroup version 1.
910 929
911 930 builddeltaheader is a callable that constructs the header for a group
912 931 delta.
913 932
914 933 manifestsend is a chunk to send after manifests have been fully emitted.
915 934
916 935 ellipses indicates whether ellipsis serving mode is enabled.
917 936
918 937 bundlecaps is optional and can be used to specify the set of
919 938 capabilities which can be used to build the bundle. While bundlecaps is
920 939 unused in core Mercurial, extensions rely on this feature to communicate
921 940 capabilities to customize the changegroup packer.
922 941
923 942 shallow indicates whether shallow data might be sent. The packer may
924 943 need to pack file contents not introduced by the changes being packed.
925 944
926 945 fullnodes is the set of changelog nodes which should not be ellipsis
927 946 nodes. We store this rather than the set of nodes that should be
928 947 ellipsis because for very large histories we expect this to be
929 948 significantly smaller.
930 949
931 950 remote_sidedata is the set of sidedata categories wanted by the remote.
932 951 """
933 952 assert oldmatcher
934 953 assert matcher
935 954 self._oldmatcher = oldmatcher
936 955 self._matcher = matcher
937 956
938 957 self.version = version
939 958 self._forcedeltaparentprev = forcedeltaparentprev
940 959 self._builddeltaheader = builddeltaheader
941 960 self._manifestsend = manifestsend
942 961 self._ellipses = ellipses
943 962
944 963 # Set of capabilities we can use to build the bundle.
945 964 if bundlecaps is None:
946 965 bundlecaps = set()
947 966 self._bundlecaps = bundlecaps
948 967 if remote_sidedata is None:
949 968 remote_sidedata = set()
950 969 self._remote_sidedata = remote_sidedata
951 970 self._isshallow = shallow
952 971 self._fullclnodes = fullnodes
953 972
954 973 # Maps ellipsis revs to their roots at the changelog level.
955 974 self._precomputedellipsis = ellipsisroots
956 975
957 976 self._repo = repo
958 977
959 978 if self._repo.ui.verbose and not self._repo.ui.debugflag:
960 979 self._verbosenote = self._repo.ui.note
961 980 else:
962 981 self._verbosenote = lambda s: None
963 982
964 983 def generate(
965 984 self, commonrevs, clnodes, fastpathlinkrev, source, changelog=True
966 985 ):
967 986 """Yield a sequence of changegroup byte chunks.
968 987 If changelog is False, changelog data won't be added to changegroup
969 988 """
970 989
971 990 repo = self._repo
972 991 cl = repo.changelog
973 992
974 993 self._verbosenote(_(b'uncompressed size of bundle content:\n'))
975 994 size = 0
976 995
996 sidedata_helpers = None
997 if self.version == b'04':
998 remote_sidedata = self._remote_sidedata
999 if source == b'strip':
1000 # We're our own remote when stripping, get the no-op helpers
1001 # TODO a better approach would be for the strip bundle to
1002 # correctly advertise its sidedata categories directly.
1003 remote_sidedata = repo._wanted_sidedata
1004 sidedata_helpers = get_sidedata_helpers(repo, remote_sidedata)
1005
977 1006 clstate, deltas = self._generatechangelog(
978 cl, clnodes, generate=changelog
1007 cl,
1008 clnodes,
1009 generate=changelog,
1010 sidedata_helpers=sidedata_helpers,
979 1011 )
980 1012 for delta in deltas:
981 1013 for chunk in _revisiondeltatochunks(delta, self._builddeltaheader):
982 1014 size += len(chunk)
983 1015 yield chunk
984 1016
985 1017 close = closechunk()
986 1018 size += len(close)
987 1019 yield closechunk()
988 1020
989 1021 self._verbosenote(_(b'%8.i (changelog)\n') % size)
990 1022
991 1023 clrevorder = clstate[b'clrevorder']
992 1024 manifests = clstate[b'manifests']
993 1025 changedfiles = clstate[b'changedfiles']
994 1026
995 1027 # We need to make sure that the linkrev in the changegroup refers to
996 1028 # the first changeset that introduced the manifest or file revision.
997 1029 # The fastpath is usually safer than the slowpath, because the filelogs
998 1030 # are walked in revlog order.
999 1031 #
1000 1032 # When taking the slowpath when the manifest revlog uses generaldelta,
1001 1033 # the manifest may be walked in the "wrong" order. Without 'clrevorder',
1002 1034 # we would get an incorrect linkrev (see fix in cc0ff93d0c0c).
1003 1035 #
1004 1036 # When taking the fastpath, we are only vulnerable to reordering
1005 1037 # of the changelog itself. The changelog never uses generaldelta and is
1006 1038 # never reordered. To handle this case, we simply take the slowpath,
1007 1039 # which already has the 'clrevorder' logic. This was also fixed in
1008 1040 # cc0ff93d0c0c.
1009 1041
1010 1042 # Treemanifests don't work correctly with fastpathlinkrev
1011 1043 # either, because we don't discover which directory nodes to
1012 1044 # send along with files. This could probably be fixed.
1013 1045 fastpathlinkrev = fastpathlinkrev and not scmutil.istreemanifest(repo)
1014 1046
1015 1047 fnodes = {} # needed file nodes
1016 1048
1017 1049 size = 0
1018 1050 it = self.generatemanifests(
1019 1051 commonrevs,
1020 1052 clrevorder,
1021 1053 fastpathlinkrev,
1022 1054 manifests,
1023 1055 fnodes,
1024 1056 source,
1025 1057 clstate[b'clrevtomanifestrev'],
1058 sidedata_helpers=sidedata_helpers,
1026 1059 )
1027 1060
1028 1061 for tree, deltas in it:
1029 1062 if tree:
1030 1063 assert self.version in (b'03', b'04')
1031 1064 chunk = _fileheader(tree)
1032 1065 size += len(chunk)
1033 1066 yield chunk
1034 1067
1035 1068 for delta in deltas:
1036 1069 chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
1037 1070 for chunk in chunks:
1038 1071 size += len(chunk)
1039 1072 yield chunk
1040 1073
1041 1074 close = closechunk()
1042 1075 size += len(close)
1043 1076 yield close
1044 1077
1045 1078 self._verbosenote(_(b'%8.i (manifests)\n') % size)
1046 1079 yield self._manifestsend
1047 1080
1048 1081 mfdicts = None
1049 1082 if self._ellipses and self._isshallow:
1050 1083 mfdicts = [
1051 1084 (repo.manifestlog[n].read(), lr)
1052 1085 for (n, lr) in pycompat.iteritems(manifests)
1053 1086 ]
1054 1087
1055 1088 manifests.clear()
1056 1089 clrevs = {cl.rev(x) for x in clnodes}
1057 1090
1058 1091 it = self.generatefiles(
1059 1092 changedfiles,
1060 1093 commonrevs,
1061 1094 source,
1062 1095 mfdicts,
1063 1096 fastpathlinkrev,
1064 1097 fnodes,
1065 1098 clrevs,
1099 sidedata_helpers=sidedata_helpers,
1066 1100 )
1067 1101
1068 1102 for path, deltas in it:
1069 1103 h = _fileheader(path)
1070 1104 size = len(h)
1071 1105 yield h
1072 1106
1073 1107 for delta in deltas:
1074 1108 chunks = _revisiondeltatochunks(delta, self._builddeltaheader)
1075 1109 for chunk in chunks:
1076 1110 size += len(chunk)
1077 1111 yield chunk
1078 1112
1079 1113 close = closechunk()
1080 1114 size += len(close)
1081 1115 yield close
1082 1116
1083 1117 self._verbosenote(_(b'%8.i %s\n') % (size, path))
1084 1118
1085 1119 yield closechunk()
1086 1120
1087 1121 if clnodes:
1088 1122 repo.hook(b'outgoing', node=hex(clnodes[0]), source=source)
1089 1123
1090 def _generatechangelog(self, cl, nodes, generate=True):
1124 def _generatechangelog(
1125 self, cl, nodes, generate=True, sidedata_helpers=None
1126 ):
1091 1127 """Generate data for changelog chunks.
1092 1128
1093 1129 Returns a 2-tuple of a dict containing state and an iterable of
1094 1130 byte chunks. The state will not be fully populated until the
1095 1131 chunk stream has been fully consumed.
1096 1132
1097 1133 if generate is False, the state will be fully populated and no chunk
1098 1134 stream will be yielded
1135
1136 See `storageutil.emitrevisions` for the doc on `sidedata_helpers`.
1099 1137 """
1100 1138 clrevorder = {}
1101 1139 manifests = {}
1102 1140 mfl = self._repo.manifestlog
1103 1141 changedfiles = set()
1104 1142 clrevtomanifestrev = {}
1105 1143
1106 1144 state = {
1107 1145 b'clrevorder': clrevorder,
1108 1146 b'manifests': manifests,
1109 1147 b'changedfiles': changedfiles,
1110 1148 b'clrevtomanifestrev': clrevtomanifestrev,
1111 1149 }
1112 1150
1113 1151 if not (generate or self._ellipses):
1114 1152 # sort the nodes in storage order
1115 1153 nodes = sorted(nodes, key=cl.rev)
1116 1154 for node in nodes:
1117 1155 c = cl.changelogrevision(node)
1118 1156 clrevorder[node] = len(clrevorder)
1119 1157 # record the first changeset introducing this manifest version
1120 1158 manifests.setdefault(c.manifest, node)
1121 1159 # Record a complete list of potentially-changed files in
1122 1160 # this manifest.
1123 1161 changedfiles.update(c.files)
1124 1162
1125 1163 return state, ()
1126 1164
1127 1165 # Callback for the changelog, used to collect changed files and
1128 1166 # manifest nodes.
1129 1167 # Returns the linkrev node (identity in the changelog case).
1130 1168 def lookupcl(x):
1131 1169 c = cl.changelogrevision(x)
1132 1170 clrevorder[x] = len(clrevorder)
1133 1171
1134 1172 if self._ellipses:
1135 1173 # Only update manifests if x is going to be sent. Otherwise we
1136 1174 # end up with bogus linkrevs specified for manifests and
1137 1175 # we skip some manifest nodes that we should otherwise
1138 1176 # have sent.
1139 1177 if (
1140 1178 x in self._fullclnodes
1141 1179 or cl.rev(x) in self._precomputedellipsis
1142 1180 ):
1143 1181
1144 1182 manifestnode = c.manifest
1145 1183 # Record the first changeset introducing this manifest
1146 1184 # version.
1147 1185 manifests.setdefault(manifestnode, x)
1148 1186 # Set this narrow-specific dict so we have the lowest
1149 1187 # manifest revnum to look up for this cl revnum. (Part of
1150 1188 # mapping changelog ellipsis parents to manifest ellipsis
1151 1189 # parents)
1152 1190 clrevtomanifestrev.setdefault(
1153 1191 cl.rev(x), mfl.rev(manifestnode)
1154 1192 )
1155 1193 # We can't trust the changed files list in the changeset if the
1156 1194 # client requested a shallow clone.
1157 1195 if self._isshallow:
1158 1196 changedfiles.update(mfl[c.manifest].read().keys())
1159 1197 else:
1160 1198 changedfiles.update(c.files)
1161 1199 else:
1162 1200 # record the first changeset introducing this manifest version
1163 1201 manifests.setdefault(c.manifest, x)
1164 1202 # Record a complete list of potentially-changed files in
1165 1203 # this manifest.
1166 1204 changedfiles.update(c.files)
1167 1205
1168 1206 return x
1169 1207
1170 1208 gen = deltagroup(
1171 1209 self._repo,
1172 1210 cl,
1173 1211 nodes,
1174 1212 True,
1175 1213 lookupcl,
1176 1214 self._forcedeltaparentprev,
1177 1215 ellipses=self._ellipses,
1178 1216 topic=_(b'changesets'),
1179 1217 clrevtolocalrev={},
1180 1218 fullclnodes=self._fullclnodes,
1181 1219 precomputedellipsis=self._precomputedellipsis,
1220 sidedata_helpers=sidedata_helpers,
1182 1221 )
1183 1222
1184 1223 return state, gen
1185 1224
1186 1225 def generatemanifests(
1187 1226 self,
1188 1227 commonrevs,
1189 1228 clrevorder,
1190 1229 fastpathlinkrev,
1191 1230 manifests,
1192 1231 fnodes,
1193 1232 source,
1194 1233 clrevtolocalrev,
1234 sidedata_helpers=None,
1195 1235 ):
1196 1236 """Returns an iterator of changegroup chunks containing manifests.
1197 1237
1198 1238 `source` is unused here, but is used by extensions like remotefilelog to
1199 1239 change what is sent based on pulls vs pushes, etc.
1240
1241 See `storageutil.emitrevisions` for the doc on `sidedata_helpers`.
1200 1242 """
1201 1243 repo = self._repo
1202 1244 mfl = repo.manifestlog
1203 1245 tmfnodes = {b'': manifests}
1204 1246
1205 1247 # Callback for the manifest, used to collect linkrevs for filelog
1206 1248 # revisions.
1207 1249 # Returns the linkrev node (collected in lookupcl).
1208 1250 def makelookupmflinknode(tree, nodes):
1209 1251 if fastpathlinkrev:
1210 1252 assert not tree
1211 1253 return (
1212 1254 manifests.__getitem__
1213 1255 ) # pytype: disable=unsupported-operands
1214 1256
1215 1257 def lookupmflinknode(x):
1216 1258 """Callback for looking up the linknode for manifests.
1217 1259
1218 1260 Returns the linkrev node for the specified manifest.
1219 1261
1220 1262 SIDE EFFECT:
1221 1263
1222 1264 1) fclnodes gets populated with the list of relevant
1223 1265 file nodes if we're not using fastpathlinkrev
1224 1266 2) When treemanifests are in use, collects treemanifest nodes
1225 1267 to send
1226 1268
1227 1269 Note that this means manifests must be completely sent to
1228 1270 the client before you can trust the list of files and
1229 1271 treemanifests to send.
1230 1272 """
1231 1273 clnode = nodes[x]
1232 1274 mdata = mfl.get(tree, x).readfast(shallow=True)
1233 1275 for p, n, fl in mdata.iterentries():
1234 1276 if fl == b't': # subdirectory manifest
1235 1277 subtree = tree + p + b'/'
1236 1278 tmfclnodes = tmfnodes.setdefault(subtree, {})
1237 1279 tmfclnode = tmfclnodes.setdefault(n, clnode)
1238 1280 if clrevorder[clnode] < clrevorder[tmfclnode]:
1239 1281 tmfclnodes[n] = clnode
1240 1282 else:
1241 1283 f = tree + p
1242 1284 fclnodes = fnodes.setdefault(f, {})
1243 1285 fclnode = fclnodes.setdefault(n, clnode)
1244 1286 if clrevorder[clnode] < clrevorder[fclnode]:
1245 1287 fclnodes[n] = clnode
1246 1288 return clnode
1247 1289
1248 1290 return lookupmflinknode
1249 1291
1250 1292 while tmfnodes:
1251 1293 tree, nodes = tmfnodes.popitem()
1252 1294
1253 1295 should_visit = self._matcher.visitdir(tree[:-1])
1254 1296 if tree and not should_visit:
1255 1297 continue
1256 1298
1257 1299 store = mfl.getstorage(tree)
1258 1300
1259 1301 if not should_visit:
1260 1302 # No nodes to send because this directory is out of
1261 1303 # the client's view of the repository (probably
1262 1304 # because of narrow clones). Do this even for the root
1263 1305 # directory (tree=='')
1264 1306 prunednodes = []
1265 1307 else:
1266 1308 # Avoid sending any manifest nodes we can prove the
1267 1309 # client already has by checking linkrevs. See the
1268 1310 # related comment in generatefiles().
1269 1311 prunednodes = self._prunemanifests(store, nodes, commonrevs)
1270 1312
1271 1313 if tree and not prunednodes:
1272 1314 continue
1273 1315
1274 1316 lookupfn = makelookupmflinknode(tree, nodes)
1275 1317
1276 1318 deltas = deltagroup(
1277 1319 self._repo,
1278 1320 store,
1279 1321 prunednodes,
1280 1322 False,
1281 1323 lookupfn,
1282 1324 self._forcedeltaparentprev,
1283 1325 ellipses=self._ellipses,
1284 1326 topic=_(b'manifests'),
1285 1327 clrevtolocalrev=clrevtolocalrev,
1286 1328 fullclnodes=self._fullclnodes,
1287 1329 precomputedellipsis=self._precomputedellipsis,
1330 sidedata_helpers=sidedata_helpers,
1288 1331 )
1289 1332
1290 1333 if not self._oldmatcher.visitdir(store.tree[:-1]):
1291 1334 yield tree, deltas
1292 1335 else:
1293 1336 # 'deltas' is a generator and we need to consume it even if
1294 1337 # we are not going to send it because a side-effect is that
1295 1338 # it updates tmfnodes (via lookupfn)
1296 1339 for d in deltas:
1297 1340 pass
1298 1341 if not tree:
1299 1342 yield tree, []
1300 1343
1301 1344 def _prunemanifests(self, store, nodes, commonrevs):
1302 1345 if not self._ellipses:
1303 1346 # In the non-ellipses case, and with large repositories, it is better
1304 1347 # to avoid calling store.rev and store.linkrev on a lot of nodes than
1305 1348 # to send some extra data
1306 1349 return nodes.copy()
1307 1350 # This is split out as a separate method to allow filtering
1308 1351 # commonrevs in extension code.
1309 1352 #
1310 1353 # TODO(augie): this shouldn't be required, instead we should
1311 1354 # make filtering of revisions to send delegated to the store
1312 1355 # layer.
1313 1356 frev, flr = store.rev, store.linkrev
1314 1357 return [n for n in nodes if flr(frev(n)) not in commonrevs]
1315 1358
1316 1359 # The 'source' parameter is useful for extensions
1317 1360 def generatefiles(
1318 1361 self,
1319 1362 changedfiles,
1320 1363 commonrevs,
1321 1364 source,
1322 1365 mfdicts,
1323 1366 fastpathlinkrev,
1324 1367 fnodes,
1325 1368 clrevs,
1369 sidedata_helpers=None,
1326 1370 ):
1327 1371 changedfiles = [
1328 1372 f
1329 1373 for f in changedfiles
1330 1374 if self._matcher(f) and not self._oldmatcher(f)
1331 1375 ]
1332 1376
1333 1377 if not fastpathlinkrev:
1334 1378
1335 1379 def normallinknodes(unused, fname):
1336 1380 return fnodes.get(fname, {})
1337 1381
1338 1382 else:
1339 1383 cln = self._repo.changelog.node
1340 1384
1341 1385 def normallinknodes(store, fname):
1342 1386 flinkrev = store.linkrev
1343 1387 fnode = store.node
1344 1388 revs = ((r, flinkrev(r)) for r in store)
1345 1389 return {fnode(r): cln(lr) for r, lr in revs if lr in clrevs}
1346 1390
1347 1391 clrevtolocalrev = {}
1348 1392
1349 1393 if self._isshallow:
1350 1394 # In a shallow clone, the linknodes callback needs to also include
1351 1395 # those file nodes that are in the manifests we sent but weren't
1352 1396 # introduced by those manifests.
1353 1397 commonctxs = [self._repo[c] for c in commonrevs]
1354 1398 clrev = self._repo.changelog.rev
1355 1399
1356 1400 def linknodes(flog, fname):
1357 1401 for c in commonctxs:
1358 1402 try:
1359 1403 fnode = c.filenode(fname)
1360 1404 clrevtolocalrev[c.rev()] = flog.rev(fnode)
1361 1405 except error.ManifestLookupError:
1362 1406 pass
1363 1407 links = normallinknodes(flog, fname)
1364 1408 if len(links) != len(mfdicts):
1365 1409 for mf, lr in mfdicts:
1366 1410 fnode = mf.get(fname, None)
1367 1411 if fnode in links:
1368 1412 links[fnode] = min(links[fnode], lr, key=clrev)
1369 1413 elif fnode:
1370 1414 links[fnode] = lr
1371 1415 return links
1372 1416
1373 1417 else:
1374 1418 linknodes = normallinknodes
1375 1419
1376 1420 repo = self._repo
1377 1421 progress = repo.ui.makeprogress(
1378 1422 _(b'files'), unit=_(b'files'), total=len(changedfiles)
1379 1423 )
1380 1424 for i, fname in enumerate(sorted(changedfiles)):
1381 1425 filerevlog = repo.file(fname)
1382 1426 if not filerevlog:
1383 1427 raise error.Abort(
1384 1428 _(b"empty or missing file data for %s") % fname
1385 1429 )
1386 1430
1387 1431 clrevtolocalrev.clear()
1388 1432
1389 1433 linkrevnodes = linknodes(filerevlog, fname)
1390 1434 # Lookup for filenodes, we collected the linkrev nodes above in the
1391 1435 # fastpath case and with lookupmf in the slowpath case.
1392 1436 def lookupfilelog(x):
1393 1437 return linkrevnodes[x]
1394 1438
1395 1439 frev, flr = filerevlog.rev, filerevlog.linkrev
1396 1440 # Skip sending any filenode we know the client already
1397 1441 # has. This avoids over-sending files relatively
1398 1442 # inexpensively, so it's not a problem if we under-filter
1399 1443 # here.
1400 1444 filenodes = [
1401 1445 n for n in linkrevnodes if flr(frev(n)) not in commonrevs
1402 1446 ]
1403 1447
1404 1448 if not filenodes:
1405 1449 continue
1406 1450
1407 1451 progress.update(i + 1, item=fname)
1408 1452
1409 1453 deltas = deltagroup(
1410 1454 self._repo,
1411 1455 filerevlog,
1412 1456 filenodes,
1413 1457 False,
1414 1458 lookupfilelog,
1415 1459 self._forcedeltaparentprev,
1416 1460 ellipses=self._ellipses,
1417 1461 clrevtolocalrev=clrevtolocalrev,
1418 1462 fullclnodes=self._fullclnodes,
1419 1463 precomputedellipsis=self._precomputedellipsis,
1464 sidedata_helpers=sidedata_helpers,
1420 1465 )
1421 1466
1422 1467 yield fname, deltas
1423 1468
1424 1469 progress.complete()
1425 1470
1426 1471
1427 1472 def _makecg1packer(
1428 1473 repo,
1429 1474 oldmatcher,
1430 1475 matcher,
1431 1476 bundlecaps,
1432 1477 ellipses=False,
1433 1478 shallow=False,
1434 1479 ellipsisroots=None,
1435 1480 fullnodes=None,
1436 1481 remote_sidedata=None,
1437 1482 ):
1438 1483 builddeltaheader = lambda d: _CHANGEGROUPV1_DELTA_HEADER.pack(
1439 1484 d.node, d.p1node, d.p2node, d.linknode
1440 1485 )
1441 1486
1442 1487 return cgpacker(
1443 1488 repo,
1444 1489 oldmatcher,
1445 1490 matcher,
1446 1491 b'01',
1447 1492 builddeltaheader=builddeltaheader,
1448 1493 manifestsend=b'',
1449 1494 forcedeltaparentprev=True,
1450 1495 bundlecaps=bundlecaps,
1451 1496 ellipses=ellipses,
1452 1497 shallow=shallow,
1453 1498 ellipsisroots=ellipsisroots,
1454 1499 fullnodes=fullnodes,
1455 1500 )
1456 1501
1457 1502
1458 1503 def _makecg2packer(
1459 1504 repo,
1460 1505 oldmatcher,
1461 1506 matcher,
1462 1507 bundlecaps,
1463 1508 ellipses=False,
1464 1509 shallow=False,
1465 1510 ellipsisroots=None,
1466 1511 fullnodes=None,
1467 1512 remote_sidedata=None,
1468 1513 ):
1469 1514 builddeltaheader = lambda d: _CHANGEGROUPV2_DELTA_HEADER.pack(
1470 1515 d.node, d.p1node, d.p2node, d.basenode, d.linknode
1471 1516 )
1472 1517
1473 1518 return cgpacker(
1474 1519 repo,
1475 1520 oldmatcher,
1476 1521 matcher,
1477 1522 b'02',
1478 1523 builddeltaheader=builddeltaheader,
1479 1524 manifestsend=b'',
1480 1525 bundlecaps=bundlecaps,
1481 1526 ellipses=ellipses,
1482 1527 shallow=shallow,
1483 1528 ellipsisroots=ellipsisroots,
1484 1529 fullnodes=fullnodes,
1485 1530 )
1486 1531
1487 1532
1488 1533 def _makecg3packer(
1489 1534 repo,
1490 1535 oldmatcher,
1491 1536 matcher,
1492 1537 bundlecaps,
1493 1538 ellipses=False,
1494 1539 shallow=False,
1495 1540 ellipsisroots=None,
1496 1541 fullnodes=None,
1497 1542 remote_sidedata=None,
1498 1543 ):
1499 1544 builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
1500 1545 d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags
1501 1546 )
1502 1547
1503 1548 return cgpacker(
1504 1549 repo,
1505 1550 oldmatcher,
1506 1551 matcher,
1507 1552 b'03',
1508 1553 builddeltaheader=builddeltaheader,
1509 1554 manifestsend=closechunk(),
1510 1555 bundlecaps=bundlecaps,
1511 1556 ellipses=ellipses,
1512 1557 shallow=shallow,
1513 1558 ellipsisroots=ellipsisroots,
1514 1559 fullnodes=fullnodes,
1515 1560 )
1516 1561
1517 1562
1518 1563 def _makecg4packer(
1519 1564 repo,
1520 1565 oldmatcher,
1521 1566 matcher,
1522 1567 bundlecaps,
1523 1568 ellipses=False,
1524 1569 shallow=False,
1525 1570 ellipsisroots=None,
1526 1571 fullnodes=None,
1527 1572 remote_sidedata=None,
1528 1573 ):
1529 1574 # Same header func as cg3. Sidedata is in a separate chunk from the delta to
1530 1575 # differentiate "raw delta" and sidedata.
1531 1576 builddeltaheader = lambda d: _CHANGEGROUPV3_DELTA_HEADER.pack(
1532 1577 d.node, d.p1node, d.p2node, d.basenode, d.linknode, d.flags
1533 1578 )
1534 1579
1535 1580 return cgpacker(
1536 1581 repo,
1537 1582 oldmatcher,
1538 1583 matcher,
1539 1584 b'04',
1540 1585 builddeltaheader=builddeltaheader,
1541 1586 manifestsend=closechunk(),
1542 1587 bundlecaps=bundlecaps,
1543 1588 ellipses=ellipses,
1544 1589 shallow=shallow,
1545 1590 ellipsisroots=ellipsisroots,
1546 1591 fullnodes=fullnodes,
1547 1592 remote_sidedata=remote_sidedata,
1548 1593 )
1549 1594
1550 1595
1551 1596 _packermap = {
1552 1597 b'01': (_makecg1packer, cg1unpacker),
1553 1598 # cg2 adds support for exchanging generaldelta
1554 1599 b'02': (_makecg2packer, cg2unpacker),
1555 1600 # cg3 adds support for exchanging revlog flags and treemanifests
1556 1601 b'03': (_makecg3packer, cg3unpacker),
1557 1602 # cg4 adds support for exchanging sidedata
1558 1603 b'04': (_makecg4packer, cg4unpacker),
1559 1604 }
1560 1605
1561 1606
1562 1607 def allsupportedversions(repo):
1563 1608 versions = set(_packermap.keys())
1564 1609 needv03 = False
1565 1610 if (
1566 1611 repo.ui.configbool(b'experimental', b'changegroup3')
1567 1612 or repo.ui.configbool(b'experimental', b'treemanifest')
1568 1613 or scmutil.istreemanifest(repo)
1569 1614 ):
1570 1615 # we keep version 03 because we need it to exchange treemanifest data
1571 1616 #
1572 1617 # we also keep versions 01 and 02, because it is possible for a repo to
1573 1618 # contain both normal and tree manifests at the same time, so using an
1574 1619 # older version to pull data is viable
1575 1620 #
1576 1621 # (or even to push a subset of history)
1577 1622 needv03 = True
1578 1623 has_revlogv2 = requirements.REVLOGV2_REQUIREMENT in repo.requirements
1579 1624 if not has_revlogv2:
1580 1625 versions.discard(b'04')
1581 1626 if not needv03:
1582 1627 versions.discard(b'03')
1583 1628 return versions
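The version filtering above reduces to simple set arithmetic; a self-contained sketch with the repository checks boiled down to two booleans (stand-ins for the config/requirements tests in the real function):

def supported_versions(needs_cg3, has_revlogv2):
    versions = {b'01', b'02', b'03', b'04'}
    if not has_revlogv2:
        versions.discard(b'04')   # cg4 (sidedata) needs revlogv2 storage
    if not needs_cg3:
        versions.discard(b'03')   # cg3 only needed for treemanifests/revlog flags
    return versions

assert supported_versions(False, False) == {b'01', b'02'}
assert supported_versions(True, True) == {b'01', b'02', b'03', b'04'}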
1584 1629
1585 1630
1586 1631 # Changegroup versions that can be applied to the repo
1587 1632 def supportedincomingversions(repo):
1588 1633 return allsupportedversions(repo)
1589 1634
1590 1635
1591 1636 # Changegroup versions that can be created from the repo
1592 1637 def supportedoutgoingversions(repo):
1593 1638 versions = allsupportedversions(repo)
1594 1639 if scmutil.istreemanifest(repo):
1595 1640 # Versions 01 and 02 support only flat manifests and it's just too
1596 1641 # expensive to convert between the flat manifest and tree manifest on
1597 1642 # the fly. Since tree manifests are hashed differently, all of history
1598 1643 # would have to be converted. Instead, we simply don't even pretend to
1599 1644 # support versions 01 and 02.
1600 1645 versions.discard(b'01')
1601 1646 versions.discard(b'02')
1602 1647 if requirements.NARROW_REQUIREMENT in repo.requirements:
1603 1648 # Versions 01 and 02 don't support revlog flags, and we need to
1604 1649 # support that for stripping and unbundling to work.
1605 1650 versions.discard(b'01')
1606 1651 versions.discard(b'02')
1607 1652 if LFS_REQUIREMENT in repo.requirements:
1608 1653 # Versions 01 and 02 don't support revlog flags, and we need to
1609 1654 # mark LFS entries with REVIDX_EXTSTORED.
1610 1655 versions.discard(b'01')
1611 1656 versions.discard(b'02')
1612 1657
1613 1658 return versions
1614 1659
1615 1660
1616 1661 def localversion(repo):
1617 1662 # Finds the best version to use for bundles that are meant to be used
1618 1663 # locally, such as those from strip and shelve, and temporary bundles.
1619 1664 return max(supportedoutgoingversions(repo))
1620 1665
1621 1666
1622 1667 def safeversion(repo):
1623 1668 # Finds the smallest version that it's safe to assume clients of the repo
1624 1669 # will support. For example, all hg versions that support generaldelta also
1625 1670 # support changegroup 02.
1626 1671 versions = supportedoutgoingversions(repo)
1627 1672 if requirements.GENERALDELTA_REQUIREMENT in repo.requirements:
1628 1673 versions.discard(b'01')
1629 1674 assert versions
1630 1675 return min(versions)
1631 1676
1632 1677
1633 1678 def getbundler(
1634 1679 version,
1635 1680 repo,
1636 1681 bundlecaps=None,
1637 1682 oldmatcher=None,
1638 1683 matcher=None,
1639 1684 ellipses=False,
1640 1685 shallow=False,
1641 1686 ellipsisroots=None,
1642 1687 fullnodes=None,
1643 1688 remote_sidedata=None,
1644 1689 ):
1645 1690 assert version in supportedoutgoingversions(repo)
1646 1691
1647 1692 if matcher is None:
1648 1693 matcher = matchmod.always()
1649 1694 if oldmatcher is None:
1650 1695 oldmatcher = matchmod.never()
1651 1696
1652 1697 if version == b'01' and not matcher.always():
1653 1698 raise error.ProgrammingError(
1654 1699 b'version 01 changegroups do not support sparse file matchers'
1655 1700 )
1656 1701
1657 1702 if ellipses and version in (b'01', b'02'):
1658 1703 raise error.Abort(
1659 1704 _(
1660 1705 b'ellipsis nodes require at least cg3 on client and server, '
1661 1706 b'but negotiated version %s'
1662 1707 )
1663 1708 % version
1664 1709 )
1665 1710
1666 1711 # Requested files could include files not in the local store. So
1667 1712 # filter those out.
1668 1713 matcher = repo.narrowmatch(matcher)
1669 1714
1670 1715 fn = _packermap[version][0]
1671 1716 return fn(
1672 1717 repo,
1673 1718 oldmatcher,
1674 1719 matcher,
1675 1720 bundlecaps,
1676 1721 ellipses=ellipses,
1677 1722 shallow=shallow,
1678 1723 ellipsisroots=ellipsisroots,
1679 1724 fullnodes=fullnodes,
1680 1725 remote_sidedata=remote_sidedata,
1681 1726 )
1682 1727
1683 1728
1684 1729 def getunbundler(version, fh, alg, extras=None):
1685 1730 return _packermap[version][1](fh, alg, extras=extras)
1686 1731
1687 1732
1688 1733 def _changegroupinfo(repo, nodes, source):
1689 1734 if repo.ui.verbose or source == b'bundle':
1690 1735 repo.ui.status(_(b"%d changesets found\n") % len(nodes))
1691 1736 if repo.ui.debugflag:
1692 1737 repo.ui.debug(b"list of changesets:\n")
1693 1738 for node in nodes:
1694 1739 repo.ui.debug(b"%s\n" % hex(node))
1695 1740
1696 1741
1697 1742 def makechangegroup(
1698 1743 repo, outgoing, version, source, fastpath=False, bundlecaps=None
1699 1744 ):
1700 1745 cgstream = makestream(
1701 1746 repo,
1702 1747 outgoing,
1703 1748 version,
1704 1749 source,
1705 1750 fastpath=fastpath,
1706 1751 bundlecaps=bundlecaps,
1707 1752 )
1708 1753 return getunbundler(
1709 1754 version,
1710 1755 util.chunkbuffer(cgstream),
1711 1756 None,
1712 1757 {b'clcount': len(outgoing.missing)},
1713 1758 )
1714 1759
1715 1760
1716 1761 def makestream(
1717 1762 repo,
1718 1763 outgoing,
1719 1764 version,
1720 1765 source,
1721 1766 fastpath=False,
1722 1767 bundlecaps=None,
1723 1768 matcher=None,
1724 1769 remote_sidedata=None,
1725 1770 ):
1726 1771 bundler = getbundler(
1727 1772 version,
1728 1773 repo,
1729 1774 bundlecaps=bundlecaps,
1730 1775 matcher=matcher,
1731 1776 remote_sidedata=remote_sidedata,
1732 1777 )
1733 1778
1734 1779 repo = repo.unfiltered()
1735 1780 commonrevs = outgoing.common
1736 1781 csets = outgoing.missing
1737 1782 heads = outgoing.ancestorsof
1738 1783 # We go through the fast path if we get told to, or if all (unfiltered)
1739 1784 # heads have been requested (since we then know all the linkrevs will
1740 1785 # be pulled by the client).
1741 1786 heads.sort()
1742 1787 fastpathlinkrev = fastpath or (
1743 1788 repo.filtername is None and heads == sorted(repo.heads())
1744 1789 )
1745 1790
1746 1791 repo.hook(b'preoutgoing', throw=True, source=source)
1747 1792 _changegroupinfo(repo, csets, source)
1748 1793 return bundler.generate(commonrevs, csets, fastpathlinkrev, source)
1749 1794
1750 1795
1751 1796 def _addchangegroupfiles(repo, source, revmap, trp, expectedfiles, needfiles):
1752 1797 revisions = 0
1753 1798 files = 0
1754 1799 progress = repo.ui.makeprogress(
1755 1800 _(b'files'), unit=_(b'files'), total=expectedfiles
1756 1801 )
1757 1802 for chunkdata in iter(source.filelogheader, {}):
1758 1803 files += 1
1759 1804 f = chunkdata[b"filename"]
1760 1805 repo.ui.debug(b"adding %s revisions\n" % f)
1761 1806 progress.increment()
1762 1807 fl = repo.file(f)
1763 1808 o = len(fl)
1764 1809 try:
1765 1810 deltas = source.deltaiter()
1766 1811 if not fl.addgroup(deltas, revmap, trp):
1767 1812 raise error.Abort(_(b"received file revlog group is empty"))
1768 1813 except error.CensoredBaseError as e:
1769 1814 raise error.Abort(_(b"received delta base is censored: %s") % e)
1770 1815 revisions += len(fl) - o
1771 1816 if f in needfiles:
1772 1817 needs = needfiles[f]
1773 1818 for new in pycompat.xrange(o, len(fl)):
1774 1819 n = fl.node(new)
1775 1820 if n in needs:
1776 1821 needs.remove(n)
1777 1822 else:
1778 1823 raise error.Abort(_(b"received spurious file revlog entry"))
1779 1824 if not needs:
1780 1825 del needfiles[f]
1781 1826 progress.complete()
1782 1827
1783 1828 for f, needs in pycompat.iteritems(needfiles):
1784 1829 fl = repo.file(f)
1785 1830 for n in needs:
1786 1831 try:
1787 1832 fl.rev(n)
1788 1833 except error.LookupError:
1789 1834 raise error.Abort(
1790 1835 _(b'missing file data for %s:%s - run hg verify')
1791 1836 % (f, hex(n))
1792 1837 )
1793 1838
1794 1839 return revisions, files
1840
1841
1842 def get_sidedata_helpers(repo, remote_sd_categories, pull=False):
1843 # Computers for computing sidedata on-the-fly
1844 sd_computers = collections.defaultdict(list)
1845 # Computers for categories to remove from sidedata
1846 sd_removers = collections.defaultdict(list)
1847
1848 to_generate = remote_sd_categories - repo._wanted_sidedata
1849 to_remove = repo._wanted_sidedata - remote_sd_categories
1850 if pull:
1851 to_generate, to_remove = to_remove, to_generate
1852
1853 for revlog_kind, computers in repo._sidedata_computers.items():
1854 for category, computer in computers.items():
1855 if category in to_generate:
1856 sd_computers[revlog_kind].append(computer)
1857 if category in to_remove:
1858 sd_removers[revlog_kind].append(computer)
1859
1860 sidedata_helpers = (repo, sd_computers, sd_removers)
1861 return sidedata_helpers
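The category arithmetic above, shown as a self-contained sketch: which sidedata categories must be computed on the fly and which must be stripped before sending, and how a pull flips the direction because the data flows the other way. The category names here are made up; the real keys come from the sidedata module.

def split_categories(remote_wanted, locally_stored, pull=False):
    to_generate = remote_wanted - locally_stored   # compute on the fly
    to_remove = locally_stored - remote_wanted     # strip before sending
    if pull:
        to_generate, to_remove = to_remove, to_generate
    return to_generate, to_remove

# push: the remote wants less than we store -> strip b'size'
assert split_categories({b'cp'}, {b'cp', b'size'}) == (set(), {b'size'})
# pull of the same negotiation: b'size' has to be generated locally
assert split_categories({b'cp'}, {b'cp', b'size'}, pull=True) == ({b'size'}, set())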
@@ -1,292 +1,294 b''
1 1 # filelog.py - file history class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from .i18n import _
11 11 from .node import (
12 12 nullid,
13 13 nullrev,
14 14 )
15 15 from . import (
16 16 error,
17 17 revlog,
18 18 )
19 19 from .interfaces import (
20 20 repository,
21 21 util as interfaceutil,
22 22 )
23 23 from .utils import storageutil
24 24
25 25
26 26 @interfaceutil.implementer(repository.ifilestorage)
27 27 class filelog(object):
28 28 def __init__(self, opener, path):
29 29 self._revlog = revlog.revlog(
30 30 opener, b'/'.join((b'data', path + b'.i')), censorable=True
31 31 )
32 32 # Full name of the user visible file, relative to the repository root.
33 33 # Used by LFS.
34 34 self._revlog.filename = path
35 35 self._revlog.revlog_kind = b'filelog'
36 36
37 37 def __len__(self):
38 38 return len(self._revlog)
39 39
40 40 def __iter__(self):
41 41 return self._revlog.__iter__()
42 42
43 43 def hasnode(self, node):
44 44 if node in (nullid, nullrev):
45 45 return False
46 46
47 47 try:
48 48 self._revlog.rev(node)
49 49 return True
50 50 except (TypeError, ValueError, IndexError, error.LookupError):
51 51 return False
52 52
53 53 def revs(self, start=0, stop=None):
54 54 return self._revlog.revs(start=start, stop=stop)
55 55
56 56 def parents(self, node):
57 57 return self._revlog.parents(node)
58 58
59 59 def parentrevs(self, rev):
60 60 return self._revlog.parentrevs(rev)
61 61
62 62 def rev(self, node):
63 63 return self._revlog.rev(node)
64 64
65 65 def node(self, rev):
66 66 return self._revlog.node(rev)
67 67
68 68 def lookup(self, node):
69 69 return storageutil.fileidlookup(
70 70 self._revlog, node, self._revlog.indexfile
71 71 )
72 72
73 73 def linkrev(self, rev):
74 74 return self._revlog.linkrev(rev)
75 75
76 76 def commonancestorsheads(self, node1, node2):
77 77 return self._revlog.commonancestorsheads(node1, node2)
78 78
79 79 # Used by dagop.blockdescendants().
80 80 def descendants(self, revs):
81 81 return self._revlog.descendants(revs)
82 82
83 83 def heads(self, start=None, stop=None):
84 84 return self._revlog.heads(start, stop)
85 85
86 86 # Used by hgweb, children extension.
87 87 def children(self, node):
88 88 return self._revlog.children(node)
89 89
90 90 def iscensored(self, rev):
91 91 return self._revlog.iscensored(rev)
92 92
93 93 def revision(self, node, _df=None, raw=False):
94 94 return self._revlog.revision(node, _df=_df, raw=raw)
95 95
96 96 def rawdata(self, node, _df=None):
97 97 return self._revlog.rawdata(node, _df=_df)
98 98
99 99 def emitrevisions(
100 100 self,
101 101 nodes,
102 102 nodesorder=None,
103 103 revisiondata=False,
104 104 assumehaveparentrevisions=False,
105 105 deltamode=repository.CG_DELTAMODE_STD,
106 sidedata_helpers=None,
106 107 ):
107 108 return self._revlog.emitrevisions(
108 109 nodes,
109 110 nodesorder=nodesorder,
110 111 revisiondata=revisiondata,
111 112 assumehaveparentrevisions=assumehaveparentrevisions,
112 113 deltamode=deltamode,
114 sidedata_helpers=sidedata_helpers,
113 115 )
114 116
115 117 def addrevision(
116 118 self,
117 119 revisiondata,
118 120 transaction,
119 121 linkrev,
120 122 p1,
121 123 p2,
122 124 node=None,
123 125 flags=revlog.REVIDX_DEFAULT_FLAGS,
124 126 cachedelta=None,
125 127 ):
126 128 return self._revlog.addrevision(
127 129 revisiondata,
128 130 transaction,
129 131 linkrev,
130 132 p1,
131 133 p2,
132 134 node=node,
133 135 flags=flags,
134 136 cachedelta=cachedelta,
135 137 )
136 138
137 139 def addgroup(
138 140 self,
139 141 deltas,
140 142 linkmapper,
141 143 transaction,
142 144 addrevisioncb=None,
143 145 duplicaterevisioncb=None,
144 146 maybemissingparents=False,
145 147 ):
146 148 if maybemissingparents:
147 149 raise error.Abort(
148 150 _(
149 151 b'revlog storage does not support missing '
150 152 b'parents write mode'
151 153 )
152 154 )
153 155
154 156 return self._revlog.addgroup(
155 157 deltas,
156 158 linkmapper,
157 159 transaction,
158 160 addrevisioncb=addrevisioncb,
159 161 duplicaterevisioncb=duplicaterevisioncb,
160 162 )
161 163
162 164 def getstrippoint(self, minlink):
163 165 return self._revlog.getstrippoint(minlink)
164 166
165 167 def strip(self, minlink, transaction):
166 168 return self._revlog.strip(minlink, transaction)
167 169
168 170 def censorrevision(self, tr, node, tombstone=b''):
169 171 return self._revlog.censorrevision(tr, node, tombstone=tombstone)
170 172
171 173 def files(self):
172 174 return self._revlog.files()
173 175
174 176 def read(self, node):
175 177 return storageutil.filtermetadata(self.revision(node))
176 178
177 179 def add(self, text, meta, transaction, link, p1=None, p2=None):
178 180 if meta or text.startswith(b'\1\n'):
179 181 text = storageutil.packmeta(meta, text)
180 182 rev = self.addrevision(text, transaction, link, p1, p2)
181 183 return self.node(rev)
182 184
183 185 def renamed(self, node):
184 186 return storageutil.filerevisioncopied(self, node)
185 187
186 188 def size(self, rev):
187 189 """return the size of a given revision"""
188 190
189 191 # for revisions with renames, we have to go the slow way
190 192 node = self.node(rev)
191 193 if self.renamed(node):
192 194 return len(self.read(node))
193 195 if self.iscensored(rev):
194 196 return 0
195 197
196 198 # XXX if self.read(node).startswith("\1\n"), this returns (size+4)
197 199 return self._revlog.size(rev)
198 200
199 201 def cmp(self, node, text):
200 202 """compare text with a given file revision
201 203
202 204 returns True if text is different than what is stored.
203 205 """
204 206 return not storageutil.filedataequivalent(self, node, text)
205 207
206 208 def verifyintegrity(self, state):
207 209 return self._revlog.verifyintegrity(state)
208 210
209 211 def storageinfo(
210 212 self,
211 213 exclusivefiles=False,
212 214 sharedfiles=False,
213 215 revisionscount=False,
214 216 trackedsize=False,
215 217 storedsize=False,
216 218 ):
217 219 return self._revlog.storageinfo(
218 220 exclusivefiles=exclusivefiles,
219 221 sharedfiles=sharedfiles,
220 222 revisionscount=revisionscount,
221 223 trackedsize=trackedsize,
222 224 storedsize=storedsize,
223 225 )
224 226
225 227 # TODO these aren't part of the interface and aren't internal methods.
226 228 # Callers should be fixed to not use them.
227 229
228 230 # Used by bundlefilelog, unionfilelog.
229 231 @property
230 232 def indexfile(self):
231 233 return self._revlog.indexfile
232 234
233 235 @indexfile.setter
234 236 def indexfile(self, value):
235 237 self._revlog.indexfile = value
236 238
237 239 # Used by repo upgrade.
238 240 def clone(self, tr, destrevlog, **kwargs):
239 241 if not isinstance(destrevlog, filelog):
240 242 raise error.ProgrammingError(b'expected filelog to clone()')
241 243
242 244 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
243 245
244 246
245 247 class narrowfilelog(filelog):
246 248 """Filelog variation to be used with narrow stores."""
247 249
248 250 def __init__(self, opener, path, narrowmatch):
249 251 super(narrowfilelog, self).__init__(opener, path)
250 252 self._narrowmatch = narrowmatch
251 253
252 254 def renamed(self, node):
253 255 res = super(narrowfilelog, self).renamed(node)
254 256
255 257 # Renames that come from outside the narrowspec are problematic
256 258 # because we may lack the base text for the rename. This can result
257 259 # in code attempting to walk the ancestry or compute a diff
258 260 # encountering a missing revision. We address this by silently
259 261 # removing rename metadata if the source file is outside the
260 262 # narrow spec.
261 263 #
262 264 # A better solution would be to see if the base revision is available,
263 265 # rather than assuming it isn't.
264 266 #
265 267 # An even better solution would be to teach all consumers of rename
266 268 # metadata that the base revision may not be available.
267 269 #
268 270 # TODO consider better ways of doing this.
269 271 if res and not self._narrowmatch(res[0]):
270 272 return None
271 273
272 274 return res
273 275
274 276 def size(self, rev):
275 277 # Because we have a custom renamed() that may lie, we need to call
276 278 # the base renamed() to report accurate results.
277 279 node = self.node(rev)
278 280 if super(narrowfilelog, self).renamed(node):
279 281 return len(self.read(node))
280 282 else:
281 283 return super(narrowfilelog, self).size(rev)
282 284
283 285 def cmp(self, node, text):
284 286 # We don't call `super` because narrow parents can be buggy in case of an
285 287 # ambiguous dirstate. Always take the slow path until there is a better
286 288 # fix, see issue6150.
287 289
288 290 # Censored files compare against the empty file.
289 291 if self.iscensored(self.rev(node)):
290 292 return text != b''
291 293
292 294 return self.read(node) != text
@@ -1,2355 +1,2357 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import heapq
11 11 import itertools
12 12 import struct
13 13 import weakref
14 14
15 15 from .i18n import _
16 16 from .node import (
17 17 bin,
18 18 hex,
19 19 nullid,
20 20 nullrev,
21 21 )
22 22 from .pycompat import getattr
23 23 from . import (
24 24 encoding,
25 25 error,
26 26 match as matchmod,
27 27 mdiff,
28 28 pathutil,
29 29 policy,
30 30 pycompat,
31 31 revlog,
32 32 util,
33 33 )
34 34 from .interfaces import (
35 35 repository,
36 36 util as interfaceutil,
37 37 )
38 38
39 39 parsers = policy.importmod('parsers')
40 40 propertycache = util.propertycache
41 41
42 42 # Allow tests to more easily test the alternate path in manifestdict.fastdelta()
43 43 FASTDELTA_TEXTDIFF_THRESHOLD = 1000
44 44
45 45
46 46 def _parse(data):
47 47 # This method does a little bit of excessive-looking
48 48 # precondition checking. This is so that the behavior of this
49 49 # class exactly matches its C counterpart to try and help
50 50 # prevent surprise breakage for anyone that develops against
51 51 # the pure version.
52 52 if data and data[-1:] != b'\n':
53 53 raise ValueError(b'Manifest did not end in a newline.')
54 54 prev = None
55 55 for l in data.splitlines():
56 56 if prev is not None and prev > l:
57 57 raise ValueError(b'Manifest lines not in sorted order.')
58 58 prev = l
59 59 f, n = l.split(b'\0')
60 60 nl = len(n)
61 61 flags = n[-1:]
62 62 if flags in _manifestflags:
63 63 n = n[:-1]
64 64 nl -= 1
65 65 else:
66 66 flags = b''
67 67 if nl not in (40, 64):
68 68 raise ValueError(b'Invalid manifest line')
69 69
70 70 yield f, bin(n), flags
71 71
72 72
73 73 def _text(it):
74 74 files = []
75 75 lines = []
76 76 for f, n, fl in it:
77 77 files.append(f)
78 78 # if this is changed to support newlines in filenames,
79 79 # be sure to check the templates/ dir again (especially *-raw.tmpl)
80 80 lines.append(b"%s\0%s%s\n" % (f, hex(n), fl))
81 81
82 82 _checkforbidden(files)
83 83 return b''.join(lines)
84 84
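For readers skimming this hunk: the flat manifest encoding that _text() emits and _parse() consumes is one "<path>\0<hex node><flag>\n" line per entry, sorted by path, with the flag being one of '', 'l', 't' or 'x'. A self-contained sketch with made-up nodes (Python 3 only):

import hashlib

def entry(path, flag=b''):
    node = hashlib.sha1(path).digest()            # fake 20-byte node -> 40 hex chars
    return b"%s\0%s%s\n" % (path, node.hex().encode(), flag)

text = b''.join(entry(p, f) for p, f in sorted([
    (b'README', b''),
    (b'bin/build.sh', b'x'),    # executable
    (b'docs/link', b'l'),       # symlink
]))

for line in text.splitlines():
    path, rest = line.split(b'\0')
    hexnode, flag = rest[:40], rest[40:]
    print(path, hexnode[:12], flag or b'-')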
85 85
86 86 class lazymanifestiter(object):
87 87 def __init__(self, lm):
88 88 self.pos = 0
89 89 self.lm = lm
90 90
91 91 def __iter__(self):
92 92 return self
93 93
94 94 def next(self):
95 95 try:
96 96 data, pos = self.lm._get(self.pos)
97 97 except IndexError:
98 98 raise StopIteration
99 99 if pos == -1:
100 100 self.pos += 1
101 101 return data[0]
102 102 self.pos += 1
103 103 zeropos = data.find(b'\x00', pos)
104 104 return data[pos:zeropos]
105 105
106 106 __next__ = next
107 107
108 108
109 109 class lazymanifestiterentries(object):
110 110 def __init__(self, lm):
111 111 self.lm = lm
112 112 self.pos = 0
113 113
114 114 def __iter__(self):
115 115 return self
116 116
117 117 def next(self):
118 118 try:
119 119 data, pos = self.lm._get(self.pos)
120 120 except IndexError:
121 121 raise StopIteration
122 122 if pos == -1:
123 123 self.pos += 1
124 124 return data
125 125 zeropos = data.find(b'\x00', pos)
126 126 nlpos = data.find(b'\n', pos)
127 127 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
128 128 raise error.StorageError(b'Invalid manifest line')
129 129 flags = data[nlpos - 1 : nlpos]
130 130 if flags in _manifestflags:
131 131 hlen = nlpos - zeropos - 2
132 132 else:
133 133 hlen = nlpos - zeropos - 1
134 134 flags = b''
135 135 if hlen not in (40, 64):
136 136 raise error.StorageError(b'Invalid manifest line')
137 137 hashval = unhexlify(
138 138 data, self.lm.extrainfo[self.pos], zeropos + 1, hlen
139 139 )
140 140 self.pos += 1
141 141 return (data[pos:zeropos], hashval, flags)
142 142
143 143 __next__ = next
144 144
145 145
146 146 def unhexlify(data, extra, pos, length):
147 147 s = bin(data[pos : pos + length])
148 148 if extra:
149 149 s += chr(extra & 0xFF)
150 150 return s
151 151
152 152
153 153 def _cmp(a, b):
154 154 return (a > b) - (a < b)
155 155
156 156
157 157 _manifestflags = {b'', b'l', b't', b'x'}
158 158
159 159
160 160 class _lazymanifest(object):
161 161 """A pure python manifest backed by a byte string. It is supplimented with
162 162 internal lists as it is modified, until it is compacted back to a pure byte
163 163 string.
164 164
165 165 ``data`` is the initial manifest data.
166 166
167 167 ``positions`` is a list of offsets, one per manifest entry. Positive
168 168 values are offsets into ``data``, negative values are offsets into the
169 169 ``extradata`` list. When an entry is removed, its entry is dropped from
170 170 ``positions``. The values are encoded such that when walking the list and
171 171 indexing into ``data`` or ``extradata`` as appropriate, the entries are
172 172 sorted by filename.
173 173
174 174 ``extradata`` is a list of (key, hash, flags) for entries that were added or
175 175 modified since the manifest was created or compacted.
176 176 """
177 177
178 178 def __init__(
179 179 self,
180 180 data,
181 181 positions=None,
182 182 extrainfo=None,
183 183 extradata=None,
184 184 hasremovals=False,
185 185 ):
186 186 if positions is None:
187 187 self.positions = self.findlines(data)
188 188 self.extrainfo = [0] * len(self.positions)
189 189 self.data = data
190 190 self.extradata = []
191 191 self.hasremovals = False
192 192 else:
193 193 self.positions = positions[:]
194 194 self.extrainfo = extrainfo[:]
195 195 self.extradata = extradata[:]
196 196 self.data = data
197 197 self.hasremovals = hasremovals
198 198
199 199 def findlines(self, data):
200 200 if not data:
201 201 return []
202 202 pos = data.find(b"\n")
203 203 if pos == -1 or data[-1:] != b'\n':
204 204 raise ValueError(b"Manifest did not end in a newline.")
205 205 positions = [0]
206 206 prev = data[: data.find(b'\x00')]
207 207 while pos < len(data) - 1 and pos != -1:
208 208 positions.append(pos + 1)
209 209 nexts = data[pos + 1 : data.find(b'\x00', pos + 1)]
210 210 if nexts < prev:
211 211 raise ValueError(b"Manifest lines not in sorted order.")
212 212 prev = nexts
213 213 pos = data.find(b"\n", pos + 1)
214 214 return positions
215 215
216 216 def _get(self, index):
217 217 # get the position encoded in pos:
218 218 # positive number is an index in 'data'
219 219 # negative number is in extrapieces
220 220 pos = self.positions[index]
221 221 if pos >= 0:
222 222 return self.data, pos
223 223 return self.extradata[-pos - 1], -1
224 224
225 225 def _getkey(self, pos):
226 226 if pos >= 0:
227 227 return self.data[pos : self.data.find(b'\x00', pos + 1)]
228 228 return self.extradata[-pos - 1][0]
229 229
230 230 def bsearch(self, key):
231 231 first = 0
232 232 last = len(self.positions) - 1
233 233
234 234 while first <= last:
235 235 midpoint = (first + last) // 2
236 236 nextpos = self.positions[midpoint]
237 237 candidate = self._getkey(nextpos)
238 238 r = _cmp(key, candidate)
239 239 if r == 0:
240 240 return midpoint
241 241 else:
242 242 if r < 0:
243 243 last = midpoint - 1
244 244 else:
245 245 first = midpoint + 1
246 246 return -1
247 247
248 248 def bsearch2(self, key):
249 249 # same as the above, but will always return the position
250 250 # done for performance reasons
251 251 first = 0
252 252 last = len(self.positions) - 1
253 253
254 254 while first <= last:
255 255 midpoint = (first + last) // 2
256 256 nextpos = self.positions[midpoint]
257 257 candidate = self._getkey(nextpos)
258 258 r = _cmp(key, candidate)
259 259 if r == 0:
260 260 return (midpoint, True)
261 261 else:
262 262 if r < 0:
263 263 last = midpoint - 1
264 264 else:
265 265 first = midpoint + 1
266 266 return (first, False)
267 267
268 268 def __contains__(self, key):
269 269 return self.bsearch(key) != -1
270 270
271 271 def __getitem__(self, key):
272 272 if not isinstance(key, bytes):
273 273 raise TypeError(b"getitem: manifest keys must be a bytes.")
274 274 needle = self.bsearch(key)
275 275 if needle == -1:
276 276 raise KeyError
277 277 data, pos = self._get(needle)
278 278 if pos == -1:
279 279 return (data[1], data[2])
280 280 zeropos = data.find(b'\x00', pos)
281 281 nlpos = data.find(b'\n', zeropos)
282 282 assert 0 <= needle <= len(self.positions)
283 283 assert len(self.extrainfo) == len(self.positions)
284 284 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
285 285 raise error.StorageError(b'Invalid manifest line')
286 286 hlen = nlpos - zeropos - 1
287 287 flags = data[nlpos - 1 : nlpos]
288 288 if flags in _manifestflags:
289 289 hlen -= 1
290 290 else:
291 291 flags = b''
292 292 if hlen not in (40, 64):
293 293 raise error.StorageError(b'Invalid manifest line')
294 294 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, hlen)
295 295 return (hashval, flags)
296 296
297 297 def __delitem__(self, key):
298 298 needle, found = self.bsearch2(key)
299 299 if not found:
300 300 raise KeyError
301 301 cur = self.positions[needle]
302 302 self.positions = self.positions[:needle] + self.positions[needle + 1 :]
303 303 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1 :]
304 304 if cur >= 0:
305 305 # This does NOT unsort the list as far as the search functions are
306 306 # concerned, as they only examine lines mapped by self.positions.
307 307 self.data = self.data[:cur] + b'\x00' + self.data[cur + 1 :]
308 308 self.hasremovals = True
309 309
310 310 def __setitem__(self, key, value):
311 311 if not isinstance(key, bytes):
312 312 raise TypeError(b"setitem: manifest keys must be a byte string.")
313 313 if not isinstance(value, tuple) or len(value) != 2:
314 314 raise TypeError(
315 315 b"Manifest values must be a tuple of (node, flags)."
316 316 )
317 317 hashval = value[0]
318 318 if not isinstance(hashval, bytes) or len(hashval) not in (20, 32):
319 319 raise TypeError(b"node must be a 20-byte or 32-byte byte string")
320 320 flags = value[1]
321 321 if not isinstance(flags, bytes) or len(flags) > 1:
322 322 raise TypeError(b"flags must a 0 or 1 byte string, got %r", flags)
323 323 needle, found = self.bsearch2(key)
324 324 if found:
325 325 # put the item
326 326 pos = self.positions[needle]
327 327 if pos < 0:
328 328 self.extradata[-pos - 1] = (key, hashval, value[1])
329 329 else:
330 330 # don't bother editing self.data in place; stage the new value in extradata
331 331 self.extradata.append((key, hashval, value[1]))
332 332 self.positions[needle] = -len(self.extradata)
333 333 else:
334 334 # not found, put it in with extra positions
335 335 self.extradata.append((key, hashval, value[1]))
336 336 self.positions = (
337 337 self.positions[:needle]
338 338 + [-len(self.extradata)]
339 339 + self.positions[needle:]
340 340 )
341 341 self.extrainfo = (
342 342 self.extrainfo[:needle] + [0] + self.extrainfo[needle:]
343 343 )
344 344
345 345 def copy(self):
346 346 # XXX call _compact like in C?
347 347 return _lazymanifest(
348 348 self.data,
349 349 self.positions,
350 350 self.extrainfo,
351 351 self.extradata,
352 352 self.hasremovals,
353 353 )
354 354
355 355 def _compact(self):
356 356 # hopefully not called TOO often
357 357 if len(self.extradata) == 0 and not self.hasremovals:
358 358 return
359 359 l = []
360 360 i = 0
361 361 offset = 0
362 362 self.extrainfo = [0] * len(self.positions)
363 363 while i < len(self.positions):
364 364 if self.positions[i] >= 0:
365 365 cur = self.positions[i]
366 366 last_cut = cur
367 367
368 368 # Collect all contiguous entries in the buffer at the current
369 369 # offset, breaking out only for added/modified items held in
370 370 # extradata, or a deleted line prior to the next position.
371 371 while True:
372 372 self.positions[i] = offset
373 373 i += 1
374 374 if i == len(self.positions) or self.positions[i] < 0:
375 375 break
376 376
377 377 # A removed file has no positions[] entry, but does have an
378 378 # overwritten first byte. Break out and find the end of the
379 379 # current good entry/entries if there is a removed file
380 380 # before the next position.
381 381 if (
382 382 self.hasremovals
383 383 and self.data.find(b'\n\x00', cur, self.positions[i])
384 384 != -1
385 385 ):
386 386 break
387 387
388 388 offset += self.positions[i] - cur
389 389 cur = self.positions[i]
390 390 end_cut = self.data.find(b'\n', cur)
391 391 if end_cut != -1:
392 392 end_cut += 1
393 393 offset += end_cut - cur
394 394 l.append(self.data[last_cut:end_cut])
395 395 else:
396 396 while i < len(self.positions) and self.positions[i] < 0:
397 397 cur = self.positions[i]
398 398 t = self.extradata[-cur - 1]
399 399 l.append(self._pack(t))
400 400 self.positions[i] = offset
401 401 # Hashes are either 20 bytes (old sha1s) or 32
402 402 # bytes (new non-sha1).
403 403 hlen = 20
404 404 if len(t[1]) > 25:
405 405 hlen = 32
406 406 if len(t[1]) > hlen:
407 407 self.extrainfo[i] = ord(t[1][hlen + 1])
408 408 offset += len(l[-1])
409 409 i += 1
410 410 self.data = b''.join(l)
411 411 self.hasremovals = False
412 412 self.extradata = []
413 413
414 414 def _pack(self, d):
415 415 n = d[1]
416 416 assert len(n) in (20, 32)
417 417 return d[0] + b'\x00' + hex(n) + d[2] + b'\n'
418 418
419 419 def text(self):
420 420 self._compact()
421 421 return self.data
422 422
423 423 def diff(self, m2, clean=False):
424 424 '''Finds changes between the current manifest and m2.'''
425 425 # XXX think whether efficiency matters here
426 426 diff = {}
427 427
428 428 for fn, e1, flags in self.iterentries():
429 429 if fn not in m2:
430 430 diff[fn] = (e1, flags), (None, b'')
431 431 else:
432 432 e2 = m2[fn]
433 433 if (e1, flags) != e2:
434 434 diff[fn] = (e1, flags), e2
435 435 elif clean:
436 436 diff[fn] = None
437 437
438 438 for fn, e2, flags in m2.iterentries():
439 439 if fn not in self:
440 440 diff[fn] = (None, b''), (e2, flags)
441 441
442 442 return diff
443 443
444 444 def iterentries(self):
445 445 return lazymanifestiterentries(self)
446 446
447 447 def iterkeys(self):
448 448 return lazymanifestiter(self)
449 449
450 450 def __iter__(self):
451 451 return lazymanifestiter(self)
452 452
453 453 def __len__(self):
454 454 return len(self.positions)
455 455
456 456 def filtercopy(self, filterfn):
457 457 # XXX should be optimized
458 458 c = _lazymanifest(b'')
459 459 for f, n, fl in self.iterentries():
460 460 if filterfn(f):
461 461 c[f] = n, fl
462 462 return c
463 463
464 464
465 465 try:
466 466 _lazymanifest = parsers.lazymanifest
467 467 except AttributeError:
468 468 pass
469 469
470 470
471 471 @interfaceutil.implementer(repository.imanifestdict)
472 472 class manifestdict(object):
473 473 def __init__(self, data=b''):
474 474 self._lm = _lazymanifest(data)
475 475
476 476 def __getitem__(self, key):
477 477 return self._lm[key][0]
478 478
479 479 def find(self, key):
480 480 return self._lm[key]
481 481
482 482 def __len__(self):
483 483 return len(self._lm)
484 484
485 485 def __nonzero__(self):
486 486 # nonzero is covered by the __len__ function, but implementing it here
487 487 # makes it easier for extensions to override.
488 488 return len(self._lm) != 0
489 489
490 490 __bool__ = __nonzero__
491 491
492 492 def __setitem__(self, key, node):
493 493 self._lm[key] = node, self.flags(key)
494 494
495 495 def __contains__(self, key):
496 496 if key is None:
497 497 return False
498 498 return key in self._lm
499 499
500 500 def __delitem__(self, key):
501 501 del self._lm[key]
502 502
503 503 def __iter__(self):
504 504 return self._lm.__iter__()
505 505
506 506 def iterkeys(self):
507 507 return self._lm.iterkeys()
508 508
509 509 def keys(self):
510 510 return list(self.iterkeys())
511 511
512 512 def filesnotin(self, m2, match=None):
513 513 '''Set of files in this manifest that are not in the other'''
514 514 if match is not None:
515 515 match = matchmod.badmatch(match, lambda path, msg: None)
516 516 sm2 = set(m2.walk(match))
517 517 return {f for f in self.walk(match) if f not in sm2}
518 518 return {f for f in self if f not in m2}
519 519
520 520 @propertycache
521 521 def _dirs(self):
522 522 return pathutil.dirs(self)
523 523
524 524 def dirs(self):
525 525 return self._dirs
526 526
527 527 def hasdir(self, dir):
528 528 return dir in self._dirs
529 529
530 530 def _filesfastpath(self, match):
531 531 """Checks whether we can correctly and quickly iterate over matcher
532 532 files instead of over manifest files."""
533 533 files = match.files()
534 534 return len(files) < 100 and (
535 535 match.isexact()
536 536 or (match.prefix() and all(fn in self for fn in files))
537 537 )
538 538
539 539 def walk(self, match):
540 540 """Generates matching file names.
541 541
542 542 Equivalent to manifest.matches(match).iterkeys(), but without creating
543 543 an entirely new manifest.
544 544
545 545 It also reports nonexistent files by marking them bad with match.bad().
546 546 """
547 547 if match.always():
548 548 for f in iter(self):
549 549 yield f
550 550 return
551 551
552 552 fset = set(match.files())
553 553
554 554 # avoid the entire walk if we're only looking for specific files
555 555 if self._filesfastpath(match):
556 556 for fn in sorted(fset):
557 557 if fn in self:
558 558 yield fn
559 559 return
560 560
561 561 for fn in self:
562 562 if fn in fset:
563 563 # specified pattern is the exact name
564 564 fset.remove(fn)
565 565 if match(fn):
566 566 yield fn
567 567
568 568 # for dirstate.walk, files=[''] means "walk the whole tree".
569 569 # follow that here, too
570 570 fset.discard(b'')
571 571
572 572 for fn in sorted(fset):
573 573 if not self.hasdir(fn):
574 574 match.bad(fn, None)
575 575
576 576 def _matches(self, match):
577 577 '''generate a new manifest filtered by the match argument'''
578 578 if match.always():
579 579 return self.copy()
580 580
581 581 if self._filesfastpath(match):
582 582 m = manifestdict()
583 583 lm = self._lm
584 584 for fn in match.files():
585 585 if fn in lm:
586 586 m._lm[fn] = lm[fn]
587 587 return m
588 588
589 589 m = manifestdict()
590 590 m._lm = self._lm.filtercopy(match)
591 591 return m
592 592
593 593 def diff(self, m2, match=None, clean=False):
594 594 """Finds changes between the current manifest and m2.
595 595
596 596 Args:
597 597 m2: the manifest to which this manifest should be compared.
598 598 clean: if true, include files unchanged between these manifests
599 599 with a None value in the returned dictionary.
600 600
601 601 The result is returned as a dict with filename as key and
602 602 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
603 603 nodeid in the current/other manifest and fl1/fl2 is the flag
604 604 in the current/other manifest. Where the file does not exist,
605 605 the nodeid will be None and the flags will be the empty
606 606 string.
607 607 """
608 608 if match:
609 609 m1 = self._matches(match)
610 610 m2 = m2._matches(match)
611 611 return m1.diff(m2, clean=clean)
612 612 return self._lm.diff(m2._lm, clean)
613 613
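A small usage sketch of the diff() result shape documented above, assuming the mercurial package from this changeset is importable; the paths and nodes are fabricated:

import hashlib
from mercurial import manifest

def fakenode(seed):
    return hashlib.sha1(seed).digest()            # any 20-byte value will do

m1 = manifest.manifestdict()
m2 = manifest.manifestdict()
m1[b'a.txt'] = fakenode(b'a v1')
m2[b'a.txt'] = fakenode(b'a v2')                  # modified
m1[b'gone.txt'] = fakenode(b'gone')               # only in m1
m2[b'new.txt'] = fakenode(b'new')                 # only in m2

for path, ((n1, fl1), (n2, fl2)) in sorted(m1.diff(m2).items()):
    # missing files show up as node None and empty flags
    print(path, n1 is None, n2 is None, fl1, fl2)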
614 614 def setflag(self, key, flag):
615 615 if flag not in _manifestflags:
616 616 raise TypeError(b"Invalid manifest flag set.")
617 617 self._lm[key] = self[key], flag
618 618
619 619 def get(self, key, default=None):
620 620 try:
621 621 return self._lm[key][0]
622 622 except KeyError:
623 623 return default
624 624
625 625 def flags(self, key):
626 626 try:
627 627 return self._lm[key][1]
628 628 except KeyError:
629 629 return b''
630 630
631 631 def copy(self):
632 632 c = manifestdict()
633 633 c._lm = self._lm.copy()
634 634 return c
635 635
636 636 def items(self):
637 637 return (x[:2] for x in self._lm.iterentries())
638 638
639 639 def iteritems(self):
640 640 return (x[:2] for x in self._lm.iterentries())
641 641
642 642 def iterentries(self):
643 643 return self._lm.iterentries()
644 644
645 645 def text(self):
646 646 # most likely uses native version
647 647 return self._lm.text()
648 648
649 649 def fastdelta(self, base, changes):
650 650 """Given a base manifest text as a bytearray and a list of changes
651 651 relative to that text, compute a delta that can be used by revlog.
652 652 """
653 653 delta = []
654 654 dstart = None
655 655 dend = None
656 656 dline = [b""]
657 657 start = 0
658 658 # zero copy representation of base as a buffer
659 659 addbuf = util.buffer(base)
660 660
661 661 changes = list(changes)
662 662 if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD:
663 663 # start with a readonly loop that finds the offset of
664 664 # each line and creates the deltas
665 665 for f, todelete in changes:
666 666 # start, end will either delimit the existing item or mark the insert point
667 667 start, end = _msearch(addbuf, f, start)
668 668 if not todelete:
669 669 h, fl = self._lm[f]
670 670 l = b"%s\0%s%s\n" % (f, hex(h), fl)
671 671 else:
672 672 if start == end:
673 673 # item we want to delete was not found, error out
674 674 raise AssertionError(
675 675 _(b"failed to remove %s from manifest") % f
676 676 )
677 677 l = b""
678 678 if dstart is not None and dstart <= start and dend >= start:
679 679 if dend < end:
680 680 dend = end
681 681 if l:
682 682 dline.append(l)
683 683 else:
684 684 if dstart is not None:
685 685 delta.append([dstart, dend, b"".join(dline)])
686 686 dstart = start
687 687 dend = end
688 688 dline = [l]
689 689
690 690 if dstart is not None:
691 691 delta.append([dstart, dend, b"".join(dline)])
692 692 # apply the delta to the base, and get a delta for addrevision
693 693 deltatext, arraytext = _addlistdelta(base, delta)
694 694 else:
695 695 # For large changes, it's much cheaper to just build the text and
696 696 # diff it.
697 697 arraytext = bytearray(self.text())
698 698 deltatext = mdiff.textdiff(
699 699 util.buffer(base), util.buffer(arraytext)
700 700 )
701 701
702 702 return arraytext, deltatext
703 703
704 704
705 705 def _msearch(m, s, lo=0, hi=None):
706 706 """return a tuple (start, end) that says where to find s within m.
707 707
708 708 If the string is found, m[start:end] is the line containing
709 709 that string. If start == end, the string was not found and
710 710 they indicate the proper sorted insertion point.
711 711
712 712 m should be a buffer, a memoryview or a byte string.
713 713 s is a byte string"""
714 714
715 715 def advance(i, c):
716 716 while i < lenm and m[i : i + 1] != c:
717 717 i += 1
718 718 return i
719 719
720 720 if not s:
721 721 return (lo, lo)
722 722 lenm = len(m)
723 723 if not hi:
724 724 hi = lenm
725 725 while lo < hi:
726 726 mid = (lo + hi) // 2
727 727 start = mid
728 728 while start > 0 and m[start - 1 : start] != b'\n':
729 729 start -= 1
730 730 end = advance(start, b'\0')
731 731 if bytes(m[start:end]) < s:
732 732 # we know that after the null there are 40 bytes of sha1
733 733 # this translates to the bisect lo = mid + 1
734 734 lo = advance(end + 40, b'\n') + 1
735 735 else:
736 736 # this translates to the bisect hi = mid
737 737 hi = start
738 738 end = advance(lo, b'\0')
739 739 found = m[lo:end]
740 740 if s == found:
741 741 # we know that after the null there are 40 bytes of sha1
742 742 end = advance(end + 40, b'\n')
743 743 return (lo, end + 1)
744 744 else:
745 745 return (lo, lo)
746 746
747 747
748 748 def _checkforbidden(l):
749 749 """Check filenames for illegal characters."""
750 750 for f in l:
751 751 if b'\n' in f or b'\r' in f:
752 752 raise error.StorageError(
753 753 _(b"'\\n' and '\\r' disallowed in filenames: %r")
754 754 % pycompat.bytestr(f)
755 755 )
756 756
757 757
758 758 # apply the changes collected during the bisect loop to our addlist
759 759 # return a delta suitable for addrevision
760 760 def _addlistdelta(addlist, x):
761 761 # for large addlist arrays, building a new array is cheaper
762 762 # than repeatedly modifying the existing one
763 763 currentposition = 0
764 764 newaddlist = bytearray()
765 765
766 766 for start, end, content in x:
767 767 newaddlist += addlist[currentposition:start]
768 768 if content:
769 769 newaddlist += bytearray(content)
770 770
771 771 currentposition = end
772 772
773 773 newaddlist += addlist[currentposition:]
774 774
775 775 deltatext = b"".join(
776 776 struct.pack(b">lll", start, end, len(content)) + content
777 777 for start, end, content in x
778 778 )
779 779 return deltatext, newaddlist
780 780
781 781
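The delta text built by _addlistdelta() above packs, per chunk, a big-endian (start, end, length) header followed by the replacement bytes, to be applied against the base text, as the struct.pack call shows. A self-contained sketch of applying such a delta; the offsets and entries are made up:

import struct

def apply_delta(base, deltatext):
    # Replace base[start:end] with each chunk's content, chunks in ascending order.
    out, last, pos = [], 0, 0
    while pos < len(deltatext):
        start, end, length = struct.unpack_from(b'>lll', deltatext, pos)
        pos += 12
        content = deltatext[pos:pos + length]
        pos += length
        out.append(base[last:start])
        out.append(content)
        last = end
    out.append(base[last:])
    return b''.join(out)

base = b'a\x00' + b'0' * 40 + b'\n' + b'b\x00' + b'1' * 40 + b'\n'
newline = b'a\x00' + b'2' * 40 + b'\n'            # replace the first 43-byte line
delta = struct.pack(b'>lll', 0, 43, len(newline)) + newline
assert apply_delta(base, delta) == newline + b'b\x00' + b'1' * 40 + b'\n'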
782 782 def _splittopdir(f):
783 783 if b'/' in f:
784 784 dir, subpath = f.split(b'/', 1)
785 785 return dir + b'/', subpath
786 786 else:
787 787 return b'', f
788 788
789 789
790 790 _noop = lambda s: None
791 791
792 792
793 793 @interfaceutil.implementer(repository.imanifestdict)
794 794 class treemanifest(object):
795 795 def __init__(self, dir=b'', text=b''):
796 796 self._dir = dir
797 797 self._node = nullid
798 798 self._loadfunc = _noop
799 799 self._copyfunc = _noop
800 800 self._dirty = False
801 801 self._dirs = {}
802 802 self._lazydirs = {}
803 803 # Using _lazymanifest here is a little slower than plain old dicts
804 804 self._files = {}
805 805 self._flags = {}
806 806 if text:
807 807
808 808 def readsubtree(subdir, subm):
809 809 raise AssertionError(
810 810 b'treemanifest constructor only accepts flat manifests'
811 811 )
812 812
813 813 self.parse(text, readsubtree)
814 814 self._dirty = True # Mark flat manifest dirty after parsing
815 815
816 816 def _subpath(self, path):
817 817 return self._dir + path
818 818
819 819 def _loadalllazy(self):
820 820 selfdirs = self._dirs
821 821 subpath = self._subpath
822 822 for d, (node, readsubtree, docopy) in pycompat.iteritems(
823 823 self._lazydirs
824 824 ):
825 825 if docopy:
826 826 selfdirs[d] = readsubtree(subpath(d), node).copy()
827 827 else:
828 828 selfdirs[d] = readsubtree(subpath(d), node)
829 829 self._lazydirs = {}
830 830
831 831 def _loadlazy(self, d):
832 832 v = self._lazydirs.get(d)
833 833 if v:
834 834 node, readsubtree, docopy = v
835 835 if docopy:
836 836 self._dirs[d] = readsubtree(self._subpath(d), node).copy()
837 837 else:
838 838 self._dirs[d] = readsubtree(self._subpath(d), node)
839 839 del self._lazydirs[d]
840 840
841 841 def _loadchildrensetlazy(self, visit):
842 842 if not visit:
843 843 return None
844 844 if visit == b'all' or visit == b'this':
845 845 self._loadalllazy()
846 846 return None
847 847
848 848 loadlazy = self._loadlazy
849 849 for k in visit:
850 850 loadlazy(k + b'/')
851 851 return visit
852 852
853 853 def _loaddifflazy(self, t1, t2):
854 854 """load items in t1 and t2 if they're needed for diffing.
855 855
856 856 The current criteria are:
857 857 - if it's not present in _lazydirs in either t1 or t2, load it in the
858 858 other (it may already be loaded or it may not exist, doesn't matter)
859 859 - if it's present in _lazydirs in both, compare the nodeid; if it
860 860 differs, load it in both
861 861 """
862 862 toloadlazy = []
863 863 for d, v1 in pycompat.iteritems(t1._lazydirs):
864 864 v2 = t2._lazydirs.get(d)
865 865 if not v2 or v2[0] != v1[0]:
866 866 toloadlazy.append(d)
867 867 for d, v1 in pycompat.iteritems(t2._lazydirs):
868 868 if d not in t1._lazydirs:
869 869 toloadlazy.append(d)
870 870
871 871 for d in toloadlazy:
872 872 t1._loadlazy(d)
873 873 t2._loadlazy(d)
874 874
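To make the criteria in the _loaddifflazy docstring concrete, here is a pure-data restatement using made-up directory -> (node, readsubtree, docopy) tuples:

t1_lazy = {b'a/': (b'n1', None, False), b'b/': (b'n2', None, False)}
t2_lazy = {b'a/': (b'n1', None, False), b'b/': (b'n3', None, False), b'c/': (b'n4', None, False)}

toload = set()
for d, v1 in t1_lazy.items():
    v2 = t2_lazy.get(d)
    if not v2 or v2[0] != v1[0]:
        toload.add(d)           # b'b/': nodeids differ, load on both sides
for d in t2_lazy:
    if d not in t1_lazy:
        toload.add(d)           # b'c/': present on only one side
assert toload == {b'b/', b'c/'}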
875 875 def __len__(self):
876 876 self._load()
877 877 size = len(self._files)
878 878 self._loadalllazy()
879 879 for m in self._dirs.values():
880 880 size += m.__len__()
881 881 return size
882 882
883 883 def __nonzero__(self):
884 884 # Faster than "__len__() != 0" since it avoids loading sub-manifests
885 885 return not self._isempty()
886 886
887 887 __bool__ = __nonzero__
888 888
889 889 def _isempty(self):
890 890 self._load() # for consistency; already loaded by all callers
891 891 # See if we can skip loading everything.
892 892 if self._files or (
893 893 self._dirs and any(not m._isempty() for m in self._dirs.values())
894 894 ):
895 895 return False
896 896 self._loadalllazy()
897 897 return not self._dirs or all(m._isempty() for m in self._dirs.values())
898 898
899 899 @encoding.strmethod
900 900 def __repr__(self):
901 901 return (
902 902 b'<treemanifest dir=%s, node=%s, loaded=%r, dirty=%r at 0x%x>'
903 903 % (
904 904 self._dir,
905 905 hex(self._node),
906 906 bool(self._loadfunc is _noop),
907 907 self._dirty,
908 908 id(self),
909 909 )
910 910 )
911 911
912 912 def dir(self):
913 913 """The directory that this tree manifest represents, including a
914 914 trailing '/'. Empty string for the repo root directory."""
915 915 return self._dir
916 916
917 917 def node(self):
918 918 """This node of this instance. nullid for unsaved instances. Should
919 919 be updated when the instance is read or written from a revlog.
920 920 """
921 921 assert not self._dirty
922 922 return self._node
923 923
924 924 def setnode(self, node):
925 925 self._node = node
926 926 self._dirty = False
927 927
928 928 def iterentries(self):
929 929 self._load()
930 930 self._loadalllazy()
931 931 for p, n in sorted(
932 932 itertools.chain(self._dirs.items(), self._files.items())
933 933 ):
934 934 if p in self._files:
935 935 yield self._subpath(p), n, self._flags.get(p, b'')
936 936 else:
937 937 for x in n.iterentries():
938 938 yield x
939 939
940 940 def items(self):
941 941 self._load()
942 942 self._loadalllazy()
943 943 for p, n in sorted(
944 944 itertools.chain(self._dirs.items(), self._files.items())
945 945 ):
946 946 if p in self._files:
947 947 yield self._subpath(p), n
948 948 else:
949 949 for f, sn in pycompat.iteritems(n):
950 950 yield f, sn
951 951
952 952 iteritems = items
953 953
954 954 def iterkeys(self):
955 955 self._load()
956 956 self._loadalllazy()
957 957 for p in sorted(itertools.chain(self._dirs, self._files)):
958 958 if p in self._files:
959 959 yield self._subpath(p)
960 960 else:
961 961 for f in self._dirs[p]:
962 962 yield f
963 963
964 964 def keys(self):
965 965 return list(self.iterkeys())
966 966
967 967 def __iter__(self):
968 968 return self.iterkeys()
969 969
970 970 def __contains__(self, f):
971 971 if f is None:
972 972 return False
973 973 self._load()
974 974 dir, subpath = _splittopdir(f)
975 975 if dir:
976 976 self._loadlazy(dir)
977 977
978 978 if dir not in self._dirs:
979 979 return False
980 980
981 981 return self._dirs[dir].__contains__(subpath)
982 982 else:
983 983 return f in self._files
984 984
985 985 def get(self, f, default=None):
986 986 self._load()
987 987 dir, subpath = _splittopdir(f)
988 988 if dir:
989 989 self._loadlazy(dir)
990 990
991 991 if dir not in self._dirs:
992 992 return default
993 993 return self._dirs[dir].get(subpath, default)
994 994 else:
995 995 return self._files.get(f, default)
996 996
997 997 def __getitem__(self, f):
998 998 self._load()
999 999 dir, subpath = _splittopdir(f)
1000 1000 if dir:
1001 1001 self._loadlazy(dir)
1002 1002
1003 1003 return self._dirs[dir].__getitem__(subpath)
1004 1004 else:
1005 1005 return self._files[f]
1006 1006
1007 1007 def flags(self, f):
1008 1008 self._load()
1009 1009 dir, subpath = _splittopdir(f)
1010 1010 if dir:
1011 1011 self._loadlazy(dir)
1012 1012
1013 1013 if dir not in self._dirs:
1014 1014 return b''
1015 1015 return self._dirs[dir].flags(subpath)
1016 1016 else:
1017 1017 if f in self._lazydirs or f in self._dirs:
1018 1018 return b''
1019 1019 return self._flags.get(f, b'')
1020 1020
1021 1021 def find(self, f):
1022 1022 self._load()
1023 1023 dir, subpath = _splittopdir(f)
1024 1024 if dir:
1025 1025 self._loadlazy(dir)
1026 1026
1027 1027 return self._dirs[dir].find(subpath)
1028 1028 else:
1029 1029 return self._files[f], self._flags.get(f, b'')
1030 1030
1031 1031 def __delitem__(self, f):
1032 1032 self._load()
1033 1033 dir, subpath = _splittopdir(f)
1034 1034 if dir:
1035 1035 self._loadlazy(dir)
1036 1036
1037 1037 self._dirs[dir].__delitem__(subpath)
1038 1038 # If the directory is now empty, remove it
1039 1039 if self._dirs[dir]._isempty():
1040 1040 del self._dirs[dir]
1041 1041 else:
1042 1042 del self._files[f]
1043 1043 if f in self._flags:
1044 1044 del self._flags[f]
1045 1045 self._dirty = True
1046 1046
1047 1047 def __setitem__(self, f, n):
1048 1048 assert n is not None
1049 1049 self._load()
1050 1050 dir, subpath = _splittopdir(f)
1051 1051 if dir:
1052 1052 self._loadlazy(dir)
1053 1053 if dir not in self._dirs:
1054 1054 self._dirs[dir] = treemanifest(self._subpath(dir))
1055 1055 self._dirs[dir].__setitem__(subpath, n)
1056 1056 else:
1057 1057 # manifest nodes are either 20 bytes or 32 bytes,
1058 1058 # depending on the hash in use. Assert this as historically
1059 1059 # sometimes extra bytes were added.
1060 1060 assert len(n) in (20, 32)
1061 1061 self._files[f] = n
1062 1062 self._dirty = True
1063 1063
1064 1064 def _load(self):
1065 1065 if self._loadfunc is not _noop:
1066 1066 lf, self._loadfunc = self._loadfunc, _noop
1067 1067 lf(self)
1068 1068 elif self._copyfunc is not _noop:
1069 1069 cf, self._copyfunc = self._copyfunc, _noop
1070 1070 cf(self)
1071 1071
1072 1072 def setflag(self, f, flags):
1073 1073 """Set the flags (symlink, executable) for path f."""
1074 1074 if flags not in _manifestflags:
1075 1075 raise TypeError(b"Invalid manifest flag set.")
1076 1076 self._load()
1077 1077 dir, subpath = _splittopdir(f)
1078 1078 if dir:
1079 1079 self._loadlazy(dir)
1080 1080 if dir not in self._dirs:
1081 1081 self._dirs[dir] = treemanifest(self._subpath(dir))
1082 1082 self._dirs[dir].setflag(subpath, flags)
1083 1083 else:
1084 1084 self._flags[f] = flags
1085 1085 self._dirty = True
1086 1086
1087 1087 def copy(self):
1088 1088 copy = treemanifest(self._dir)
1089 1089 copy._node = self._node
1090 1090 copy._dirty = self._dirty
1091 1091 if self._copyfunc is _noop:
1092 1092
1093 1093 def _copyfunc(s):
1094 1094 self._load()
1095 1095 s._lazydirs = {
1096 1096 d: (n, r, True)
1097 1097 for d, (n, r, c) in pycompat.iteritems(self._lazydirs)
1098 1098 }
1099 1099 sdirs = s._dirs
1100 1100 for d, v in pycompat.iteritems(self._dirs):
1101 1101 sdirs[d] = v.copy()
1102 1102 s._files = dict.copy(self._files)
1103 1103 s._flags = dict.copy(self._flags)
1104 1104
1105 1105 if self._loadfunc is _noop:
1106 1106 _copyfunc(copy)
1107 1107 else:
1108 1108 copy._copyfunc = _copyfunc
1109 1109 else:
1110 1110 copy._copyfunc = self._copyfunc
1111 1111 return copy
1112 1112
1113 1113 def filesnotin(self, m2, match=None):
1114 1114 '''Set of files in this manifest that are not in the other'''
1115 1115 if match and not match.always():
1116 1116 m1 = self._matches(match)
1117 1117 m2 = m2._matches(match)
1118 1118 return m1.filesnotin(m2)
1119 1119
1120 1120 files = set()
1121 1121
1122 1122 def _filesnotin(t1, t2):
1123 1123 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1124 1124 return
1125 1125 t1._load()
1126 1126 t2._load()
1127 1127 self._loaddifflazy(t1, t2)
1128 1128 for d, m1 in pycompat.iteritems(t1._dirs):
1129 1129 if d in t2._dirs:
1130 1130 m2 = t2._dirs[d]
1131 1131 _filesnotin(m1, m2)
1132 1132 else:
1133 1133 files.update(m1.iterkeys())
1134 1134
1135 1135 for fn in t1._files:
1136 1136 if fn not in t2._files:
1137 1137 files.add(t1._subpath(fn))
1138 1138
1139 1139 _filesnotin(self, m2)
1140 1140 return files
1141 1141
1142 1142 @propertycache
1143 1143 def _alldirs(self):
1144 1144 return pathutil.dirs(self)
1145 1145
1146 1146 def dirs(self):
1147 1147 return self._alldirs
1148 1148
1149 1149 def hasdir(self, dir):
1150 1150 self._load()
1151 1151 topdir, subdir = _splittopdir(dir)
1152 1152 if topdir:
1153 1153 self._loadlazy(topdir)
1154 1154 if topdir in self._dirs:
1155 1155 return self._dirs[topdir].hasdir(subdir)
1156 1156 return False
1157 1157 dirslash = dir + b'/'
1158 1158 return dirslash in self._dirs or dirslash in self._lazydirs
1159 1159
1160 1160 def walk(self, match):
1161 1161 """Generates matching file names.
1162 1162
1163 1163 It also reports nonexistent files by marking them bad with match.bad().
1164 1164 """
1165 1165 if match.always():
1166 1166 for f in iter(self):
1167 1167 yield f
1168 1168 return
1169 1169
1170 1170 fset = set(match.files())
1171 1171
1172 1172 for fn in self._walk(match):
1173 1173 if fn in fset:
1174 1174 # specified pattern is the exact name
1175 1175 fset.remove(fn)
1176 1176 yield fn
1177 1177
1178 1178 # for dirstate.walk, files=[''] means "walk the whole tree".
1179 1179 # follow that here, too
1180 1180 fset.discard(b'')
1181 1181
1182 1182 for fn in sorted(fset):
1183 1183 if not self.hasdir(fn):
1184 1184 match.bad(fn, None)
1185 1185
1186 1186 def _walk(self, match):
1187 1187 '''Recursively generates matching file names for walk().'''
1188 1188 visit = match.visitchildrenset(self._dir[:-1])
1189 1189 if not visit:
1190 1190 return
1191 1191
1192 1192 # yield this dir's files and walk its submanifests
1193 1193 self._load()
1194 1194 visit = self._loadchildrensetlazy(visit)
1195 1195 for p in sorted(list(self._dirs) + list(self._files)):
1196 1196 if p in self._files:
1197 1197 fullp = self._subpath(p)
1198 1198 if match(fullp):
1199 1199 yield fullp
1200 1200 else:
1201 1201 if not visit or p[:-1] in visit:
1202 1202 for f in self._dirs[p]._walk(match):
1203 1203 yield f
1204 1204
1205 1205 def _matches(self, match):
1206 1206 """recursively generate a new manifest filtered by the match argument."""
1207 1207 if match.always():
1208 1208 return self.copy()
1209 1209 return self._matches_inner(match)
1210 1210
1211 1211 def _matches_inner(self, match):
1212 1212 if match.always():
1213 1213 return self.copy()
1214 1214
1215 1215 visit = match.visitchildrenset(self._dir[:-1])
1216 1216 if visit == b'all':
1217 1217 return self.copy()
1218 1218 ret = treemanifest(self._dir)
1219 1219 if not visit:
1220 1220 return ret
1221 1221
1222 1222 self._load()
1223 1223 for fn in self._files:
1224 1224 # While visitchildrenset *usually* lists only subdirs, this is
1225 1225 # actually up to the matcher and may have some files in the set().
1226 1226 # If visit == 'this', we should obviously look at the files in this
1227 1227 # directory; if visit is a set, and fn is in it, we should inspect
1228 1228 # fn (but no need to inspect things not in the set).
1229 1229 if visit != b'this' and fn not in visit:
1230 1230 continue
1231 1231 fullp = self._subpath(fn)
1232 1232 # visitchildrenset isn't perfect, we still need to call the regular
1233 1233 # matcher code to further filter results.
1234 1234 if not match(fullp):
1235 1235 continue
1236 1236 ret._files[fn] = self._files[fn]
1237 1237 if fn in self._flags:
1238 1238 ret._flags[fn] = self._flags[fn]
1239 1239
1240 1240 visit = self._loadchildrensetlazy(visit)
1241 1241 for dir, subm in pycompat.iteritems(self._dirs):
1242 1242 if visit and dir[:-1] not in visit:
1243 1243 continue
1244 1244 m = subm._matches_inner(match)
1245 1245 if not m._isempty():
1246 1246 ret._dirs[dir] = m
1247 1247
1248 1248 if not ret._isempty():
1249 1249 ret._dirty = True
1250 1250 return ret
1251 1251
1252 1252 def fastdelta(self, base, changes):
1253 1253 raise FastdeltaUnavailable()
1254 1254
1255 1255 def diff(self, m2, match=None, clean=False):
1256 1256 """Finds changes between the current manifest and m2.
1257 1257
1258 1258 Args:
1259 1259 m2: the manifest to which this manifest should be compared.
1260 1260 clean: if true, include files unchanged between these manifests
1261 1261 with a None value in the returned dictionary.
1262 1262
1263 1263 The result is returned as a dict with filename as key and
1264 1264 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
1265 1265 nodeid in the current/other manifest and fl1/fl2 is the flag
1266 1266 in the current/other manifest. Where the file does not exist,
1267 1267 the nodeid will be None and the flags will be the empty
1268 1268 string.
1269 1269 """
1270 1270 if match and not match.always():
1271 1271 m1 = self._matches(match)
1272 1272 m2 = m2._matches(match)
1273 1273 return m1.diff(m2, clean=clean)
1274 1274 result = {}
1275 1275 emptytree = treemanifest()
1276 1276
1277 1277 def _iterativediff(t1, t2, stack):
1278 1278 """compares two tree manifests and append new tree-manifests which
1279 1279 needs to be compared to stack"""
1280 1280 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1281 1281 return
1282 1282 t1._load()
1283 1283 t2._load()
1284 1284 self._loaddifflazy(t1, t2)
1285 1285
1286 1286 for d, m1 in pycompat.iteritems(t1._dirs):
1287 1287 m2 = t2._dirs.get(d, emptytree)
1288 1288 stack.append((m1, m2))
1289 1289
1290 1290 for d, m2 in pycompat.iteritems(t2._dirs):
1291 1291 if d not in t1._dirs:
1292 1292 stack.append((emptytree, m2))
1293 1293
1294 1294 for fn, n1 in pycompat.iteritems(t1._files):
1295 1295 fl1 = t1._flags.get(fn, b'')
1296 1296 n2 = t2._files.get(fn, None)
1297 1297 fl2 = t2._flags.get(fn, b'')
1298 1298 if n1 != n2 or fl1 != fl2:
1299 1299 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
1300 1300 elif clean:
1301 1301 result[t1._subpath(fn)] = None
1302 1302
1303 1303 for fn, n2 in pycompat.iteritems(t2._files):
1304 1304 if fn not in t1._files:
1305 1305 fl2 = t2._flags.get(fn, b'')
1306 1306 result[t2._subpath(fn)] = ((None, b''), (n2, fl2))
1307 1307
1308 1308 stackls = []
1309 1309 _iterativediff(self, m2, stackls)
1310 1310 while stackls:
1311 1311 t1, t2 = stackls.pop()
1312 1312 # stackls is populated in the function call
1313 1313 _iterativediff(t1, t2, stackls)
1314 1314 return result
1315 1315
1316 1316 def unmodifiedsince(self, m2):
1317 1317 return not self._dirty and not m2._dirty and self._node == m2._node
1318 1318
1319 1319 def parse(self, text, readsubtree):
1320 1320 selflazy = self._lazydirs
1321 1321 for f, n, fl in _parse(text):
1322 1322 if fl == b't':
1323 1323 f = f + b'/'
1324 1324 # False below means "doesn't need to be copied" and can use the
1325 1325 # cached value from readsubtree directly.
1326 1326 selflazy[f] = (n, readsubtree, False)
1327 1327 elif b'/' in f:
1328 1328 # This is a flat manifest, so use __setitem__ and setflag rather
1329 1329 # than assigning directly to _files and _flags, so we can
1330 1330 # assign a path in a subdirectory, and to mark dirty (compared
1331 1331 # to nullid).
1332 1332 self[f] = n
1333 1333 if fl:
1334 1334 self.setflag(f, fl)
1335 1335 else:
1336 1336 # Assigning to _files and _flags avoids marking as dirty,
1337 1337 # and should be a little faster.
1338 1338 self._files[f] = n
1339 1339 if fl:
1340 1340 self._flags[f] = fl
1341 1341
1342 1342 def text(self):
1343 1343 """Get the full data of this manifest as a bytestring."""
1344 1344 self._load()
1345 1345 return _text(self.iterentries())
1346 1346
1347 1347 def dirtext(self):
1348 1348 """Get the full data of this directory as a bytestring. Make sure that
1349 1349 any submanifests have been written first, so their nodeids are correct.
1350 1350 """
1351 1351 self._load()
1352 1352 flags = self.flags
1353 1353 lazydirs = [
1354 1354 (d[:-1], v[0], b't') for d, v in pycompat.iteritems(self._lazydirs)
1355 1355 ]
1356 1356 dirs = [(d[:-1], self._dirs[d]._node, b't') for d in self._dirs]
1357 1357 files = [(f, self._files[f], flags(f)) for f in self._files]
1358 1358 return _text(sorted(dirs + files + lazydirs))
1359 1359
1360 1360 def read(self, gettext, readsubtree):
1361 1361 def _load_for_read(s):
1362 1362 s.parse(gettext(), readsubtree)
1363 1363 s._dirty = False
1364 1364
1365 1365 self._loadfunc = _load_for_read
1366 1366
1367 1367 def writesubtrees(self, m1, m2, writesubtree, match):
1368 1368 self._load() # for consistency; should never have any effect here
1369 1369 m1._load()
1370 1370 m2._load()
1371 1371 emptytree = treemanifest()
1372 1372
1373 1373 def getnode(m, d):
1374 1374 ld = m._lazydirs.get(d)
1375 1375 if ld:
1376 1376 return ld[0]
1377 1377 return m._dirs.get(d, emptytree)._node
1378 1378
1379 1379 # let's skip investigating things that `match` says we do not need.
1380 1380 visit = match.visitchildrenset(self._dir[:-1])
1381 1381 visit = self._loadchildrensetlazy(visit)
1382 1382 if visit == b'this' or visit == b'all':
1383 1383 visit = None
1384 1384 for d, subm in pycompat.iteritems(self._dirs):
1385 1385 if visit and d[:-1] not in visit:
1386 1386 continue
1387 1387 subp1 = getnode(m1, d)
1388 1388 subp2 = getnode(m2, d)
1389 1389 if subp1 == nullid:
1390 1390 subp1, subp2 = subp2, subp1
1391 1391 writesubtree(subm, subp1, subp2, match)
1392 1392
1393 1393 def walksubtrees(self, matcher=None):
1394 1394 """Returns an iterator of the subtrees of this manifest, including this
1395 1395 manifest itself.
1396 1396
1397 1397 If `matcher` is provided, it only returns subtrees that match.
1398 1398 """
1399 1399 if matcher and not matcher.visitdir(self._dir[:-1]):
1400 1400 return
1401 1401 if not matcher or matcher(self._dir[:-1]):
1402 1402 yield self
1403 1403
1404 1404 self._load()
1405 1405 # OPT: use visitchildrenset to avoid loading everything.
1406 1406 self._loadalllazy()
1407 1407 for d, subm in pycompat.iteritems(self._dirs):
1408 1408 for subtree in subm.walksubtrees(matcher=matcher):
1409 1409 yield subtree
1410 1410
1411 1411
1412 1412 class manifestfulltextcache(util.lrucachedict):
1413 1413 """File-backed LRU cache for the manifest cache
1414 1414
1415 1415 The file consists of entries, up to EOF:
1416 1416
1417 1417 - 20 bytes node, 4 bytes length, <length> manifest data
1418 1418
1419 1419 These are written in reverse cache order (oldest to newest).
1420 1420
1421 1421 """
1422 1422
1423 1423 _file = b'manifestfulltextcache'
1424 1424
1425 1425 def __init__(self, max):
1426 1426 super(manifestfulltextcache, self).__init__(max)
1427 1427 self._dirty = False
1428 1428 self._read = False
1429 1429 self._opener = None
1430 1430
1431 1431 def read(self):
1432 1432 if self._read or self._opener is None:
1433 1433 return
1434 1434
1435 1435 try:
1436 1436 with self._opener(self._file) as fp:
1437 1437 set = super(manifestfulltextcache, self).__setitem__
1438 1438 # ignore trailing data, this is a cache, corruption is skipped
1439 1439 while True:
1440 1440 # TODO do we need to do work here for sha1 portability?
1441 1441 node = fp.read(20)
1442 1442 if len(node) < 20:
1443 1443 break
1444 1444 try:
1445 1445 size = struct.unpack(b'>L', fp.read(4))[0]
1446 1446 except struct.error:
1447 1447 break
1448 1448 value = bytearray(fp.read(size))
1449 1449 if len(value) != size:
1450 1450 break
1451 1451 set(node, value)
1452 1452 except IOError:
1453 1453 # the file is allowed to be missing
1454 1454 pass
1455 1455
1456 1456 self._read = True
1457 1457 self._dirty = False
1458 1458
1459 1459 def write(self):
1460 1460 if not self._dirty or self._opener is None:
1461 1461 return
1462 1462 # rotate backwards to the first used node
1463 1463 try:
1464 1464 with self._opener(
1465 1465 self._file, b'w', atomictemp=True, checkambig=True
1466 1466 ) as fp:
1467 1467 node = self._head.prev
1468 1468 while True:
1469 1469 if node.key in self._cache:
1470 1470 fp.write(node.key)
1471 1471 fp.write(struct.pack(b'>L', len(node.value)))
1472 1472 fp.write(node.value)
1473 1473 if node is self._head:
1474 1474 break
1475 1475 node = node.prev
1476 1476 except IOError:
1477 1477 # We could not write the cache (e.g. a permission error);
1478 1478 # the cached content may simply be missing later.
1479 1479 #
1480 1480 # We could try harder and see if we could recreate a wcache
1481 1481 # directory where we could write to.
1482 1482 #
1483 1483 # XXX the error passes silently; having some way to issue an error
1484 1484 # log via `ui.log` would be nice.
1485 1485 pass
1486 1486
1487 1487 def __len__(self):
1488 1488 if not self._read:
1489 1489 self.read()
1490 1490 return super(manifestfulltextcache, self).__len__()
1491 1491
1492 1492 def __contains__(self, k):
1493 1493 if not self._read:
1494 1494 self.read()
1495 1495 return super(manifestfulltextcache, self).__contains__(k)
1496 1496
1497 1497 def __iter__(self):
1498 1498 if not self._read:
1499 1499 self.read()
1500 1500 return super(manifestfulltextcache, self).__iter__()
1501 1501
1502 1502 def __getitem__(self, k):
1503 1503 if not self._read:
1504 1504 self.read()
1505 1505 # the cache lru order can change on read
1506 1506 setdirty = self._cache.get(k) is not self._head
1507 1507 value = super(manifestfulltextcache, self).__getitem__(k)
1508 1508 if setdirty:
1509 1509 self._dirty = True
1510 1510 return value
1511 1511
1512 1512 def __setitem__(self, k, v):
1513 1513 if not self._read:
1514 1514 self.read()
1515 1515 super(manifestfulltextcache, self).__setitem__(k, v)
1516 1516 self._dirty = True
1517 1517
1518 1518 def __delitem__(self, k):
1519 1519 if not self._read:
1520 1520 self.read()
1521 1521 super(manifestfulltextcache, self).__delitem__(k)
1522 1522 self._dirty = True
1523 1523
1524 1524 def get(self, k, default=None):
1525 1525 if not self._read:
1526 1526 self.read()
1527 1527 return super(manifestfulltextcache, self).get(k, default=default)
1528 1528
1529 1529 def clear(self, clear_persisted_data=False):
1530 1530 super(manifestfulltextcache, self).clear()
1531 1531 if clear_persisted_data:
1532 1532 self._dirty = True
1533 1533 self.write()
1534 1534 self._read = False
1535 1535
1536 1536
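The on-disk layout described in the manifestfulltextcache docstring (20-byte node, big-endian 32-bit length, then the manifest data, oldest entry first) can be scanned with a few lines of standalone code. A hedged sketch; the cache path in the commented usage is illustrative and may differ per setup:

import struct

def iter_cache_entries(path):
    with open(path, 'rb') as fp:
        while True:
            node = fp.read(20)
            if len(node) < 20:
                return
            raw = fp.read(4)
            if len(raw) < 4:
                return
            (size,) = struct.unpack('>L', raw)
            data = fp.read(size)
            if len(data) != size:
                return          # truncated entry, treat as corruption and stop
            yield node, data

# for node, data in iter_cache_entries('.hg/wcache/manifestfulltextcache'):
#     print(node.hex(), len(data))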
1537 1537 # An upper bound of what we expect from compression
1538 1538 # (the real-life value seems to be 3)
1539 1539 MAXCOMPRESSION = 3
1540 1540
1541 1541
1542 1542 class FastdeltaUnavailable(Exception):
1543 1543 """Exception raised when fastdelta isn't usable on a manifest."""
1544 1544
1545 1545
1546 1546 @interfaceutil.implementer(repository.imanifeststorage)
1547 1547 class manifestrevlog(object):
1548 1548 """A revlog that stores manifest texts. This is responsible for caching the
1549 1549 full-text manifest contents.
1550 1550 """
1551 1551
1552 1552 def __init__(
1553 1553 self,
1554 1554 opener,
1555 1555 tree=b'',
1556 1556 dirlogcache=None,
1557 1557 indexfile=None,
1558 1558 treemanifest=False,
1559 1559 ):
1560 1560 """Constructs a new manifest revlog
1561 1561
1562 1562 `indexfile` - used by extensions to have two manifests at once, like
1563 1563 when transitioning between flat manifests and treemanifests.
1564 1564
1565 1565 `treemanifest` - used to indicate this is a tree manifest revlog. Opener
1566 1566 options can also be used to make this a tree manifest revlog. The opener
1567 1567 option takes precedence, so if it is set to True, we ignore whatever
1568 1568 value is passed in to the constructor.
1569 1569 """
1570 1570 # During normal operations, we expect to deal with not more than four
1571 1571 # revs at a time (such as during commit --amend). When rebasing large
1572 1572 # stacks of commits, the number can go up, hence the config knob below.
1573 1573 cachesize = 4
1574 1574 optiontreemanifest = False
1575 1575 opts = getattr(opener, 'options', None)
1576 1576 if opts is not None:
1577 1577 cachesize = opts.get(b'manifestcachesize', cachesize)
1578 1578 optiontreemanifest = opts.get(b'treemanifest', False)
1579 1579
1580 1580 self._treeondisk = optiontreemanifest or treemanifest
1581 1581
1582 1582 self._fulltextcache = manifestfulltextcache(cachesize)
1583 1583
1584 1584 if tree:
1585 1585 assert self._treeondisk, b'opts is %r' % opts
1586 1586
1587 1587 if indexfile is None:
1588 1588 indexfile = b'00manifest.i'
1589 1589 if tree:
1590 1590 indexfile = b"meta/" + tree + indexfile
1591 1591
1592 1592 self.tree = tree
1593 1593
1594 1594 # The dirlogcache is kept on the root manifest log
1595 1595 if tree:
1596 1596 self._dirlogcache = dirlogcache
1597 1597 else:
1598 1598 self._dirlogcache = {b'': self}
1599 1599
1600 1600 self._revlog = revlog.revlog(
1601 1601 opener,
1602 1602 indexfile,
1603 1603 # only root indexfile is cached
1604 1604 checkambig=not bool(tree),
1605 1605 mmaplargeindex=True,
1606 1606 upperboundcomp=MAXCOMPRESSION,
1607 1607 persistentnodemap=opener.options.get(b'persistent-nodemap', False),
1608 1608 )
1609 1609
1610 1610 self.index = self._revlog.index
1611 1611 self.version = self._revlog.version
1612 1612 self._generaldelta = self._revlog._generaldelta
1613 1613 self._revlog.revlog_kind = b'manifest'
1614 1614
1615 1615 def _setupmanifestcachehooks(self, repo):
1616 1616 """Persist the manifestfulltextcache on lock release"""
1617 1617 if not util.safehasattr(repo, b'_wlockref'):
1618 1618 return
1619 1619
1620 1620 self._fulltextcache._opener = repo.wcachevfs
1621 1621 if repo._currentlock(repo._wlockref) is None:
1622 1622 return
1623 1623
1624 1624 reporef = weakref.ref(repo)
1625 1625 manifestrevlogref = weakref.ref(self)
1626 1626
1627 1627 def persistmanifestcache(success):
1628 1628 # Repo is in an unknown state, do not persist.
1629 1629 if not success:
1630 1630 return
1631 1631
1632 1632 repo = reporef()
1633 1633 self = manifestrevlogref()
1634 1634 if repo is None or self is None:
1635 1635 return
1636 1636 if repo.manifestlog.getstorage(b'') is not self:
1637 1637 # there's a different manifest in play now, abort
1638 1638 return
1639 1639 self._fulltextcache.write()
1640 1640
1641 1641 repo._afterlock(persistmanifestcache)
1642 1642
1643 1643 @property
1644 1644 def fulltextcache(self):
1645 1645 return self._fulltextcache
1646 1646
1647 1647 def clearcaches(self, clear_persisted_data=False):
1648 1648 self._revlog.clearcaches()
1649 1649 self._fulltextcache.clear(clear_persisted_data=clear_persisted_data)
1650 1650 self._dirlogcache = {self.tree: self}
1651 1651
1652 1652 def dirlog(self, d):
1653 1653 if d:
1654 1654 assert self._treeondisk
1655 1655 if d not in self._dirlogcache:
1656 1656 mfrevlog = manifestrevlog(
1657 1657 self.opener, d, self._dirlogcache, treemanifest=self._treeondisk
1658 1658 )
1659 1659 self._dirlogcache[d] = mfrevlog
1660 1660 return self._dirlogcache[d]
1661 1661
1662 1662 def add(
1663 1663 self,
1664 1664 m,
1665 1665 transaction,
1666 1666 link,
1667 1667 p1,
1668 1668 p2,
1669 1669 added,
1670 1670 removed,
1671 1671 readtree=None,
1672 1672 match=None,
1673 1673 ):
1674 1674 """add some manifest entry in to the manifest log
1675 1675
1676 1676 input:
1677 1677
1678 1678 m: the manifest dict we want to store
1679 1679 transaction: the open transaction
1680 1680 p1: manifest-node of p1
1681 1681 p2: manifest-node of p2
1682 1682 added: files added/changed compared to the parent
1683 1683 removed: files removed compared to the parent
1684 1684
1685 1685 tree manifest input:
1686 1686
1687 1687 readtree: a function to read a subtree
1688 1688 match: a filematcher for the subpart of the tree manifest
1689 1689 """
1690 1690 try:
1691 1691 if p1 not in self.fulltextcache:
1692 1692 raise FastdeltaUnavailable()
1693 1693 # If our first parent is in the manifest cache, we can
1694 1694 # compute a delta here using properties we know about the
1695 1695 # manifest up-front, which may save time later for the
1696 1696 # revlog layer.
1697 1697
1698 1698 _checkforbidden(added)
1699 1699 # combine the changed lists into one sorted iterator
1700 1700 work = heapq.merge(
1701 1701 [(x, False) for x in sorted(added)],
1702 1702 [(x, True) for x in sorted(removed)],
1703 1703 )
1704 1704
1705 1705 arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
1706 1706 cachedelta = self._revlog.rev(p1), deltatext
1707 1707 text = util.buffer(arraytext)
1708 1708 rev = self._revlog.addrevision(
1709 1709 text, transaction, link, p1, p2, cachedelta
1710 1710 )
1711 1711 n = self._revlog.node(rev)
1712 1712 except FastdeltaUnavailable:
1713 1713 # The first parent manifest isn't already loaded or the
1714 1714 # manifest implementation doesn't support fastdelta, so
1715 1715 # we'll just encode a fulltext of the manifest and pass
1716 1716 # that through to the revlog layer, and let it handle the
1717 1717 # delta process.
1718 1718 if self._treeondisk:
1719 1719 assert readtree, b"readtree must be set for treemanifest writes"
1720 1720 assert match, b"match must be specified for treemanifest writes"
1721 1721 m1 = readtree(self.tree, p1)
1722 1722 m2 = readtree(self.tree, p2)
1723 1723 n = self._addtree(
1724 1724 m, transaction, link, m1, m2, readtree, match=match
1725 1725 )
1726 1726 arraytext = None
1727 1727 else:
1728 1728 text = m.text()
1729 1729 rev = self._revlog.addrevision(text, transaction, link, p1, p2)
1730 1730 n = self._revlog.node(rev)
1731 1731 arraytext = bytearray(text)
1732 1732
1733 1733 if arraytext is not None:
1734 1734 self.fulltextcache[n] = arraytext
1735 1735
1736 1736 return n
1737 1737
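A tiny sketch of how the fastdelta path in add() merges the change lists into one path-sorted stream of (path, todelete) pairs via heapq.merge; the paths are made up:

import heapq

added = [b'src/new.py', b'README']
removed = [b'old/dead.py']

work = heapq.merge(
    [(x, False) for x in sorted(added)],
    [(x, True) for x in sorted(removed)],
)
print(list(work))
# [(b'README', False), (b'old/dead.py', True), (b'src/new.py', False)]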
1738 1738 def _addtree(self, m, transaction, link, m1, m2, readtree, match):
1739 1739 # If the manifest is unchanged compared to one parent,
1740 1740 # don't write a new revision
1741 1741 if self.tree != b'' and (
1742 1742 m.unmodifiedsince(m1) or m.unmodifiedsince(m2)
1743 1743 ):
1744 1744 return m.node()
1745 1745
1746 1746 def writesubtree(subm, subp1, subp2, match):
1747 1747 sublog = self.dirlog(subm.dir())
1748 1748 sublog.add(
1749 1749 subm,
1750 1750 transaction,
1751 1751 link,
1752 1752 subp1,
1753 1753 subp2,
1754 1754 None,
1755 1755 None,
1756 1756 readtree=readtree,
1757 1757 match=match,
1758 1758 )
1759 1759
1760 1760 m.writesubtrees(m1, m2, writesubtree, match)
1761 1761 text = m.dirtext()
1762 1762 n = None
1763 1763 if self.tree != b'':
1764 1764 # Double-check whether the contents are unchanged relative to one parent
1765 1765 if text == m1.dirtext():
1766 1766 n = m1.node()
1767 1767 elif text == m2.dirtext():
1768 1768 n = m2.node()
1769 1769
1770 1770 if not n:
1771 1771 rev = self._revlog.addrevision(
1772 1772 text, transaction, link, m1.node(), m2.node()
1773 1773 )
1774 1774 n = self._revlog.node(rev)
1775 1775
1776 1776 # Save nodeid so parent manifest can calculate its nodeid
1777 1777 m.setnode(n)
1778 1778 return n
1779 1779
1780 1780 def __len__(self):
1781 1781 return len(self._revlog)
1782 1782
1783 1783 def __iter__(self):
1784 1784 return self._revlog.__iter__()
1785 1785
1786 1786 def rev(self, node):
1787 1787 return self._revlog.rev(node)
1788 1788
1789 1789 def node(self, rev):
1790 1790 return self._revlog.node(rev)
1791 1791
1792 1792 def lookup(self, value):
1793 1793 return self._revlog.lookup(value)
1794 1794
1795 1795 def parentrevs(self, rev):
1796 1796 return self._revlog.parentrevs(rev)
1797 1797
1798 1798 def parents(self, node):
1799 1799 return self._revlog.parents(node)
1800 1800
1801 1801 def linkrev(self, rev):
1802 1802 return self._revlog.linkrev(rev)
1803 1803
1804 1804 def checksize(self):
1805 1805 return self._revlog.checksize()
1806 1806
1807 1807 def revision(self, node, _df=None, raw=False):
1808 1808 return self._revlog.revision(node, _df=_df, raw=raw)
1809 1809
1810 1810 def rawdata(self, node, _df=None):
1811 1811 return self._revlog.rawdata(node, _df=_df)
1812 1812
1813 1813 def revdiff(self, rev1, rev2):
1814 1814 return self._revlog.revdiff(rev1, rev2)
1815 1815
1816 1816 def cmp(self, node, text):
1817 1817 return self._revlog.cmp(node, text)
1818 1818
1819 1819 def deltaparent(self, rev):
1820 1820 return self._revlog.deltaparent(rev)
1821 1821
1822 1822 def emitrevisions(
1823 1823 self,
1824 1824 nodes,
1825 1825 nodesorder=None,
1826 1826 revisiondata=False,
1827 1827 assumehaveparentrevisions=False,
1828 1828 deltamode=repository.CG_DELTAMODE_STD,
1829 sidedata_helpers=None,
1829 1830 ):
1830 1831 return self._revlog.emitrevisions(
1831 1832 nodes,
1832 1833 nodesorder=nodesorder,
1833 1834 revisiondata=revisiondata,
1834 1835 assumehaveparentrevisions=assumehaveparentrevisions,
1835 1836 deltamode=deltamode,
1837 sidedata_helpers=sidedata_helpers,
1836 1838 )
1837 1839
1838 1840 def addgroup(
1839 1841 self,
1840 1842 deltas,
1841 1843 linkmapper,
1842 1844 transaction,
1843 1845 alwayscache=False,
1844 1846 addrevisioncb=None,
1845 1847 duplicaterevisioncb=None,
1846 1848 ):
1847 1849 return self._revlog.addgroup(
1848 1850 deltas,
1849 1851 linkmapper,
1850 1852 transaction,
1851 1853 alwayscache=alwayscache,
1852 1854 addrevisioncb=addrevisioncb,
1853 1855 duplicaterevisioncb=duplicaterevisioncb,
1854 1856 )
1855 1857
1856 1858 def rawsize(self, rev):
1857 1859 return self._revlog.rawsize(rev)
1858 1860
1859 1861 def getstrippoint(self, minlink):
1860 1862 return self._revlog.getstrippoint(minlink)
1861 1863
1862 1864 def strip(self, minlink, transaction):
1863 1865 return self._revlog.strip(minlink, transaction)
1864 1866
1865 1867 def files(self):
1866 1868 return self._revlog.files()
1867 1869
1868 1870 def clone(self, tr, destrevlog, **kwargs):
1869 1871 if not isinstance(destrevlog, manifestrevlog):
1870 1872 raise error.ProgrammingError(b'expected manifestrevlog to clone()')
1871 1873
1872 1874 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
1873 1875
1874 1876 def storageinfo(
1875 1877 self,
1876 1878 exclusivefiles=False,
1877 1879 sharedfiles=False,
1878 1880 revisionscount=False,
1879 1881 trackedsize=False,
1880 1882 storedsize=False,
1881 1883 ):
1882 1884 return self._revlog.storageinfo(
1883 1885 exclusivefiles=exclusivefiles,
1884 1886 sharedfiles=sharedfiles,
1885 1887 revisionscount=revisionscount,
1886 1888 trackedsize=trackedsize,
1887 1889 storedsize=storedsize,
1888 1890 )
1889 1891
1890 1892 @property
1891 1893 def indexfile(self):
1892 1894 return self._revlog.indexfile
1893 1895
1894 1896 @indexfile.setter
1895 1897 def indexfile(self, value):
1896 1898 self._revlog.indexfile = value
1897 1899
1898 1900 @property
1899 1901 def opener(self):
1900 1902 return self._revlog.opener
1901 1903
1902 1904 @opener.setter
1903 1905 def opener(self, value):
1904 1906 self._revlog.opener = value
1905 1907
1906 1908
1907 1909 @interfaceutil.implementer(repository.imanifestlog)
1908 1910 class manifestlog(object):
1909 1911 """A collection class representing the collection of manifest snapshots
1910 1912 referenced by commits in the repository.
1911 1913
1912 1914 In this situation, 'manifest' refers to the abstract concept of a snapshot
1913 1915 of the list of files in the given commit. Consumers of the output of this
1914 1916 class do not care about the implementation details of the actual manifests
1915 1917 they receive (i.e. tree or flat or lazily loaded, etc)."""
1916 1918
1917 1919 def __init__(self, opener, repo, rootstore, narrowmatch):
1918 1920 usetreemanifest = False
1919 1921 cachesize = 4
1920 1922
1921 1923 opts = getattr(opener, 'options', None)
1922 1924 if opts is not None:
1923 1925 usetreemanifest = opts.get(b'treemanifest', usetreemanifest)
1924 1926 cachesize = opts.get(b'manifestcachesize', cachesize)
1925 1927
1926 1928 self._treemanifests = usetreemanifest
1927 1929
1928 1930 self._rootstore = rootstore
1929 1931 self._rootstore._setupmanifestcachehooks(repo)
1930 1932 self._narrowmatch = narrowmatch
1931 1933
1932 1934 # A cache of the manifestctx or treemanifestctx for each directory
1933 1935 self._dirmancache = {}
1934 1936 self._dirmancache[b''] = util.lrucachedict(cachesize)
1935 1937
1936 1938 self._cachesize = cachesize
1937 1939
1938 1940 def __getitem__(self, node):
1939 1941 """Retrieves the manifest instance for the given node. Throws a
1940 1942 LookupError if not found.
1941 1943 """
1942 1944 return self.get(b'', node)
1943 1945
1944 1946 def get(self, tree, node, verify=True):
1945 1947 """Retrieves the manifest instance for the given node. Throws a
1946 1948 LookupError if not found.
1947 1949
1948 1950 `verify` - if True an exception will be thrown if the node is not in
1949 1951 the revlog
1950 1952 """
1951 1953 if node in self._dirmancache.get(tree, ()):
1952 1954 return self._dirmancache[tree][node]
1953 1955
1954 1956 if not self._narrowmatch.always():
1955 1957 if not self._narrowmatch.visitdir(tree[:-1]):
1956 1958 return excludeddirmanifestctx(tree, node)
1957 1959 if tree:
1958 1960 if self._rootstore._treeondisk:
1959 1961 if verify:
1960 1962 # Side effect: LookupError is raised if the node doesn't
1961 1963 # exist.
1962 1964 self.getstorage(tree).rev(node)
1963 1965
1964 1966 m = treemanifestctx(self, tree, node)
1965 1967 else:
1966 1968 raise error.Abort(
1967 1969 _(
1968 1970 b"cannot ask for manifest directory '%s' in a flat "
1969 1971 b"manifest"
1970 1972 )
1971 1973 % tree
1972 1974 )
1973 1975 else:
1974 1976 if verify:
1975 1977 # Side effect: LookupError is raised if the node doesn't exist.
1976 1978 self._rootstore.rev(node)
1977 1979
1978 1980 if self._treemanifests:
1979 1981 m = treemanifestctx(self, b'', node)
1980 1982 else:
1981 1983 m = manifestctx(self, node)
1982 1984
1983 1985 if node != nullid:
1984 1986 mancache = self._dirmancache.get(tree)
1985 1987 if not mancache:
1986 1988 mancache = util.lrucachedict(self._cachesize)
1987 1989 self._dirmancache[tree] = mancache
1988 1990 mancache[node] = m
1989 1991 return m
1990 1992
1991 1993 def getstorage(self, tree):
1992 1994 return self._rootstore.dirlog(tree)
1993 1995
1994 1996 def clearcaches(self, clear_persisted_data=False):
1995 1997 self._dirmancache.clear()
1996 1998 self._rootstore.clearcaches(clear_persisted_data=clear_persisted_data)
1997 1999
1998 2000 def rev(self, node):
1999 2001 return self._rootstore.rev(node)
2000 2002
2001 2003 def update_caches(self, transaction):
2002 2004 return self._rootstore._revlog.update_caches(transaction=transaction)
2003 2005
2004 2006
2005 2007 @interfaceutil.implementer(repository.imanifestrevisionwritable)
2006 2008 class memmanifestctx(object):
2007 2009 def __init__(self, manifestlog):
2008 2010 self._manifestlog = manifestlog
2009 2011 self._manifestdict = manifestdict()
2010 2012
2011 2013 def _storage(self):
2012 2014 return self._manifestlog.getstorage(b'')
2013 2015
2014 2016 def copy(self):
2015 2017 memmf = memmanifestctx(self._manifestlog)
2016 2018 memmf._manifestdict = self.read().copy()
2017 2019 return memmf
2018 2020
2019 2021 def read(self):
2020 2022 return self._manifestdict
2021 2023
2022 2024 def write(self, transaction, link, p1, p2, added, removed, match=None):
2023 2025 return self._storage().add(
2024 2026 self._manifestdict,
2025 2027 transaction,
2026 2028 link,
2027 2029 p1,
2028 2030 p2,
2029 2031 added,
2030 2032 removed,
2031 2033 match=match,
2032 2034 )
2033 2035
2034 2036
2035 2037 @interfaceutil.implementer(repository.imanifestrevisionstored)
2036 2038 class manifestctx(object):
2037 2039 """A class representing a single revision of a manifest, including its
2038 2040 contents, its parent revs, and its linkrev.
2039 2041 """
2040 2042
2041 2043 def __init__(self, manifestlog, node):
2042 2044 self._manifestlog = manifestlog
2043 2045 self._data = None
2044 2046
2045 2047 self._node = node
2046 2048
2047 2049 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
2048 2050 # but let's add it later when something needs it and we can load it
2049 2051 # lazily.
2050 2052 # self.p1, self.p2 = store.parents(node)
2051 2053 # rev = store.rev(node)
2052 2054 # self.linkrev = store.linkrev(rev)
2053 2055
2054 2056 def _storage(self):
2055 2057 return self._manifestlog.getstorage(b'')
2056 2058
2057 2059 def node(self):
2058 2060 return self._node
2059 2061
2060 2062 def copy(self):
2061 2063 memmf = memmanifestctx(self._manifestlog)
2062 2064 memmf._manifestdict = self.read().copy()
2063 2065 return memmf
2064 2066
2065 2067 @propertycache
2066 2068 def parents(self):
2067 2069 return self._storage().parents(self._node)
2068 2070
2069 2071 def read(self):
2070 2072 if self._data is None:
2071 2073 if self._node == nullid:
2072 2074 self._data = manifestdict()
2073 2075 else:
2074 2076 store = self._storage()
2075 2077 if self._node in store.fulltextcache:
2076 2078 text = pycompat.bytestr(store.fulltextcache[self._node])
2077 2079 else:
2078 2080 text = store.revision(self._node)
2079 2081 arraytext = bytearray(text)
2080 2082 store.fulltextcache[self._node] = arraytext
2081 2083 self._data = manifestdict(text)
2082 2084 return self._data
2083 2085
2084 2086 def readfast(self, shallow=False):
2085 2087 """Calls either readdelta or read, based on which would be less work.
2086 2088 readdelta is called if the stored delta is against a parent revision, and
2087 2089 can therefore be read quickly.
2088 2090
2089 2091 If `shallow` is True, nothing changes since this is a flat manifest.
2090 2092 """
2091 2093 store = self._storage()
2092 2094 r = store.rev(self._node)
2093 2095 deltaparent = store.deltaparent(r)
2094 2096 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2095 2097 return self.readdelta()
2096 2098 return self.read()
2097 2099
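# A minimal sketch of the decision readfast() makes above: the on-disk delta is
# only reused when its base revision is one of the revision's parents, since
# only then does it describe a change against p1/p2. The revision numbers are
# arbitrary examples and -1 stands in for nullrev.
example_deltaparent = 5       # revision the stored delta is based on
example_parentrevs = (5, -1)  # (p1 rev, p2 rev)
example_use_delta = (
    example_deltaparent != -1 and example_deltaparent in example_parentrevs
)
assert example_use_delta  # readdelta() is chosen; otherwise read() is used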
2098 2100 def readdelta(self, shallow=False):
2099 2101 """Returns a manifest containing just the entries that are present
2100 2102 in this manifest, but not in its p1 manifest. This is efficient to read
2101 2103 if the revlog delta is already against p1.
2102 2104
2103 2105 Changing the value of `shallow` has no effect on flat manifests.
2104 2106 """
2105 2107 store = self._storage()
2106 2108 r = store.rev(self._node)
2107 2109 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2108 2110 return manifestdict(d)
2109 2111
2110 2112 def find(self, key):
2111 2113 return self.read().find(key)
2112 2114
2113 2115
2114 2116 @interfaceutil.implementer(repository.imanifestrevisionwritable)
2115 2117 class memtreemanifestctx(object):
2116 2118 def __init__(self, manifestlog, dir=b''):
2117 2119 self._manifestlog = manifestlog
2118 2120 self._dir = dir
2119 2121 self._treemanifest = treemanifest()
2120 2122
2121 2123 def _storage(self):
2122 2124 return self._manifestlog.getstorage(b'')
2123 2125
2124 2126 def copy(self):
2125 2127 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2126 2128 memmf._treemanifest = self._treemanifest.copy()
2127 2129 return memmf
2128 2130
2129 2131 def read(self):
2130 2132 return self._treemanifest
2131 2133
2132 2134 def write(self, transaction, link, p1, p2, added, removed, match=None):
2133 2135 def readtree(dir, node):
2134 2136 return self._manifestlog.get(dir, node).read()
2135 2137
2136 2138 return self._storage().add(
2137 2139 self._treemanifest,
2138 2140 transaction,
2139 2141 link,
2140 2142 p1,
2141 2143 p2,
2142 2144 added,
2143 2145 removed,
2144 2146 readtree=readtree,
2145 2147 match=match,
2146 2148 )
2147 2149
2148 2150
2149 2151 @interfaceutil.implementer(repository.imanifestrevisionstored)
2150 2152 class treemanifestctx(object):
2151 2153 def __init__(self, manifestlog, dir, node):
2152 2154 self._manifestlog = manifestlog
2153 2155 self._dir = dir
2154 2156 self._data = None
2155 2157
2156 2158 self._node = node
2157 2159
2158 2160 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
2159 2161 # we can instantiate treemanifestctx objects for directories we don't
2160 2162 # have on disk.
2161 2163 # self.p1, self.p2 = store.parents(node)
2162 2164 # rev = store.rev(node)
2163 2165 # self.linkrev = store.linkrev(rev)
2164 2166
2165 2167 def _storage(self):
2166 2168 narrowmatch = self._manifestlog._narrowmatch
2167 2169 if not narrowmatch.always():
2168 2170 if not narrowmatch.visitdir(self._dir[:-1]):
2169 2171 return excludedmanifestrevlog(self._dir)
2170 2172 return self._manifestlog.getstorage(self._dir)
2171 2173
2172 2174 def read(self):
2173 2175 if self._data is None:
2174 2176 store = self._storage()
2175 2177 if self._node == nullid:
2176 2178 self._data = treemanifest()
2177 2179 # TODO accessing non-public API
2178 2180 elif store._treeondisk:
2179 2181 m = treemanifest(dir=self._dir)
2180 2182
2181 2183 def gettext():
2182 2184 return store.revision(self._node)
2183 2185
2184 2186 def readsubtree(dir, subm):
2185 2187 # Set verify to False since we need to be able to create
2186 2188 # subtrees for trees that don't exist on disk.
2187 2189 return self._manifestlog.get(dir, subm, verify=False).read()
2188 2190
2189 2191 m.read(gettext, readsubtree)
2190 2192 m.setnode(self._node)
2191 2193 self._data = m
2192 2194 else:
2193 2195 if self._node in store.fulltextcache:
2194 2196 text = pycompat.bytestr(store.fulltextcache[self._node])
2195 2197 else:
2196 2198 text = store.revision(self._node)
2197 2199 arraytext = bytearray(text)
2198 2200 store.fulltextcache[self._node] = arraytext
2199 2201 self._data = treemanifest(dir=self._dir, text=text)
2200 2202
2201 2203 return self._data
2202 2204
2203 2205 def node(self):
2204 2206 return self._node
2205 2207
2206 2208 def copy(self):
2207 2209 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2208 2210 memmf._treemanifest = self.read().copy()
2209 2211 return memmf
2210 2212
2211 2213 @propertycache
2212 2214 def parents(self):
2213 2215 return self._storage().parents(self._node)
2214 2216
2215 2217 def readdelta(self, shallow=False):
2216 2218 """Returns a manifest containing just the entries that are present
2217 2219 in this manifest, but not in its p1 manifest. This is efficient to read
2218 2220 if the revlog delta is already against p1.
2219 2221
2220 2222 If `shallow` is True, this will read the delta for this directory,
2221 2223 without recursively reading subdirectory manifests. Instead, any
2222 2224 subdirectory entry will be reported as it appears in the manifest, i.e.
2223 2225 the subdirectory will be reported among files and distinguished only by
2224 2226 its 't' flag.
2225 2227 """
2226 2228 store = self._storage()
2227 2229 if shallow:
2228 2230 r = store.rev(self._node)
2229 2231 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2230 2232 return manifestdict(d)
2231 2233 else:
2232 2234 # Need to perform a slow delta
2233 2235 r0 = store.deltaparent(store.rev(self._node))
2234 2236 m0 = self._manifestlog.get(self._dir, store.node(r0)).read()
2235 2237 m1 = self.read()
2236 2238 md = treemanifest(dir=self._dir)
2237 2239 for f, ((n0, fl0), (n1, fl1)) in pycompat.iteritems(m0.diff(m1)):
2238 2240 if n1:
2239 2241 md[f] = n1
2240 2242 if fl1:
2241 2243 md.setflag(f, fl1)
2242 2244 return md
2243 2245
2244 2246 def readfast(self, shallow=False):
2245 2247 """Calls either readdelta or read, based on which would be less work.
2246 2248 readdelta is called if the stored delta is against a parent revision, and
2247 2249 can therefore be read quickly.
2248 2250
2249 2251 If `shallow` is True, it only returns the entries from this manifest,
2250 2252 and not any submanifests.
2251 2253 """
2252 2254 store = self._storage()
2253 2255 r = store.rev(self._node)
2254 2256 deltaparent = store.deltaparent(r)
2255 2257 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2256 2258 return self.readdelta(shallow=shallow)
2257 2259
2258 2260 if shallow:
2259 2261 return manifestdict(store.revision(self._node))
2260 2262 else:
2261 2263 return self.read()
2262 2264
2263 2265 def find(self, key):
2264 2266 return self.read().find(key)
2265 2267
2266 2268
2267 2269 class excludeddir(treemanifest):
2268 2270 """Stand-in for a directory that is excluded from the repository.
2269 2271
2270 2272 With narrowing active on a repository that uses treemanifests,
2271 2273 some of the directory revlogs will be excluded from the resulting
2272 2274 clone. This is a huge storage win for clients, but means we need
2273 2275 some sort of pseudo-manifest to surface to internals so we can
2274 2276 detect a merge conflict outside the narrowspec. That's what this
2275 2277 class is: it stands in for a directory whose node is known, but
2276 2278 whose contents are unknown.
2277 2279 """
2278 2280
2279 2281 def __init__(self, dir, node):
2280 2282 super(excludeddir, self).__init__(dir)
2281 2283 self._node = node
2282 2284 # Add an empty file, which will be included by iterators and such,
2283 2285 # appearing as the directory itself (i.e. something like "dir/")
2284 2286 self._files[b''] = node
2285 2287 self._flags[b''] = b't'
2286 2288
2287 2289 # Manifests outside the narrowspec should never be modified, so avoid
2288 2290 # copying. This makes a noticeable difference when there are very many
2289 2291 # directories outside the narrowspec. Also, it makes sense for the copy to
2290 2292 # be of the same type as the original, which would not happen with the
2291 2293 # super type's copy().
2292 2294 def copy(self):
2293 2295 return self
2294 2296
2295 2297
2296 2298 class excludeddirmanifestctx(treemanifestctx):
2297 2299 """context wrapper for excludeddir - see that docstring for rationale"""
2298 2300
2299 2301 def __init__(self, dir, node):
2300 2302 self._dir = dir
2301 2303 self._node = node
2302 2304
2303 2305 def read(self):
2304 2306 return excludeddir(self._dir, self._node)
2305 2307
2306 2308 def readfast(self, shallow=False):
2307 2309 # special version of readfast since we don't have underlying storage
2308 2310 return self.read()
2309 2311
2310 2312 def write(self, *args):
2311 2313 raise error.ProgrammingError(
2312 2314 b'attempt to write manifest from excluded dir %s' % self._dir
2313 2315 )
2314 2316
2315 2317
2316 2318 class excludedmanifestrevlog(manifestrevlog):
2317 2319 """Stand-in for excluded treemanifest revlogs.
2318 2320
2319 2321 When narrowing is active on a treemanifest repository, we'll have
2320 2322 references to directories we can't see due to the revlog being
2321 2323 skipped. This class exists to conform to the manifestrevlog
2322 2324 interface for those directories and proactively prevent writes to
2323 2325 outside the narrowspec.
2324 2326 """
2325 2327
2326 2328 def __init__(self, dir):
2327 2329 self._dir = dir
2328 2330
2329 2331 def __len__(self):
2330 2332 raise error.ProgrammingError(
2331 2333 b'attempt to get length of excluded dir %s' % self._dir
2332 2334 )
2333 2335
2334 2336 def rev(self, node):
2335 2337 raise error.ProgrammingError(
2336 2338 b'attempt to get rev from excluded dir %s' % self._dir
2337 2339 )
2338 2340
2339 2341 def linkrev(self, node):
2340 2342 raise error.ProgrammingError(
2341 2343 b'attempt to get linkrev from excluded dir %s' % self._dir
2342 2344 )
2343 2345
2344 2346 def node(self, rev):
2345 2347 raise error.ProgrammingError(
2346 2348 b'attempt to get node from excluded dir %s' % self._dir
2347 2349 )
2348 2350
2349 2351 def add(self, *args, **kwargs):
2350 2352 # We should never write entries in dirlogs outside the narrow clone.
2351 2353 # However, the method still gets called from writesubtree() in
2352 2354 # _addtree(), so we need to handle it. We should possibly make writesubtree()
2353 2355 # avoid calling add() with a clean manifest (_dirty is always False
2354 2356 # in excludeddir instances).
2355 2357 pass
@@ -1,3201 +1,3203 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import io
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullhex,
29 29 nullid,
30 30 nullrev,
31 31 short,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .pycompat import getattr
39 39 from .revlogutils.constants import (
40 40 FLAG_GENERALDELTA,
41 41 FLAG_INLINE_DATA,
42 42 REVLOGV0,
43 43 REVLOGV1,
44 44 REVLOGV1_FLAGS,
45 45 REVLOGV2,
46 46 REVLOGV2_FLAGS,
47 47 REVLOG_DEFAULT_FLAGS,
48 48 REVLOG_DEFAULT_FORMAT,
49 49 REVLOG_DEFAULT_VERSION,
50 50 )
51 51 from .revlogutils.flagutil import (
52 52 REVIDX_DEFAULT_FLAGS,
53 53 REVIDX_ELLIPSIS,
54 54 REVIDX_EXTSTORED,
55 55 REVIDX_FLAGS_ORDER,
56 56 REVIDX_HASCOPIESINFO,
57 57 REVIDX_ISCENSORED,
58 58 REVIDX_RAWTEXT_CHANGING_FLAGS,
59 59 REVIDX_SIDEDATA,
60 60 )
61 61 from .thirdparty import attr
62 62 from . import (
63 63 ancestor,
64 64 dagop,
65 65 error,
66 66 mdiff,
67 67 policy,
68 68 pycompat,
69 69 templatefilters,
70 70 util,
71 71 )
72 72 from .interfaces import (
73 73 repository,
74 74 util as interfaceutil,
75 75 )
76 76 from .revlogutils import (
77 77 deltas as deltautil,
78 78 flagutil,
79 79 nodemap as nodemaputil,
80 80 sidedata as sidedatautil,
81 81 )
82 82 from .utils import (
83 83 storageutil,
84 84 stringutil,
85 85 )
86 86 from .pure import parsers as pureparsers
87 87
88 88 # blanket usage of all the names to prevent pyflakes "unused import" warnings.
89 89 # We need these names available in the module for extensions.
90 90 REVLOGV0
91 91 REVLOGV1
92 92 REVLOGV2
93 93 FLAG_INLINE_DATA
94 94 FLAG_GENERALDELTA
95 95 REVLOG_DEFAULT_FLAGS
96 96 REVLOG_DEFAULT_FORMAT
97 97 REVLOG_DEFAULT_VERSION
98 98 REVLOGV1_FLAGS
99 99 REVLOGV2_FLAGS
100 100 REVIDX_ISCENSORED
101 101 REVIDX_ELLIPSIS
102 102 REVIDX_SIDEDATA
103 103 REVIDX_HASCOPIESINFO
104 104 REVIDX_EXTSTORED
105 105 REVIDX_DEFAULT_FLAGS
106 106 REVIDX_FLAGS_ORDER
107 107 REVIDX_RAWTEXT_CHANGING_FLAGS
108 108
109 109 parsers = policy.importmod('parsers')
110 110 rustancestor = policy.importrust('ancestor')
111 111 rustdagop = policy.importrust('dagop')
112 112 rustrevlog = policy.importrust('revlog')
113 113
114 114 # Aliased for performance.
115 115 _zlibdecompress = zlib.decompress
116 116
117 117 # max size of revlog with inline data
118 118 _maxinline = 131072
119 119 _chunksize = 1048576
120 120
121 121 # Flag processors for REVIDX_ELLIPSIS.
122 122 def ellipsisreadprocessor(rl, text):
123 123 return text, False
124 124
125 125
126 126 def ellipsiswriteprocessor(rl, text):
127 127 return text, False
128 128
129 129
130 130 def ellipsisrawprocessor(rl, text):
131 131 return False
132 132
133 133
134 134 ellipsisprocessor = (
135 135 ellipsisreadprocessor,
136 136 ellipsiswriteprocessor,
137 137 ellipsisrawprocessor,
138 138 )
139 139
140 140
141 141 def getoffset(q):
142 142 return int(q >> 16)
143 143
144 144
145 145 def gettype(q):
146 146 return int(q & 0xFFFF)
147 147
148 148
149 149 def offset_type(offset, type):
150 150 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
151 151 raise ValueError(b'unknown revlog index flags')
152 152 return int(int(offset) << 16 | type)
153 153
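# A minimal sketch of the packing done by offset_type()/getoffset()/gettype()
# above: a data-file offset in the high bits and a 16-bit flag field in the low
# bits of one integer. The values are arbitrary examples; note that
# offset_type() itself additionally rejects flags outside REVIDX_KNOWN_FLAGS.
_example_packed = (1024 << 16) | 0x0001
assert (_example_packed >> 16) == 1024       # what getoffset() recovers
assert (_example_packed & 0xFFFF) == 0x0001  # what gettype() recovers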
154 154
155 155 def _verify_revision(rl, skipflags, state, node):
156 156 """Verify the integrity of the given revlog ``node`` while providing a hook
157 157 point for extensions to influence the operation."""
158 158 if skipflags:
159 159 state[b'skipread'].add(node)
160 160 else:
161 161 # Side-effect: read content and verify hash.
162 162 rl.revision(node)
163 163
164 164
165 165 # True if a fast implementation for persistent-nodemap is available
166 166 #
167 167 # We also consider the "pure" python implementation to be "fast", because
168 168 # people using pure python don't really have performance considerations (and
169 169 # have a wheelbarrow of other slowness sources anyway)
170 170 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
171 171 parsers, 'BaseIndexObject'
172 172 )
173 173
174 174
175 175 @attr.s(slots=True, frozen=True)
176 176 class _revisioninfo(object):
177 177 """Information about a revision that allows building its fulltext
178 178 node: expected hash of the revision
179 179 p1, p2: parent revs of the revision
180 180 btext: built text cache consisting of a one-element list
181 181 cachedelta: (baserev, uncompressed_delta) or None
182 182 flags: flags associated with the revision storage
183 183
184 184 One of btext[0] or cachedelta must be set.
185 185 """
186 186
187 187 node = attr.ib()
188 188 p1 = attr.ib()
189 189 p2 = attr.ib()
190 190 btext = attr.ib()
191 191 textlen = attr.ib()
192 192 cachedelta = attr.ib()
193 193 flags = attr.ib()
194 194
195 195
196 196 @interfaceutil.implementer(repository.irevisiondelta)
197 197 @attr.s(slots=True)
198 198 class revlogrevisiondelta(object):
199 199 node = attr.ib()
200 200 p1node = attr.ib()
201 201 p2node = attr.ib()
202 202 basenode = attr.ib()
203 203 flags = attr.ib()
204 204 baserevisionsize = attr.ib()
205 205 revision = attr.ib()
206 206 delta = attr.ib()
207 207 sidedata = attr.ib()
208 208 linknode = attr.ib(default=None)
209 209
210 210
211 211 @interfaceutil.implementer(repository.iverifyproblem)
212 212 @attr.s(frozen=True)
213 213 class revlogproblem(object):
214 214 warning = attr.ib(default=None)
215 215 error = attr.ib(default=None)
216 216 node = attr.ib(default=None)
217 217
218 218
219 219 # index v0:
220 220 # 4 bytes: offset
221 221 # 4 bytes: compressed length
222 222 # 4 bytes: base rev
223 223 # 4 bytes: link rev
224 224 # 20 bytes: parent 1 nodeid
225 225 # 20 bytes: parent 2 nodeid
226 226 # 20 bytes: nodeid
227 227 indexformatv0 = struct.Struct(b">4l20s20s20s")
228 228 indexformatv0_pack = indexformatv0.pack
229 229 indexformatv0_unpack = indexformatv0.unpack
230 230
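# A minimal sketch of one v0 index record using the format described above:
# four 4-byte big-endian integers followed by three 20-byte nodeids, 76 bytes
# in total. All field values are made-up examples.
_example_v0_record = indexformatv0_pack(
    0,             # offset
    42,            # compressed length
    0,             # base rev
    7,             # link rev
    b"\x11" * 20,  # parent 1 nodeid
    b"\x22" * 20,  # parent 2 nodeid
    b"\x33" * 20,  # nodeid
)
assert len(_example_v0_record) == indexformatv0.size == 76
assert indexformatv0_unpack(_example_v0_record)[3] == 7  # link rev round-trips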
231 231
232 232 class revlogoldindex(list):
233 233 @property
234 234 def nodemap(self):
235 235 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
236 236 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
237 237 return self._nodemap
238 238
239 239 @util.propertycache
240 240 def _nodemap(self):
241 241 nodemap = nodemaputil.NodeMap({nullid: nullrev})
242 242 for r in range(0, len(self)):
243 243 n = self[r][7]
244 244 nodemap[n] = r
245 245 return nodemap
246 246
247 247 def has_node(self, node):
248 248 """return True if the node exist in the index"""
249 249 return node in self._nodemap
250 250
251 251 def rev(self, node):
252 252 """return a revision for a node
253 253
254 254 If the node is unknown, raise a RevlogError"""
255 255 return self._nodemap[node]
256 256
257 257 def get_rev(self, node):
258 258 """return a revision for a node
259 259
260 260 If the node is unknown, return None"""
261 261 return self._nodemap.get(node)
262 262
263 263 def append(self, tup):
264 264 self._nodemap[tup[7]] = len(self)
265 265 super(revlogoldindex, self).append(tup)
266 266
267 267 def __delitem__(self, i):
268 268 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
269 269 raise ValueError(b"deleting slices only supports a:-1 with step 1")
270 270 for r in pycompat.xrange(i.start, len(self)):
271 271 del self._nodemap[self[r][7]]
272 272 super(revlogoldindex, self).__delitem__(i)
273 273
274 274 def clearcaches(self):
275 275 self.__dict__.pop('_nodemap', None)
276 276
277 277 def __getitem__(self, i):
278 278 if i == -1:
279 279 return (0, 0, 0, -1, -1, -1, -1, nullid)
280 280 return list.__getitem__(self, i)
281 281
282 282
283 283 class revlogoldio(object):
284 284 def __init__(self):
285 285 self.size = indexformatv0.size
286 286
287 287 def parseindex(self, data, inline):
288 288 s = self.size
289 289 index = []
290 290 nodemap = nodemaputil.NodeMap({nullid: nullrev})
291 291 n = off = 0
292 292 l = len(data)
293 293 while off + s <= l:
294 294 cur = data[off : off + s]
295 295 off += s
296 296 e = indexformatv0_unpack(cur)
297 297 # transform to revlogv1 format
298 298 e2 = (
299 299 offset_type(e[0], 0),
300 300 e[1],
301 301 -1,
302 302 e[2],
303 303 e[3],
304 304 nodemap.get(e[4], nullrev),
305 305 nodemap.get(e[5], nullrev),
306 306 e[6],
307 307 )
308 308 index.append(e2)
309 309 nodemap[e[6]] = n
310 310 n += 1
311 311
312 312 index = revlogoldindex(index)
313 313 return index, None
314 314
315 315 def packentry(self, entry, node, version, rev):
316 316 if gettype(entry[0]):
317 317 raise error.RevlogError(
318 318 _(b'index entry flags need revlog version 1')
319 319 )
320 320 e2 = (
321 321 getoffset(entry[0]),
322 322 entry[1],
323 323 entry[3],
324 324 entry[4],
325 325 node(entry[5]),
326 326 node(entry[6]),
327 327 entry[7],
328 328 )
329 329 return indexformatv0_pack(*e2)
330 330
331 331
332 332 # index ng:
333 333 # 6 bytes: offset
334 334 # 2 bytes: flags
335 335 # 4 bytes: compressed length
336 336 # 4 bytes: uncompressed length
337 337 # 4 bytes: base rev
338 338 # 4 bytes: link rev
339 339 # 4 bytes: parent 1 rev
340 340 # 4 bytes: parent 2 rev
341 341 # 32 bytes: nodeid
342 342 indexformatng = struct.Struct(b">Qiiiiii20s12x")
343 343 indexformatng_pack = indexformatng.pack
344 344 versionformat = struct.Struct(b">I")
345 345 versionformat_pack = versionformat.pack
346 346 versionformat_unpack = versionformat.unpack
347 347
348 348 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
349 349 # signed integer)
350 350 _maxentrysize = 0x7FFFFFFF
351 351
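# A minimal sketch of one v1 ("ng") index record using the format defined
# above, and of how revlogio.packentry() below splices the version header into
# the very first record: the leading 4 bytes of revision 0's entry are replaced
# by the version/flags word. All concrete values are made-up examples.
_example_ng_entry = indexformatng_pack(
    offset_type(0, 0),  # offset and flags packed into one integer
    11,                 # compressed length
    23,                 # uncompressed length
    0,                  # base rev
    0,                  # link rev
    -1,                 # parent 1 rev (nullrev)
    -1,                 # parent 2 rev
    b"\xaa" * 20,       # nodeid (padded to 32 bytes by the trailing 12x)
)
assert len(_example_ng_entry) == indexformatng.size == 64
# A hypothetical version/flags word, purely for the sake of the example:
_example_rev0_entry = versionformat_pack(0x00010001) + _example_ng_entry[4:]
assert len(_example_rev0_entry) == 64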
352 352
353 353 class revlogio(object):
354 354 def __init__(self):
355 355 self.size = indexformatng.size
356 356
357 357 def parseindex(self, data, inline):
358 358 # call the C implementation to parse the index data
359 359 index, cache = parsers.parse_index2(data, inline)
360 360 return index, cache
361 361
362 362 def packentry(self, entry, node, version, rev):
363 363 p = indexformatng_pack(*entry)
364 364 if rev == 0:
365 365 p = versionformat_pack(version) + p[4:]
366 366 return p
367 367
368 368
369 369 indexformatv2 = struct.Struct(pureparsers.Index2Mixin.index_format)
370 370 indexformatv2_pack = indexformatv2.pack
371 371
372 372
373 373 class revlogv2io(object):
374 374 def __init__(self):
375 375 self.size = indexformatv2.size
376 376
377 377 def parseindex(self, data, inline):
378 378 index, cache = parsers.parse_index2(data, inline, revlogv2=True)
379 379 return index, cache
380 380
381 381 def packentry(self, entry, node, version, rev):
382 382 p = indexformatv2_pack(*entry)
383 383 if rev == 0:
384 384 p = versionformat_pack(version) + p[4:]
385 385 return p
386 386
387 387
388 388 NodemapRevlogIO = None
389 389
390 390 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
391 391
392 392 class NodemapRevlogIO(revlogio):
393 393 """A debug oriented IO class that return a PersistentNodeMapIndexObject
394 394
395 395 The PersistentNodeMapIndexObject object is meant to test the persistent nodemap feature.
396 396 """
397 397
398 398 def parseindex(self, data, inline):
399 399 index, cache = parsers.parse_index_devel_nodemap(data, inline)
400 400 return index, cache
401 401
402 402
403 403 class rustrevlogio(revlogio):
404 404 def parseindex(self, data, inline):
405 405 index, cache = super(rustrevlogio, self).parseindex(data, inline)
406 406 return rustrevlog.MixedIndex(index), cache
407 407
408 408
409 409 class revlog(object):
410 410 """
411 411 the underlying revision storage object
412 412
413 413 A revlog consists of two parts, an index and the revision data.
414 414
415 415 The index is a file with a fixed record size containing
416 416 information on each revision, including its nodeid (hash), the
417 417 nodeids of its parents, the position and offset of its data within
418 418 the data file, and the revision it's based on. Finally, each entry
419 419 contains a linkrev entry that can serve as a pointer to external
420 420 data.
421 421
422 422 The revision data itself is a linear collection of data chunks.
423 423 Each chunk represents a revision and is usually represented as a
424 424 delta against the previous chunk. To bound lookup time, runs of
425 425 deltas are limited to about 2 times the length of the original
426 426 version data. This makes retrieval of a version proportional to
427 427 its size, or O(1) relative to the number of revisions.
428 428
429 429 Both pieces of the revlog are written to in an append-only
430 430 fashion, which means we never need to rewrite a file to insert or
431 431 remove data, and can use some simple techniques to avoid the need
432 432 for locking while reading.
433 433
434 434 If checkambig, indexfile is opened with checkambig=True at
435 435 writing, to avoid file stat ambiguity.
436 436
437 437 If mmaplargeindex is True, and an mmapindexthreshold is set, the
438 438 index will be mmapped rather than read if it is larger than the
439 439 configured threshold.
440 440
441 441 If censorable is True, the revlog can have censored revisions.
442 442
443 443 If `upperboundcomp` is not None, this is the expected maximal gain from
444 444 compression for the data content.
445 445
446 446 `concurrencychecker` is an optional function that receives 3 arguments: a
447 447 file handle, a filename, and an expected position. It should check whether
448 448 the current position in the file handle is valid, and log/warn/fail (by
449 449 raising).
450 450 """
451 451
452 452 _flagserrorclass = error.RevlogError
453 453
454 454 def __init__(
455 455 self,
456 456 opener,
457 457 indexfile,
458 458 datafile=None,
459 459 checkambig=False,
460 460 mmaplargeindex=False,
461 461 censorable=False,
462 462 upperboundcomp=None,
463 463 persistentnodemap=False,
464 464 concurrencychecker=None,
465 465 ):
466 466 """
467 467 create a revlog object
468 468
469 469 opener is a function that abstracts the file opening operation
470 470 and can be used to implement COW semantics or the like.
471 471
472 472 """
473 473 self.upperboundcomp = upperboundcomp
474 474 self.indexfile = indexfile
475 475 self.datafile = datafile or (indexfile[:-2] + b".d")
476 476 self.nodemap_file = None
477 477 if persistentnodemap:
478 478 self.nodemap_file = nodemaputil.get_nodemap_file(
479 479 opener, self.indexfile
480 480 )
481 481
482 482 self.opener = opener
483 483 # When True, indexfile is opened with checkambig=True at writing, to
484 484 # avoid file stat ambiguity.
485 485 self._checkambig = checkambig
486 486 self._mmaplargeindex = mmaplargeindex
487 487 self._censorable = censorable
488 488 # 3-tuple of (node, rev, text) for a raw revision.
489 489 self._revisioncache = None
490 490 # Maps rev to chain base rev.
491 491 self._chainbasecache = util.lrucachedict(100)
492 492 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
493 493 self._chunkcache = (0, b'')
494 494 # How much data to read and cache into the raw revlog data cache.
495 495 self._chunkcachesize = 65536
496 496 self._maxchainlen = None
497 497 self._deltabothparents = True
498 498 self.index = None
499 499 self._nodemap_docket = None
500 500 # Mapping of partial identifiers to full nodes.
501 501 self._pcache = {}
502 502 # Mapping of revision integer to full node.
503 503 self._compengine = b'zlib'
504 504 self._compengineopts = {}
505 505 self._maxdeltachainspan = -1
506 506 self._withsparseread = False
507 507 self._sparserevlog = False
508 508 self._srdensitythreshold = 0.50
509 509 self._srmingapsize = 262144
510 510
511 511 # Make copy of flag processors so each revlog instance can support
512 512 # custom flags.
513 513 self._flagprocessors = dict(flagutil.flagprocessors)
514 514
515 515 # 2-tuple of file handles being used for active writing.
516 516 self._writinghandles = None
517 517
518 518 self._loadindex()
519 519
520 520 self._concurrencychecker = concurrencychecker
521 521
522 522 def _loadindex(self):
523 523 mmapindexthreshold = None
524 524 opts = self.opener.options
525 525
526 526 if b'revlogv2' in opts:
527 527 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
528 528 elif b'revlogv1' in opts:
529 529 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
530 530 if b'generaldelta' in opts:
531 531 newversionflags |= FLAG_GENERALDELTA
532 532 elif b'revlogv0' in self.opener.options:
533 533 newversionflags = REVLOGV0
534 534 else:
535 535 newversionflags = REVLOG_DEFAULT_VERSION
536 536
537 537 if b'chunkcachesize' in opts:
538 538 self._chunkcachesize = opts[b'chunkcachesize']
539 539 if b'maxchainlen' in opts:
540 540 self._maxchainlen = opts[b'maxchainlen']
541 541 if b'deltabothparents' in opts:
542 542 self._deltabothparents = opts[b'deltabothparents']
543 543 self._lazydelta = bool(opts.get(b'lazydelta', True))
544 544 self._lazydeltabase = False
545 545 if self._lazydelta:
546 546 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
547 547 if b'compengine' in opts:
548 548 self._compengine = opts[b'compengine']
549 549 if b'zlib.level' in opts:
550 550 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
551 551 if b'zstd.level' in opts:
552 552 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
553 553 if b'maxdeltachainspan' in opts:
554 554 self._maxdeltachainspan = opts[b'maxdeltachainspan']
555 555 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
556 556 mmapindexthreshold = opts[b'mmapindexthreshold']
557 557 self.hassidedata = bool(opts.get(b'side-data', False))
558 558 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
559 559 withsparseread = bool(opts.get(b'with-sparse-read', False))
560 560 # sparse-revlog forces sparse-read
561 561 self._withsparseread = self._sparserevlog or withsparseread
562 562 if b'sparse-read-density-threshold' in opts:
563 563 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
564 564 if b'sparse-read-min-gap-size' in opts:
565 565 self._srmingapsize = opts[b'sparse-read-min-gap-size']
566 566 if opts.get(b'enableellipsis'):
567 567 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
568 568
569 569 # revlog v0 doesn't have flag processors
570 570 for flag, processor in pycompat.iteritems(
571 571 opts.get(b'flagprocessors', {})
572 572 ):
573 573 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
574 574
575 575 if self._chunkcachesize <= 0:
576 576 raise error.RevlogError(
577 577 _(b'revlog chunk cache size %r is not greater than 0')
578 578 % self._chunkcachesize
579 579 )
580 580 elif self._chunkcachesize & (self._chunkcachesize - 1):
581 581 raise error.RevlogError(
582 582 _(b'revlog chunk cache size %r is not a power of 2')
583 583 % self._chunkcachesize
584 584 )
585 585
586 586 indexdata = b''
587 587 self._initempty = True
588 588 try:
589 589 with self._indexfp() as f:
590 590 if (
591 591 mmapindexthreshold is not None
592 592 and self.opener.fstat(f).st_size >= mmapindexthreshold
593 593 ):
594 594 # TODO: should call .close() to release resources without
595 595 # relying on Python GC
596 596 indexdata = util.buffer(util.mmapread(f))
597 597 else:
598 598 indexdata = f.read()
599 599 if len(indexdata) > 0:
600 600 versionflags = versionformat_unpack(indexdata[:4])[0]
601 601 self._initempty = False
602 602 else:
603 603 versionflags = newversionflags
604 604 except IOError as inst:
605 605 if inst.errno != errno.ENOENT:
606 606 raise
607 607
608 608 versionflags = newversionflags
609 609
610 610 self.version = versionflags
611 611
612 612 flags = versionflags & ~0xFFFF
613 613 fmt = versionflags & 0xFFFF
614 614
615 615 if fmt == REVLOGV0:
616 616 if flags:
617 617 raise error.RevlogError(
618 618 _(b'unknown flags (%#04x) in version %d revlog %s')
619 619 % (flags >> 16, fmt, self.indexfile)
620 620 )
621 621
622 622 self._inline = False
623 623 self._generaldelta = False
624 624
625 625 elif fmt == REVLOGV1:
626 626 if flags & ~REVLOGV1_FLAGS:
627 627 raise error.RevlogError(
628 628 _(b'unknown flags (%#04x) in version %d revlog %s')
629 629 % (flags >> 16, fmt, self.indexfile)
630 630 )
631 631
632 632 self._inline = versionflags & FLAG_INLINE_DATA
633 633 self._generaldelta = versionflags & FLAG_GENERALDELTA
634 634
635 635 elif fmt == REVLOGV2:
636 636 if flags & ~REVLOGV2_FLAGS:
637 637 raise error.RevlogError(
638 638 _(b'unknown flags (%#04x) in version %d revlog %s')
639 639 % (flags >> 16, fmt, self.indexfile)
640 640 )
641 641
642 642 self._inline = versionflags & FLAG_INLINE_DATA
643 643 # generaldelta implied by version 2 revlogs.
644 644 self._generaldelta = True
645 645
646 646 else:
647 647 raise error.RevlogError(
648 648 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
649 649 )
650 650 # sparse-revlog can't be on without general-delta (issue6056)
651 651 if not self._generaldelta:
652 652 self._sparserevlog = False
653 653
654 654 self._storedeltachains = True
655 655
656 656 devel_nodemap = (
657 657 self.nodemap_file
658 658 and opts.get(b'devel-force-nodemap', False)
659 659 and NodemapRevlogIO is not None
660 660 )
661 661
662 662 use_rust_index = False
663 663 if rustrevlog is not None:
664 664 if self.nodemap_file is not None:
665 665 use_rust_index = True
666 666 else:
667 667 use_rust_index = self.opener.options.get(b'rust.index')
668 668
669 669 self._io = revlogio()
670 670 if self.version == REVLOGV0:
671 671 self._io = revlogoldio()
672 672 elif fmt == REVLOGV2:
673 673 self._io = revlogv2io()
674 674 elif devel_nodemap:
675 675 self._io = NodemapRevlogIO()
676 676 elif use_rust_index:
677 677 self._io = rustrevlogio()
678 678 try:
679 679 d = self._io.parseindex(indexdata, self._inline)
680 680 index, _chunkcache = d
681 681 use_nodemap = (
682 682 not self._inline
683 683 and self.nodemap_file is not None
684 684 and util.safehasattr(index, 'update_nodemap_data')
685 685 )
686 686 if use_nodemap:
687 687 nodemap_data = nodemaputil.persisted_data(self)
688 688 if nodemap_data is not None:
689 689 docket = nodemap_data[0]
690 690 if (
691 691 len(d[0]) > docket.tip_rev
692 692 and d[0][docket.tip_rev][7] == docket.tip_node
693 693 ):
694 694 # no changelog tampering
695 695 self._nodemap_docket = docket
696 696 index.update_nodemap_data(*nodemap_data)
697 697 except (ValueError, IndexError):
698 698 raise error.RevlogError(
699 699 _(b"index %s is corrupted") % self.indexfile
700 700 )
701 701 self.index, self._chunkcache = d
702 702 if not self._chunkcache:
703 703 self._chunkclear()
704 704 # revnum -> (chain-length, sum-delta-length)
705 705 self._chaininfocache = util.lrucachedict(500)
706 706 # revlog header -> revlog compressor
707 707 self._decompressors = {}
708 708
709 709 @util.propertycache
710 710 def _compressor(self):
711 711 engine = util.compengines[self._compengine]
712 712 return engine.revlogcompressor(self._compengineopts)
713 713
714 714 def _indexfp(self, mode=b'r'):
715 715 """file object for the revlog's index file"""
716 716 args = {'mode': mode}
717 717 if mode != b'r':
718 718 args['checkambig'] = self._checkambig
719 719 if mode == b'w':
720 720 args['atomictemp'] = True
721 721 return self.opener(self.indexfile, **args)
722 722
723 723 def _datafp(self, mode=b'r'):
724 724 """file object for the revlog's data file"""
725 725 return self.opener(self.datafile, mode=mode)
726 726
727 727 @contextlib.contextmanager
728 728 def _datareadfp(self, existingfp=None):
729 729 """file object suitable to read data"""
730 730 # Use explicit file handle, if given.
731 731 if existingfp is not None:
732 732 yield existingfp
733 733
734 734 # Use a file handle being actively used for writes, if available.
735 735 # There is some danger to doing this because reads will seek the
736 736 # file. However, _writeentry() performs a SEEK_END before all writes,
737 737 # so we should be safe.
738 738 elif self._writinghandles:
739 739 if self._inline:
740 740 yield self._writinghandles[0]
741 741 else:
742 742 yield self._writinghandles[1]
743 743
744 744 # Otherwise open a new file handle.
745 745 else:
746 746 if self._inline:
747 747 func = self._indexfp
748 748 else:
749 749 func = self._datafp
750 750 with func() as fp:
751 751 yield fp
752 752
753 753 def tiprev(self):
754 754 return len(self.index) - 1
755 755
756 756 def tip(self):
757 757 return self.node(self.tiprev())
758 758
759 759 def __contains__(self, rev):
760 760 return 0 <= rev < len(self)
761 761
762 762 def __len__(self):
763 763 return len(self.index)
764 764
765 765 def __iter__(self):
766 766 return iter(pycompat.xrange(len(self)))
767 767
768 768 def revs(self, start=0, stop=None):
769 769 """iterate over all rev in this revlog (from start to stop)"""
770 770 return storageutil.iterrevs(len(self), start=start, stop=stop)
771 771
772 772 @property
773 773 def nodemap(self):
774 774 msg = (
775 775 b"revlog.nodemap is deprecated, "
776 776 b"use revlog.index.[has_node|rev|get_rev]"
777 777 )
778 778 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
779 779 return self.index.nodemap
780 780
781 781 @property
782 782 def _nodecache(self):
783 783 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
784 784 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
785 785 return self.index.nodemap
786 786
787 787 def hasnode(self, node):
788 788 try:
789 789 self.rev(node)
790 790 return True
791 791 except KeyError:
792 792 return False
793 793
794 794 def candelta(self, baserev, rev):
795 795 """whether two revisions (baserev, rev) can be delta-ed or not"""
796 796 # Disable delta if either rev requires a content-changing flag
798 798 # processor (ex. LFS). This is because such a flag processor can alter
799 799 # the rawtext content that the delta will be based on, and two clients
800 800 # could have the same revlog node with different flags (i.e. different
800 800 # rawtext contents) and the delta could be incompatible.
801 801 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
802 802 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
803 803 ):
804 804 return False
805 805 return True
806 806
807 807 def update_caches(self, transaction):
808 808 if self.nodemap_file is not None:
809 809 if transaction is None:
810 810 nodemaputil.update_persistent_nodemap(self)
811 811 else:
812 812 nodemaputil.setup_persistent_nodemap(transaction, self)
813 813
814 814 def clearcaches(self):
815 815 self._revisioncache = None
816 816 self._chainbasecache.clear()
817 817 self._chunkcache = (0, b'')
818 818 self._pcache = {}
819 819 self._nodemap_docket = None
820 820 self.index.clearcaches()
821 821 # The python code is the one responsible for validating the docket, so we
822 822 # end up having to refresh it here.
823 823 use_nodemap = (
824 824 not self._inline
825 825 and self.nodemap_file is not None
826 826 and util.safehasattr(self.index, 'update_nodemap_data')
827 827 )
828 828 if use_nodemap:
829 829 nodemap_data = nodemaputil.persisted_data(self)
830 830 if nodemap_data is not None:
831 831 self._nodemap_docket = nodemap_data[0]
832 832 self.index.update_nodemap_data(*nodemap_data)
833 833
834 834 def rev(self, node):
835 835 try:
836 836 return self.index.rev(node)
837 837 except TypeError:
838 838 raise
839 839 except error.RevlogError:
840 840 # parsers.c radix tree lookup failed
841 841 if node == wdirid or node in wdirfilenodeids:
842 842 raise error.WdirUnsupported
843 843 raise error.LookupError(node, self.indexfile, _(b'no node'))
844 844
845 845 # Accessors for index entries.
846 846
847 847 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
848 848 # are flags.
849 849 def start(self, rev):
850 850 return int(self.index[rev][0] >> 16)
851 851
852 852 def flags(self, rev):
853 853 return self.index[rev][0] & 0xFFFF
854 854
855 855 def length(self, rev):
856 856 return self.index[rev][1]
857 857
858 858 def sidedata_length(self, rev):
859 859 if self.version & 0xFFFF != REVLOGV2:
860 860 return 0
861 861 return self.index[rev][9]
862 862
863 863 def rawsize(self, rev):
864 864 """return the length of the uncompressed text for a given revision"""
865 865 l = self.index[rev][2]
866 866 if l >= 0:
867 867 return l
868 868
869 869 t = self.rawdata(rev)
870 870 return len(t)
871 871
872 872 def size(self, rev):
873 873 """length of non-raw text (processed by a "read" flag processor)"""
874 874 # fast path: if no "read" flag processor could change the content,
875 875 # size is rawsize. note: ELLIPSIS is known to not change the content.
876 876 flags = self.flags(rev)
877 877 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
878 878 return self.rawsize(rev)
879 879
880 880 return len(self.revision(rev, raw=False))
881 881
882 882 def chainbase(self, rev):
883 883 base = self._chainbasecache.get(rev)
884 884 if base is not None:
885 885 return base
886 886
887 887 index = self.index
888 888 iterrev = rev
889 889 base = index[iterrev][3]
890 890 while base != iterrev:
891 891 iterrev = base
892 892 base = index[iterrev][3]
893 893
894 894 self._chainbasecache[rev] = base
895 895 return base
896 896
897 897 def linkrev(self, rev):
898 898 return self.index[rev][4]
899 899
900 900 def parentrevs(self, rev):
901 901 try:
902 902 entry = self.index[rev]
903 903 except IndexError:
904 904 if rev == wdirrev:
905 905 raise error.WdirUnsupported
906 906 raise
907 907
908 908 return entry[5], entry[6]
909 909
910 910 # fast parentrevs(rev) where rev isn't filtered
911 911 _uncheckedparentrevs = parentrevs
912 912
913 913 def node(self, rev):
914 914 try:
915 915 return self.index[rev][7]
916 916 except IndexError:
917 917 if rev == wdirrev:
918 918 raise error.WdirUnsupported
919 919 raise
920 920
921 921 # Derived from index values.
922 922
923 923 def end(self, rev):
924 924 return self.start(rev) + self.length(rev)
925 925
926 926 def parents(self, node):
927 927 i = self.index
928 928 d = i[self.rev(node)]
929 929 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
930 930
931 931 def chainlen(self, rev):
932 932 return self._chaininfo(rev)[0]
933 933
934 934 def _chaininfo(self, rev):
935 935 chaininfocache = self._chaininfocache
936 936 if rev in chaininfocache:
937 937 return chaininfocache[rev]
938 938 index = self.index
939 939 generaldelta = self._generaldelta
940 940 iterrev = rev
941 941 e = index[iterrev]
942 942 clen = 0
943 943 compresseddeltalen = 0
944 944 while iterrev != e[3]:
945 945 clen += 1
946 946 compresseddeltalen += e[1]
947 947 if generaldelta:
948 948 iterrev = e[3]
949 949 else:
950 950 iterrev -= 1
951 951 if iterrev in chaininfocache:
952 952 t = chaininfocache[iterrev]
953 953 clen += t[0]
954 954 compresseddeltalen += t[1]
955 955 break
956 956 e = index[iterrev]
957 957 else:
958 958 # Add text length of base since decompressing that also takes
959 959 # work. For cache hits the length is already included.
960 960 compresseddeltalen += e[1]
961 961 r = (clen, compresseddeltalen)
962 962 chaininfocache[rev] = r
963 963 return r
964 964
965 965 def _deltachain(self, rev, stoprev=None):
966 966 """Obtain the delta chain for a revision.
967 967
968 968 ``stoprev`` specifies a revision to stop at. If not specified, we
969 969 stop at the base of the chain.
970 970
971 971 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
972 972 revs in ascending order and ``stopped`` is a bool indicating whether
973 973 ``stoprev`` was hit.
974 974 """
975 975 # Try C implementation.
976 976 try:
977 977 return self.index.deltachain(rev, stoprev, self._generaldelta)
978 978 except AttributeError:
979 979 pass
980 980
981 981 chain = []
982 982
983 983 # Alias to prevent attribute lookup in tight loop.
984 984 index = self.index
985 985 generaldelta = self._generaldelta
986 986
987 987 iterrev = rev
988 988 e = index[iterrev]
989 989 while iterrev != e[3] and iterrev != stoprev:
990 990 chain.append(iterrev)
991 991 if generaldelta:
992 992 iterrev = e[3]
993 993 else:
994 994 iterrev -= 1
995 995 e = index[iterrev]
996 996
997 997 if iterrev == stoprev:
998 998 stopped = True
999 999 else:
1000 1000 chain.append(iterrev)
1001 1001 stopped = False
1002 1002
1003 1003 chain.reverse()
1004 1004 return chain, stopped
1005 1005
1006 1006 def ancestors(self, revs, stoprev=0, inclusive=False):
1007 1007 """Generate the ancestors of 'revs' in reverse revision order.
1008 1008 Does not generate revs lower than stoprev.
1009 1009
1010 1010 See the documentation for ancestor.lazyancestors for more details."""
1011 1011
1012 1012 # first, make sure start revisions aren't filtered
1013 1013 revs = list(revs)
1014 1014 checkrev = self.node
1015 1015 for r in revs:
1016 1016 checkrev(r)
1017 1017 # and we're sure ancestors aren't filtered as well
1018 1018
1019 1019 if rustancestor is not None:
1020 1020 lazyancestors = rustancestor.LazyAncestors
1021 1021 arg = self.index
1022 1022 else:
1023 1023 lazyancestors = ancestor.lazyancestors
1024 1024 arg = self._uncheckedparentrevs
1025 1025 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
1026 1026
1027 1027 def descendants(self, revs):
1028 1028 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
1029 1029
1030 1030 def findcommonmissing(self, common=None, heads=None):
1031 1031 """Return a tuple of the ancestors of common and the ancestors of heads
1032 1032 that are not ancestors of common. In revset terminology, we return the
1033 1033 tuple:
1034 1034
1035 1035 ::common, (::heads) - (::common)
1036 1036
1037 1037 The list is sorted by revision number, meaning it is
1038 1038 topologically sorted.
1039 1039
1040 1040 'heads' and 'common' are both lists of node IDs. If heads is
1041 1041 not supplied, uses all of the revlog's heads. If common is not
1042 1042 supplied, uses nullid."""
1043 1043 if common is None:
1044 1044 common = [nullid]
1045 1045 if heads is None:
1046 1046 heads = self.heads()
1047 1047
1048 1048 common = [self.rev(n) for n in common]
1049 1049 heads = [self.rev(n) for n in heads]
1050 1050
1051 1051 # we want the ancestors, but inclusive
1052 1052 class lazyset(object):
1053 1053 def __init__(self, lazyvalues):
1054 1054 self.addedvalues = set()
1055 1055 self.lazyvalues = lazyvalues
1056 1056
1057 1057 def __contains__(self, value):
1058 1058 return value in self.addedvalues or value in self.lazyvalues
1059 1059
1060 1060 def __iter__(self):
1061 1061 added = self.addedvalues
1062 1062 for r in added:
1063 1063 yield r
1064 1064 for r in self.lazyvalues:
1065 1065 if not r in added:
1066 1066 yield r
1067 1067
1068 1068 def add(self, value):
1069 1069 self.addedvalues.add(value)
1070 1070
1071 1071 def update(self, values):
1072 1072 self.addedvalues.update(values)
1073 1073
1074 1074 has = lazyset(self.ancestors(common))
1075 1075 has.add(nullrev)
1076 1076 has.update(common)
1077 1077
1078 1078 # take all ancestors from heads that aren't in has
1079 1079 missing = set()
1080 1080 visit = collections.deque(r for r in heads if r not in has)
1081 1081 while visit:
1082 1082 r = visit.popleft()
1083 1083 if r in missing:
1084 1084 continue
1085 1085 else:
1086 1086 missing.add(r)
1087 1087 for p in self.parentrevs(r):
1088 1088 if p not in has:
1089 1089 visit.append(p)
1090 1090 missing = list(missing)
1091 1091 missing.sort()
1092 1092 return has, [self.node(miss) for miss in missing]
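# Hedged sketch (added for illustration, not in the original changeset):
# findcommonmissing() computes ``::common`` plus the nodes in
# ``(::heads) - (::common)``.  A caller interested only in the missing
# nodes could do something like this; ``rl`` is any revlog-like object
# with the interface documented above and the helper name is invented.
def missing_nodes(rl, commonnodes, headnodes):
    # The first element (the lazy ancestor set) is ignored here; only the
    # topologically sorted list of missing nodes is returned.
    _ancestors, missing = rl.findcommonmissing(commonnodes, headnodes)
    return missing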
1093 1093
1094 1094 def incrementalmissingrevs(self, common=None):
1095 1095 """Return an object that can be used to incrementally compute the
1096 1096 revision numbers of the ancestors of arbitrary sets that are not
1097 1097 ancestors of common. This is an ancestor.incrementalmissingancestors
1098 1098 object.
1099 1099
1100 1100 'common' is a list of revision numbers. If common is not supplied, uses
1101 1101 nullrev.
1102 1102 """
1103 1103 if common is None:
1104 1104 common = [nullrev]
1105 1105
1106 1106 if rustancestor is not None:
1107 1107 return rustancestor.MissingAncestors(self.index, common)
1108 1108 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1109 1109
1110 1110 def findmissingrevs(self, common=None, heads=None):
1111 1111 """Return the revision numbers of the ancestors of heads that
1112 1112 are not ancestors of common.
1113 1113
1114 1114 More specifically, return a list of revision numbers corresponding to
1115 1115 nodes N such that every N satisfies the following constraints:
1116 1116
1117 1117 1. N is an ancestor of some node in 'heads'
1118 1118 2. N is not an ancestor of any node in 'common'
1119 1119
1120 1120 The list is sorted by revision number, meaning it is
1121 1121 topologically sorted.
1122 1122
1123 1123 'heads' and 'common' are both lists of revision numbers. If heads is
1124 1124 not supplied, uses all of the revlog's heads. If common is not
1125 1125 supplied, uses nullrev."""
1126 1126 if common is None:
1127 1127 common = [nullrev]
1128 1128 if heads is None:
1129 1129 heads = self.headrevs()
1130 1130
1131 1131 inc = self.incrementalmissingrevs(common=common)
1132 1132 return inc.missingancestors(heads)
1133 1133
1134 1134 def findmissing(self, common=None, heads=None):
1135 1135 """Return the ancestors of heads that are not ancestors of common.
1136 1136
1137 1137 More specifically, return a list of nodes N such that every N
1138 1138 satisfies the following constraints:
1139 1139
1140 1140 1. N is an ancestor of some node in 'heads'
1141 1141 2. N is not an ancestor of any node in 'common'
1142 1142
1143 1143 The list is sorted by revision number, meaning it is
1144 1144 topologically sorted.
1145 1145
1146 1146 'heads' and 'common' are both lists of node IDs. If heads is
1147 1147 not supplied, uses all of the revlog's heads. If common is not
1148 1148 supplied, uses nullid."""
1149 1149 if common is None:
1150 1150 common = [nullid]
1151 1151 if heads is None:
1152 1152 heads = self.heads()
1153 1153
1154 1154 common = [self.rev(n) for n in common]
1155 1155 heads = [self.rev(n) for n in heads]
1156 1156
1157 1157 inc = self.incrementalmissingrevs(common=common)
1158 1158 return [self.node(r) for r in inc.missingancestors(heads)]
1159 1159
1160 1160 def nodesbetween(self, roots=None, heads=None):
1161 1161 """Return a topological path from 'roots' to 'heads'.
1162 1162
1163 1163 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1164 1164 topologically sorted list of all nodes N that satisfy both of
1165 1165 these constraints:
1166 1166
1167 1167 1. N is a descendant of some node in 'roots'
1168 1168 2. N is an ancestor of some node in 'heads'
1169 1169
1170 1170 Every node is considered to be both a descendant and an ancestor
1171 1171 of itself, so every reachable node in 'roots' and 'heads' will be
1172 1172 included in 'nodes'.
1173 1173
1174 1174 'outroots' is the list of reachable nodes in 'roots', i.e., the
1175 1175 subset of 'roots' that is returned in 'nodes'. Likewise,
1176 1176 'outheads' is the subset of 'heads' that is also in 'nodes'.
1177 1177
1178 1178 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1179 1179 unspecified, uses nullid as the only root. If 'heads' is
1180 1180 unspecified, uses list of all of the revlog's heads."""
1181 1181 nonodes = ([], [], [])
1182 1182 if roots is not None:
1183 1183 roots = list(roots)
1184 1184 if not roots:
1185 1185 return nonodes
1186 1186 lowestrev = min([self.rev(n) for n in roots])
1187 1187 else:
1188 1188 roots = [nullid] # Everybody's a descendant of nullid
1189 1189 lowestrev = nullrev
1190 1190 if (lowestrev == nullrev) and (heads is None):
1191 1191 # We want _all_ the nodes!
1192 1192 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1193 1193 if heads is None:
1194 1194 # All nodes are ancestors, so the latest ancestor is the last
1195 1195 # node.
1196 1196 highestrev = len(self) - 1
1197 1197 # Set ancestors to None to signal that every node is an ancestor.
1198 1198 ancestors = None
1199 1199 # Set heads to an empty dictionary for later discovery of heads
1200 1200 heads = {}
1201 1201 else:
1202 1202 heads = list(heads)
1203 1203 if not heads:
1204 1204 return nonodes
1205 1205 ancestors = set()
1206 1206 # Turn heads into a dictionary so we can remove 'fake' heads.
1207 1207 # Also, later we will be using it to filter out the heads we can't
1208 1208 # find from roots.
1209 1209 heads = dict.fromkeys(heads, False)
1210 1210 # Start at the top and keep marking parents until we're done.
1211 1211 nodestotag = set(heads)
1212 1212 # Remember where the top was so we can use it as a limit later.
1213 1213 highestrev = max([self.rev(n) for n in nodestotag])
1214 1214 while nodestotag:
1215 1215 # grab a node to tag
1216 1216 n = nodestotag.pop()
1217 1217 # Never tag nullid
1218 1218 if n == nullid:
1219 1219 continue
1220 1220 # A node's revision number represents its place in a
1221 1221 # topologically sorted list of nodes.
1222 1222 r = self.rev(n)
1223 1223 if r >= lowestrev:
1224 1224 if n not in ancestors:
1225 1225 # If we are possibly a descendant of one of the roots
1226 1226 # and we haven't already been marked as an ancestor
1227 1227 ancestors.add(n) # Mark as ancestor
1228 1228 # Add non-nullid parents to list of nodes to tag.
1229 1229 nodestotag.update(
1230 1230 [p for p in self.parents(n) if p != nullid]
1231 1231 )
1232 1232 elif n in heads: # We've seen it before, is it a fake head?
1233 1233 # So it is, real heads should not be the ancestors of
1234 1234 # any other heads.
1235 1235 heads.pop(n)
1236 1236 if not ancestors:
1237 1237 return nonodes
1238 1238 # Now that we have our set of ancestors, we want to remove any
1239 1239 # roots that are not ancestors.
1240 1240
1241 1241 # If one of the roots was nullid, everything is included anyway.
1242 1242 if lowestrev > nullrev:
1243 1243 # But, since we weren't, let's recompute the lowest rev to not
1244 1244 # include roots that aren't ancestors.
1245 1245
1246 1246 # Filter out roots that aren't ancestors of heads
1247 1247 roots = [root for root in roots if root in ancestors]
1248 1248 # Recompute the lowest revision
1249 1249 if roots:
1250 1250 lowestrev = min([self.rev(root) for root in roots])
1251 1251 else:
1252 1252 # No more roots? Return empty list
1253 1253 return nonodes
1254 1254 else:
1255 1255 # We are descending from nullid, and don't need to care about
1256 1256 # any other roots.
1257 1257 lowestrev = nullrev
1258 1258 roots = [nullid]
1259 1259 # Transform our roots list into a set.
1260 1260 descendants = set(roots)
1261 1261 # Also, keep the original roots so we can filter out roots that aren't
1262 1262 # 'real' roots (i.e. are descended from other roots).
1263 1263 roots = descendants.copy()
1264 1264 # Our topologically sorted list of output nodes.
1265 1265 orderedout = []
1266 1266 # Don't start at nullid since we don't want nullid in our output list,
1267 1267 # and if nullid shows up in descendants, empty parents will look like
1268 1268 # they're descendants.
1269 1269 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1270 1270 n = self.node(r)
1271 1271 isdescendant = False
1272 1272 if lowestrev == nullrev: # Everybody is a descendant of nullid
1273 1273 isdescendant = True
1274 1274 elif n in descendants:
1275 1275 # n is already a descendant
1276 1276 isdescendant = True
1277 1277 # This check only needs to be done here because all the roots
1278 1278 # will start being marked as descendants before the loop.
1279 1279 if n in roots:
1280 1280 # If n was a root, check if it's a 'real' root.
1281 1281 p = tuple(self.parents(n))
1282 1282 # If any of its parents are descendants, it's not a root.
1283 1283 if (p[0] in descendants) or (p[1] in descendants):
1284 1284 roots.remove(n)
1285 1285 else:
1286 1286 p = tuple(self.parents(n))
1287 1287 # A node is a descendant if either of its parents is a
1288 1288 # descendant. (We seeded the descendants set with the roots
1289 1289 # up there, remember?)
1290 1290 if (p[0] in descendants) or (p[1] in descendants):
1291 1291 descendants.add(n)
1292 1292 isdescendant = True
1293 1293 if isdescendant and ((ancestors is None) or (n in ancestors)):
1294 1294 # Only include nodes that are both descendants and ancestors.
1295 1295 orderedout.append(n)
1296 1296 if (ancestors is not None) and (n in heads):
1297 1297 # We're trying to figure out which heads are reachable
1298 1298 # from roots.
1299 1299 # Mark this head as having been reached
1300 1300 heads[n] = True
1301 1301 elif ancestors is None:
1302 1302 # Otherwise, we're trying to discover the heads.
1303 1303 # Assume this is a head because if it isn't, the next step
1304 1304 # will eventually remove it.
1305 1305 heads[n] = True
1306 1306 # But, obviously its parents aren't.
1307 1307 for p in self.parents(n):
1308 1308 heads.pop(p, None)
1309 1309 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1310 1310 roots = list(roots)
1311 1311 assert orderedout
1312 1312 assert roots
1313 1313 assert heads
1314 1314 return (orderedout, roots, heads)
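# Illustrative sketch (assumption: any revlog-like object ``rl`` with the
# nodesbetween() interface documented above; the helper is hypothetical).
def all_heads_reachable(rl, rootnodes, headnodes):
    # nodesbetween() returns (nodes, outroots, outheads); a head missing
    # from ``outheads`` is not reachable from any of the given roots.
    _nodes, _outroots, outheads = rl.nodesbetween(rootnodes, headnodes)
    return not (set(headnodes) - set(outheads))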
1315 1315
1316 1316 def headrevs(self, revs=None):
1317 1317 if revs is None:
1318 1318 try:
1319 1319 return self.index.headrevs()
1320 1320 except AttributeError:
1321 1321 return self._headrevs()
1322 1322 if rustdagop is not None:
1323 1323 return rustdagop.headrevs(self.index, revs)
1324 1324 return dagop.headrevs(revs, self._uncheckedparentrevs)
1325 1325
1326 1326 def computephases(self, roots):
1327 1327 return self.index.computephasesmapsets(roots)
1328 1328
1329 1329 def _headrevs(self):
1330 1330 count = len(self)
1331 1331 if not count:
1332 1332 return [nullrev]
1333 1333 # we won't iterate over filtered revs, so nobody is a head at start
1334 1334 ishead = [0] * (count + 1)
1335 1335 index = self.index
1336 1336 for r in self:
1337 1337 ishead[r] = 1 # I may be a head
1338 1338 e = index[r]
1339 1339 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1340 1340 return [r for r, val in enumerate(ishead) if val]
1341 1341
1342 1342 def heads(self, start=None, stop=None):
1343 1343 """return the list of all nodes that have no children
1344 1344
1345 1345 if start is specified, only heads that are descendants of
1346 1346 start will be returned
1347 1347 if stop is specified, it will consider all the revs from stop
1348 1348 as if they had no children
1349 1349 """
1350 1350 if start is None and stop is None:
1351 1351 if not len(self):
1352 1352 return [nullid]
1353 1353 return [self.node(r) for r in self.headrevs()]
1354 1354
1355 1355 if start is None:
1356 1356 start = nullrev
1357 1357 else:
1358 1358 start = self.rev(start)
1359 1359
1360 1360 stoprevs = {self.rev(n) for n in stop or []}
1361 1361
1362 1362 revs = dagop.headrevssubset(
1363 1363 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1364 1364 )
1365 1365
1366 1366 return [self.node(rev) for rev in revs]
1367 1367
1368 1368 def children(self, node):
1369 1369 """find the children of a given node"""
1370 1370 c = []
1371 1371 p = self.rev(node)
1372 1372 for r in self.revs(start=p + 1):
1373 1373 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1374 1374 if prevs:
1375 1375 for pr in prevs:
1376 1376 if pr == p:
1377 1377 c.append(self.node(r))
1378 1378 elif p == nullrev:
1379 1379 c.append(self.node(r))
1380 1380 return c
1381 1381
1382 1382 def commonancestorsheads(self, a, b):
1383 1383 """calculate all the heads of the common ancestors of nodes a and b"""
1384 1384 a, b = self.rev(a), self.rev(b)
1385 1385 ancs = self._commonancestorsheads(a, b)
1386 1386 return pycompat.maplist(self.node, ancs)
1387 1387
1388 1388 def _commonancestorsheads(self, *revs):
1389 1389 """calculate all the heads of the common ancestors of revs"""
1390 1390 try:
1391 1391 ancs = self.index.commonancestorsheads(*revs)
1392 1392 except (AttributeError, OverflowError): # C implementation failed
1393 1393 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1394 1394 return ancs
1395 1395
1396 1396 def isancestor(self, a, b):
1397 1397 """return True if node a is an ancestor of node b
1398 1398
1399 1399 A revision is considered an ancestor of itself."""
1400 1400 a, b = self.rev(a), self.rev(b)
1401 1401 return self.isancestorrev(a, b)
1402 1402
1403 1403 def isancestorrev(self, a, b):
1404 1404 """return True if revision a is an ancestor of revision b
1405 1405
1406 1406 A revision is considered an ancestor of itself.
1407 1407
1408 1408 The implementation of this is trivial but the use of
1409 1409 reachableroots is not."""
1410 1410 if a == nullrev:
1411 1411 return True
1412 1412 elif a == b:
1413 1413 return True
1414 1414 elif a > b:
1415 1415 return False
1416 1416 return bool(self.reachableroots(a, [b], [a], includepath=False))
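# Sketch for illustration only (not part of the upstream change): the
# ancestry predicates above compose naturally; ``rl`` is assumed to be a
# revlog-like object and the function name is invented here.
def is_linear(rl, nodea, nodeb):
    # Two nodes are on a single line of development when one is an
    # ancestor of the other (each node is an ancestor of itself).
    return rl.isancestor(nodea, nodeb) or rl.isancestor(nodeb, nodea)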
1417 1417
1418 1418 def reachableroots(self, minroot, heads, roots, includepath=False):
1419 1419 """return (heads(::(<roots> and <roots>::<heads>)))
1420 1420
1421 1421 If includepath is True, return (<roots>::<heads>)."""
1422 1422 try:
1423 1423 return self.index.reachableroots2(
1424 1424 minroot, heads, roots, includepath
1425 1425 )
1426 1426 except AttributeError:
1427 1427 return dagop._reachablerootspure(
1428 1428 self.parentrevs, minroot, roots, heads, includepath
1429 1429 )
1430 1430
1431 1431 def ancestor(self, a, b):
1432 1432 """calculate the "best" common ancestor of nodes a and b"""
1433 1433
1434 1434 a, b = self.rev(a), self.rev(b)
1435 1435 try:
1436 1436 ancs = self.index.ancestors(a, b)
1437 1437 except (AttributeError, OverflowError):
1438 1438 ancs = ancestor.ancestors(self.parentrevs, a, b)
1439 1439 if ancs:
1440 1440 # choose a consistent winner when there's a tie
1441 1441 return min(map(self.node, ancs))
1442 1442 return nullid
1443 1443
1444 1444 def _match(self, id):
1445 1445 if isinstance(id, int):
1446 1446 # rev
1447 1447 return self.node(id)
1448 1448 if len(id) == 20:
1449 1449 # possibly a binary node
1450 1450 # odds of a binary node being all hex in ASCII are 1 in 10**25
1451 1451 try:
1452 1452 node = id
1453 1453 self.rev(node) # quick search the index
1454 1454 return node
1455 1455 except error.LookupError:
1456 1456 pass # may be partial hex id
1457 1457 try:
1458 1458 # str(rev)
1459 1459 rev = int(id)
1460 1460 if b"%d" % rev != id:
1461 1461 raise ValueError
1462 1462 if rev < 0:
1463 1463 rev = len(self) + rev
1464 1464 if rev < 0 or rev >= len(self):
1465 1465 raise ValueError
1466 1466 return self.node(rev)
1467 1467 except (ValueError, OverflowError):
1468 1468 pass
1469 1469 if len(id) == 40:
1470 1470 try:
1471 1471 # a full hex nodeid?
1472 1472 node = bin(id)
1473 1473 self.rev(node)
1474 1474 return node
1475 1475 except (TypeError, error.LookupError):
1476 1476 pass
1477 1477
1478 1478 def _partialmatch(self, id):
1479 1479 # we don't care about wdirfilenodeids as they should always be full hashes
1480 1480 maybewdir = wdirhex.startswith(id)
1481 1481 try:
1482 1482 partial = self.index.partialmatch(id)
1483 1483 if partial and self.hasnode(partial):
1484 1484 if maybewdir:
1485 1485 # single 'ff...' match in radix tree, ambiguous with wdir
1486 1486 raise error.RevlogError
1487 1487 return partial
1488 1488 if maybewdir:
1489 1489 # no 'ff...' match in radix tree, wdir identified
1490 1490 raise error.WdirUnsupported
1491 1491 return None
1492 1492 except error.RevlogError:
1493 1493 # parsers.c radix tree lookup gave multiple matches
1494 1494 # fast path: for unfiltered changelog, radix tree is accurate
1495 1495 if not getattr(self, 'filteredrevs', None):
1496 1496 raise error.AmbiguousPrefixLookupError(
1497 1497 id, self.indexfile, _(b'ambiguous identifier')
1498 1498 )
1499 1499 # fall through to slow path that filters hidden revisions
1500 1500 except (AttributeError, ValueError):
1501 1501 # we are pure python, or key was too short to search radix tree
1502 1502 pass
1503 1503
1504 1504 if id in self._pcache:
1505 1505 return self._pcache[id]
1506 1506
1507 1507 if len(id) <= 40:
1508 1508 try:
1509 1509 # hex(node)[:...]
1510 1510 l = len(id) // 2 # grab an even number of digits
1511 1511 prefix = bin(id[: l * 2])
1512 1512 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1513 1513 nl = [
1514 1514 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1515 1515 ]
1516 1516 if nullhex.startswith(id):
1517 1517 nl.append(nullid)
1518 1518 if len(nl) > 0:
1519 1519 if len(nl) == 1 and not maybewdir:
1520 1520 self._pcache[id] = nl[0]
1521 1521 return nl[0]
1522 1522 raise error.AmbiguousPrefixLookupError(
1523 1523 id, self.indexfile, _(b'ambiguous identifier')
1524 1524 )
1525 1525 if maybewdir:
1526 1526 raise error.WdirUnsupported
1527 1527 return None
1528 1528 except TypeError:
1529 1529 pass
1530 1530
1531 1531 def lookup(self, id):
1532 1532 """locate a node based on:
1533 1533 - revision number or str(revision number)
1534 1534 - nodeid or subset of hex nodeid
1535 1535 """
1536 1536 n = self._match(id)
1537 1537 if n is not None:
1538 1538 return n
1539 1539 n = self._partialmatch(id)
1540 1540 if n:
1541 1541 return n
1542 1542
1543 1543 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1544 1544
1545 1545 def shortest(self, node, minlength=1):
1546 1546 """Find the shortest unambiguous prefix that matches node."""
1547 1547
1548 1548 def isvalid(prefix):
1549 1549 try:
1550 1550 matchednode = self._partialmatch(prefix)
1551 1551 except error.AmbiguousPrefixLookupError:
1552 1552 return False
1553 1553 except error.WdirUnsupported:
1554 1554 # single 'ff...' match
1555 1555 return True
1556 1556 if matchednode is None:
1557 1557 raise error.LookupError(node, self.indexfile, _(b'no node'))
1558 1558 return True
1559 1559
1560 1560 def maybewdir(prefix):
1561 1561 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1562 1562
1563 1563 hexnode = hex(node)
1564 1564
1565 1565 def disambiguate(hexnode, minlength):
1566 1566 """Disambiguate against wdirid."""
1567 1567 for length in range(minlength, len(hexnode) + 1):
1568 1568 prefix = hexnode[:length]
1569 1569 if not maybewdir(prefix):
1570 1570 return prefix
1571 1571
1572 1572 if not getattr(self, 'filteredrevs', None):
1573 1573 try:
1574 1574 length = max(self.index.shortest(node), minlength)
1575 1575 return disambiguate(hexnode, length)
1576 1576 except error.RevlogError:
1577 1577 if node != wdirid:
1578 1578 raise error.LookupError(node, self.indexfile, _(b'no node'))
1579 1579 except AttributeError:
1580 1580 # Fall through to pure code
1581 1581 pass
1582 1582
1583 1583 if node == wdirid:
1584 1584 for length in range(minlength, len(hexnode) + 1):
1585 1585 prefix = hexnode[:length]
1586 1586 if isvalid(prefix):
1587 1587 return prefix
1588 1588
1589 1589 for length in range(minlength, len(hexnode) + 1):
1590 1590 prefix = hexnode[:length]
1591 1591 if isvalid(prefix):
1592 1592 return disambiguate(hexnode, length)
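# Illustrative round-trip sketch (assumes a revlog-like ``rl``; added for
# explanation, not present in the original file): a prefix produced by
# shortest() should resolve back to the same node through lookup().
def check_shortest_roundtrip(rl, node, minlength=1):
    prefix = rl.shortest(node, minlength)
    # lookup() accepts a hex-nodeid prefix and returns the full binary node.
    return rl.lookup(prefix) == node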
1593 1593
1594 1594 def cmp(self, node, text):
1595 1595 """compare text with a given file revision
1596 1596
1597 1597 returns True if text is different than what is stored.
1598 1598 """
1599 1599 p1, p2 = self.parents(node)
1600 1600 return storageutil.hashrevisionsha1(text, p1, p2) != node
1601 1601
1602 1602 def _cachesegment(self, offset, data):
1603 1603 """Add a segment to the revlog cache.
1604 1604
1605 1605 Accepts an absolute offset and the data that is at that location.
1606 1606 """
1607 1607 o, d = self._chunkcache
1608 1608 # try to add to existing cache
1609 1609 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1610 1610 self._chunkcache = o, d + data
1611 1611 else:
1612 1612 self._chunkcache = offset, data
1613 1613
1614 1614 def _readsegment(self, offset, length, df=None):
1615 1615 """Load a segment of raw data from the revlog.
1616 1616
1617 1617 Accepts an absolute offset, length to read, and an optional existing
1618 1618 file handle to read from.
1619 1619
1620 1620 If an existing file handle is passed, it will be seeked and the
1621 1621 original seek position will NOT be restored.
1622 1622
1623 1623 Returns a str or buffer of raw byte data.
1624 1624
1625 1625 Raises if the requested number of bytes could not be read.
1626 1626 """
1627 1627 # Cache data both forward and backward around the requested
1628 1628 # data, in a fixed size window. This helps speed up operations
1629 1629 # involving reading the revlog backwards.
1630 1630 cachesize = self._chunkcachesize
1631 1631 realoffset = offset & ~(cachesize - 1)
1632 1632 reallength = (
1633 1633 (offset + length + cachesize) & ~(cachesize - 1)
1634 1634 ) - realoffset
1635 1635 with self._datareadfp(df) as df:
1636 1636 df.seek(realoffset)
1637 1637 d = df.read(reallength)
1638 1638
1639 1639 self._cachesegment(realoffset, d)
1640 1640 if offset != realoffset or reallength != length:
1641 1641 startoffset = offset - realoffset
1642 1642 if len(d) - startoffset < length:
1643 1643 raise error.RevlogError(
1644 1644 _(
1645 1645 b'partial read of revlog %s; expected %d bytes from '
1646 1646 b'offset %d, got %d'
1647 1647 )
1648 1648 % (
1649 1649 self.indexfile if self._inline else self.datafile,
1650 1650 length,
1651 1651 realoffset,
1652 1652 len(d) - startoffset,
1653 1653 )
1654 1654 )
1655 1655
1656 1656 return util.buffer(d, startoffset, length)
1657 1657
1658 1658 if len(d) < length:
1659 1659 raise error.RevlogError(
1660 1660 _(
1661 1661 b'partial read of revlog %s; expected %d bytes from offset '
1662 1662 b'%d, got %d'
1663 1663 )
1664 1664 % (
1665 1665 self.indexfile if self._inline else self.datafile,
1666 1666 length,
1667 1667 offset,
1668 1668 len(d),
1669 1669 )
1670 1670 )
1671 1671
1672 1672 return d
1673 1673
1674 1674 def _getsegment(self, offset, length, df=None):
1675 1675 """Obtain a segment of raw data from the revlog.
1676 1676
1677 1677 Accepts an absolute offset, length of bytes to obtain, and an
1678 1678 optional file handle to the already-opened revlog. If the file
1679 1679 handle is used, its original seek position will not be preserved.
1680 1680
1681 1681 Requests for data may be returned from a cache.
1682 1682
1683 1683 Returns a str or a buffer instance of raw byte data.
1684 1684 """
1685 1685 o, d = self._chunkcache
1686 1686 l = len(d)
1687 1687
1688 1688 # is it in the cache?
1689 1689 cachestart = offset - o
1690 1690 cacheend = cachestart + length
1691 1691 if cachestart >= 0 and cacheend <= l:
1692 1692 if cachestart == 0 and cacheend == l:
1693 1693 return d # avoid a copy
1694 1694 return util.buffer(d, cachestart, cacheend - cachestart)
1695 1695
1696 1696 return self._readsegment(offset, length, df=df)
1697 1697
1698 1698 def _getsegmentforrevs(self, startrev, endrev, df=None):
1699 1699 """Obtain a segment of raw data corresponding to a range of revisions.
1700 1700
1701 1701 Accepts the start and end revisions and an optional already-open
1702 1702 file handle to be used for reading. If the file handle is used, its
1703 1703 seek position will not be preserved.
1704 1704
1705 1705 Requests for data may be satisfied by a cache.
1706 1706
1707 1707 Returns a 2-tuple of (offset, data) for the requested range of
1708 1708 revisions. Offset is the integer offset from the beginning of the
1709 1709 revlog and data is a str or buffer of the raw byte data.
1710 1710
1711 1711 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1712 1712 to determine where each revision's data begins and ends.
1713 1713 """
1714 1714 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1715 1715 # (functions are expensive).
1716 1716 index = self.index
1717 1717 istart = index[startrev]
1718 1718 start = int(istart[0] >> 16)
1719 1719 if startrev == endrev:
1720 1720 end = start + istart[1]
1721 1721 else:
1722 1722 iend = index[endrev]
1723 1723 end = int(iend[0] >> 16) + iend[1]
1724 1724
1725 1725 if self._inline:
1726 1726 start += (startrev + 1) * self._io.size
1727 1727 end += (endrev + 1) * self._io.size
1728 1728 length = end - start
1729 1729
1730 1730 return start, self._getsegment(start, length, df=df)
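# Hedged sketch (not from the original changeset): the docstring above says
# callers combine the returned (offset, data) with start(rev)/length(rev)
# to carve out each revision's raw chunk.  Something along these lines for
# a non-inline revlog, assuming ``rl`` is a revlog-like object (the inline
# case needs the extra per-entry index offset that _chunks() applies):
def raw_chunks_for_range(rl, startrev, endrev):
    offset, data = rl._getsegmentforrevs(startrev, endrev)
    chunks = []
    for rev in range(startrev, endrev + 1):
        chunkstart = rl.start(rev) - offset
        chunks.append(bytes(data[chunkstart : chunkstart + rl.length(rev)]))
    return chunks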
1731 1731
1732 1732 def _chunk(self, rev, df=None):
1733 1733 """Obtain a single decompressed chunk for a revision.
1734 1734
1735 1735 Accepts an integer revision and an optional already-open file handle
1736 1736 to be used for reading. If used, the seek position of the file will not
1737 1737 be preserved.
1738 1738
1739 1739 Returns a str holding uncompressed data for the requested revision.
1740 1740 """
1741 1741 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1742 1742
1743 1743 def _chunks(self, revs, df=None, targetsize=None):
1744 1744 """Obtain decompressed chunks for the specified revisions.
1745 1745
1746 1746 Accepts an iterable of numeric revisions that are assumed to be in
1747 1747 ascending order. Also accepts an optional already-open file handle
1748 1748 to be used for reading. If used, the seek position of the file will
1749 1749 not be preserved.
1750 1750
1751 1751 This function is similar to calling ``self._chunk()`` multiple times,
1752 1752 but is faster.
1753 1753
1754 1754 Returns a list with decompressed data for each requested revision.
1755 1755 """
1756 1756 if not revs:
1757 1757 return []
1758 1758 start = self.start
1759 1759 length = self.length
1760 1760 inline = self._inline
1761 1761 iosize = self._io.size
1762 1762 buffer = util.buffer
1763 1763
1764 1764 l = []
1765 1765 ladd = l.append
1766 1766
1767 1767 if not self._withsparseread:
1768 1768 slicedchunks = (revs,)
1769 1769 else:
1770 1770 slicedchunks = deltautil.slicechunk(
1771 1771 self, revs, targetsize=targetsize
1772 1772 )
1773 1773
1774 1774 for revschunk in slicedchunks:
1775 1775 firstrev = revschunk[0]
1776 1776 # Skip trailing revisions with empty diff
1777 1777 for lastrev in revschunk[::-1]:
1778 1778 if length(lastrev) != 0:
1779 1779 break
1780 1780
1781 1781 try:
1782 1782 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1783 1783 except OverflowError:
1784 1784 # issue4215 - we can't cache a run of chunks greater than
1785 1785 # 2G on Windows
1786 1786 return [self._chunk(rev, df=df) for rev in revschunk]
1787 1787
1788 1788 decomp = self.decompress
1789 1789 for rev in revschunk:
1790 1790 chunkstart = start(rev)
1791 1791 if inline:
1792 1792 chunkstart += (rev + 1) * iosize
1793 1793 chunklength = length(rev)
1794 1794 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1795 1795
1796 1796 return l
1797 1797
1798 1798 def _chunkclear(self):
1799 1799 """Clear the raw chunk cache."""
1800 1800 self._chunkcache = (0, b'')
1801 1801
1802 1802 def deltaparent(self, rev):
1803 1803 """return deltaparent of the given revision"""
1804 1804 base = self.index[rev][3]
1805 1805 if base == rev:
1806 1806 return nullrev
1807 1807 elif self._generaldelta:
1808 1808 return base
1809 1809 else:
1810 1810 return rev - 1
1811 1811
1812 1812 def issnapshot(self, rev):
1813 1813 """tells whether rev is a snapshot"""
1814 1814 if not self._sparserevlog:
1815 1815 return self.deltaparent(rev) == nullrev
1816 1816 elif util.safehasattr(self.index, b'issnapshot'):
1817 1817 # directly assign the method to cache both the attribute test and the access
1818 1818 self.issnapshot = self.index.issnapshot
1819 1819 return self.issnapshot(rev)
1820 1820 if rev == nullrev:
1821 1821 return True
1822 1822 entry = self.index[rev]
1823 1823 base = entry[3]
1824 1824 if base == rev:
1825 1825 return True
1826 1826 if base == nullrev:
1827 1827 return True
1828 1828 p1 = entry[5]
1829 1829 p2 = entry[6]
1830 1830 if base == p1 or base == p2:
1831 1831 return False
1832 1832 return self.issnapshot(base)
1833 1833
1834 1834 def snapshotdepth(self, rev):
1835 1835 """number of snapshot in the chain before this one"""
1836 1836 if not self.issnapshot(rev):
1837 1837 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1838 1838 return len(self._deltachain(rev)[0]) - 1
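# Illustration only (assumed revlog-like ``rl``; not part of the original
# file): under sparse-revlog/generaldelta assumptions, the snapshots that
# issnapshot()/snapshotdepth() describe form a chain linked by delta
# parents, which can be walked like this.
def snapshot_chain(rl, rev):
    chain = []
    # deltaparent() returns nullrev (-1) once a full snapshot is reached.
    while rev >= 0 and rl.issnapshot(rev):
        chain.append(rev)
        rev = rl.deltaparent(rev)
    chain.reverse()
    return chain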
1839 1839
1840 1840 def revdiff(self, rev1, rev2):
1841 1841 """return or calculate a delta between two revisions
1842 1842
1843 1843 The delta calculated is in binary form and is intended to be written to
1844 1844 revlog data directly. So this function needs raw revision data.
1845 1845 """
1846 1846 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1847 1847 return bytes(self._chunk(rev2))
1848 1848
1849 1849 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1850 1850
1851 1851 def _processflags(self, text, flags, operation, raw=False):
1852 1852 """deprecated entry point to access flag processors"""
1853 1853 msg = b'_processflag(...) use the specialized variant'
1854 1854 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1855 1855 if raw:
1856 1856 return text, flagutil.processflagsraw(self, text, flags)
1857 1857 elif operation == b'read':
1858 1858 return flagutil.processflagsread(self, text, flags)
1859 1859 else: # write operation
1860 1860 return flagutil.processflagswrite(self, text, flags)
1861 1861
1862 1862 def revision(self, nodeorrev, _df=None, raw=False):
1863 1863 """return an uncompressed revision of a given node or revision
1864 1864 number.
1865 1865
1866 1866 _df - an existing file handle to read from. (internal-only)
1867 1867 raw - an optional argument specifying if the revision data is to be
1868 1868 treated as raw data when applying flag transforms. 'raw' should be set
1869 1869 to True when generating changegroups or in debug commands.
1870 1870 """
1871 1871 if raw:
1872 1872 msg = (
1873 1873 b'revlog.revision(..., raw=True) is deprecated, '
1874 1874 b'use revlog.rawdata(...)'
1875 1875 )
1876 1876 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1877 1877 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1878 1878
1879 1879 def sidedata(self, nodeorrev, _df=None):
1880 1880 """a map of extra data related to the changeset but not part of the hash
1881 1881
1882 1882 This function currently returns a dictionary. However, a more advanced
1883 1883 mapping object will likely be used in the future for more
1884 1884 efficient/lazy code.
1885 1885 """
1886 1886 return self._revisiondata(nodeorrev, _df)[1]
1887 1887
1888 1888 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1889 1889 # deal with <nodeorrev> argument type
1890 1890 if isinstance(nodeorrev, int):
1891 1891 rev = nodeorrev
1892 1892 node = self.node(rev)
1893 1893 else:
1894 1894 node = nodeorrev
1895 1895 rev = None
1896 1896
1897 1897 # fast path the special `nullid` rev
1898 1898 if node == nullid:
1899 1899 return b"", {}
1900 1900
1901 1901 # ``rawtext`` is the text as stored inside the revlog. Might be the
1902 1902 # revision or might need to be processed to retrieve the revision.
1903 1903 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1904 1904
1905 1905 if self.version & 0xFFFF == REVLOGV2:
1906 1906 if rev is None:
1907 1907 rev = self.rev(node)
1908 1908 sidedata = self._sidedata(rev)
1909 1909 else:
1910 1910 sidedata = {}
1911 1911
1912 1912 if raw and validated:
1913 1913 # if we don't want to process the raw text and the raw
1914 1914 # text is cached, we can exit early.
1915 1915 return rawtext, sidedata
1916 1916 if rev is None:
1917 1917 rev = self.rev(node)
1918 1918 # the revlog's flag for this revision
1919 1919 # (usually alter its state or content)
1920 1920 flags = self.flags(rev)
1921 1921
1922 1922 if validated and flags == REVIDX_DEFAULT_FLAGS:
1923 1923 # no extra flags set, no flag processor runs, text = rawtext
1924 1924 return rawtext, sidedata
1925 1925
1926 1926 if raw:
1927 1927 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1928 1928 text = rawtext
1929 1929 else:
1930 1930 r = flagutil.processflagsread(self, rawtext, flags)
1931 1931 text, validatehash = r
1932 1932 if validatehash:
1933 1933 self.checkhash(text, node, rev=rev)
1934 1934 if not validated:
1935 1935 self._revisioncache = (node, rev, rawtext)
1936 1936
1937 1937 return text, sidedata
1938 1938
1939 1939 def _rawtext(self, node, rev, _df=None):
1940 1940 """return the possibly unvalidated rawtext for a revision
1941 1941
1942 1942 returns (rev, rawtext, validated)
1943 1943 """
1944 1944
1945 1945 # revision in the cache (could be useful to apply delta)
1946 1946 cachedrev = None
1947 1947 # An intermediate text to apply deltas to
1948 1948 basetext = None
1949 1949
1950 1950 # Check if we have the entry in cache
1951 1951 # The cache entry looks like (node, rev, rawtext)
1952 1952 if self._revisioncache:
1953 1953 if self._revisioncache[0] == node:
1954 1954 return (rev, self._revisioncache[2], True)
1955 1955 cachedrev = self._revisioncache[1]
1956 1956
1957 1957 if rev is None:
1958 1958 rev = self.rev(node)
1959 1959
1960 1960 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1961 1961 if stopped:
1962 1962 basetext = self._revisioncache[2]
1963 1963
1964 1964 # drop cache to save memory, the caller is expected to
1965 1965 # update self._revisioncache after validating the text
1966 1966 self._revisioncache = None
1967 1967
1968 1968 targetsize = None
1969 1969 rawsize = self.index[rev][2]
1970 1970 if 0 <= rawsize:
1971 1971 targetsize = 4 * rawsize
1972 1972
1973 1973 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1974 1974 if basetext is None:
1975 1975 basetext = bytes(bins[0])
1976 1976 bins = bins[1:]
1977 1977
1978 1978 rawtext = mdiff.patches(basetext, bins)
1979 1979 del basetext # let us have a chance to free memory early
1980 1980 return (rev, rawtext, False)
1981 1981
1982 1982 def _sidedata(self, rev):
1983 1983 """Return the sidedata for a given revision number."""
1984 1984 index_entry = self.index[rev]
1985 1985 sidedata_offset = index_entry[8]
1986 1986 sidedata_size = index_entry[9]
1987 1987
1988 1988 if self._inline:
1989 1989 sidedata_offset += self._io.size * (1 + rev)
1990 1990 if sidedata_size == 0:
1991 1991 return {}
1992 1992
1993 1993 segment = self._getsegment(sidedata_offset, sidedata_size)
1994 1994 sidedata = sidedatautil.deserialize_sidedata(segment)
1995 1995 return sidedata
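# Sketch for illustration (not in the original changeset): the public
# entry point for the per-revision metadata decoded above is sidedata(),
# defined earlier. ``rl`` is assumed to be a revlog-like object and the
# helper name is hypothetical.
def collect_sidedata(rl, revs):
    # sidedata() returns a dict mapping sidedata keys to raw byte values;
    # it is empty for revisions without sidedata or for revlog versions
    # that cannot store sidedata.
    return {rev: rl.sidedata(rev) for rev in revs}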
1996 1996
1997 1997 def rawdata(self, nodeorrev, _df=None):
1998 1998 """return an uncompressed raw data of a given node or revision number.
1999 1999
2000 2000 _df - an existing file handle to read from. (internal-only)
2001 2001 """
2002 2002 return self._revisiondata(nodeorrev, _df, raw=True)[0]
2003 2003
2004 2004 def hash(self, text, p1, p2):
2005 2005 """Compute a node hash.
2006 2006
2007 2007 Available as a function so that subclasses can replace the hash
2008 2008 as needed.
2009 2009 """
2010 2010 return storageutil.hashrevisionsha1(text, p1, p2)
2011 2011
2012 2012 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2013 2013 """Check node hash integrity.
2014 2014
2015 2015 Available as a function so that subclasses can extend hash mismatch
2016 2016 behaviors as needed.
2017 2017 """
2018 2018 try:
2019 2019 if p1 is None and p2 is None:
2020 2020 p1, p2 = self.parents(node)
2021 2021 if node != self.hash(text, p1, p2):
2022 2022 # Clear the revision cache on hash failure. The revision cache
2023 2023 # only stores the raw revision and clearing the cache does have
2024 2024 # the side-effect that we won't have a cache hit when the raw
2025 2025 # revision data is accessed. But this case should be rare and
2026 2026 # it is extra work to teach the cache about the hash
2027 2027 # verification state.
2028 2028 if self._revisioncache and self._revisioncache[0] == node:
2029 2029 self._revisioncache = None
2030 2030
2031 2031 revornode = rev
2032 2032 if revornode is None:
2033 2033 revornode = templatefilters.short(hex(node))
2034 2034 raise error.RevlogError(
2035 2035 _(b"integrity check failed on %s:%s")
2036 2036 % (self.indexfile, pycompat.bytestr(revornode))
2037 2037 )
2038 2038 except error.RevlogError:
2039 2039 if self._censorable and storageutil.iscensoredtext(text):
2040 2040 raise error.CensoredNodeError(self.indexfile, node, text)
2041 2041 raise
2042 2042
2043 2043 def _enforceinlinesize(self, tr, fp=None):
2044 2044 """Check if the revlog is too big for inline and convert if so.
2045 2045
2046 2046 This should be called after revisions are added to the revlog. If the
2047 2047 revlog has grown too large to be an inline revlog, it will convert it
2048 2048 to use multiple index and data files.
2049 2049 """
2050 2050 tiprev = len(self) - 1
2051 2051 if (
2052 2052 not self._inline
2053 2053 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
2054 2054 ):
2055 2055 return
2056 2056
2057 2057 troffset = tr.findoffset(self.indexfile)
2058 2058 if troffset is None:
2059 2059 raise error.RevlogError(
2060 2060 _(b"%s not found in the transaction") % self.indexfile
2061 2061 )
2062 2062 trindex = 0
2063 2063 tr.add(self.datafile, 0)
2064 2064
2065 2065 if fp:
2066 2066 fp.flush()
2067 2067 fp.close()
2068 2068 # We can't use the cached file handle after close(). So prevent
2069 2069 # its usage.
2070 2070 self._writinghandles = None
2071 2071
2072 2072 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
2073 2073 for r in self:
2074 2074 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
2075 2075 if troffset <= self.start(r):
2076 2076 trindex = r
2077 2077
2078 2078 with self._indexfp(b'w') as fp:
2079 2079 self.version &= ~FLAG_INLINE_DATA
2080 2080 self._inline = False
2081 2081 io = self._io
2082 2082 for i in self:
2083 2083 e = io.packentry(self.index[i], self.node, self.version, i)
2084 2084 fp.write(e)
2085 2085
2086 2086 # the temp file replaces the real index when we exit the context
2087 2087 # manager
2088 2088
2089 2089 tr.replace(self.indexfile, trindex * self._io.size)
2090 2090 nodemaputil.setup_persistent_nodemap(tr, self)
2091 2091 self._chunkclear()
2092 2092
2093 2093 def _nodeduplicatecallback(self, transaction, node):
2094 2094 """called when trying to add a node already stored."""
2095 2095
2096 2096 def addrevision(
2097 2097 self,
2098 2098 text,
2099 2099 transaction,
2100 2100 link,
2101 2101 p1,
2102 2102 p2,
2103 2103 cachedelta=None,
2104 2104 node=None,
2105 2105 flags=REVIDX_DEFAULT_FLAGS,
2106 2106 deltacomputer=None,
2107 2107 sidedata=None,
2108 2108 ):
2109 2109 """add a revision to the log
2110 2110
2111 2111 text - the revision data to add
2112 2112 transaction - the transaction object used for rollback
2113 2113 link - the linkrev data to add
2114 2114 p1, p2 - the parent nodeids of the revision
2115 2115 cachedelta - an optional precomputed delta
2116 2116 node - nodeid of revision; typically node is not specified, and it is
2117 2117 computed by default as hash(text, p1, p2), however subclasses might
2118 2118 use different hashing method (and override checkhash() in such case)
2119 2119 flags - the known flags to set on the revision
2120 2120 deltacomputer - an optional deltacomputer instance shared between
2121 2121 multiple calls
2122 2122 """
2123 2123 if link == nullrev:
2124 2124 raise error.RevlogError(
2125 2125 _(b"attempted to add linkrev -1 to %s") % self.indexfile
2126 2126 )
2127 2127
2128 2128 if sidedata is None:
2129 2129 sidedata = {}
2130 2130 elif not self.hassidedata:
2131 2131 raise error.ProgrammingError(
2132 2132 _(b"trying to add sidedata to a revlog who don't support them")
2133 2133 )
2134 2134
2135 2135 if flags:
2136 2136 node = node or self.hash(text, p1, p2)
2137 2137
2138 2138 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
2139 2139
2140 2140 # If the flag processor modifies the revision data, ignore any provided
2141 2141 # cachedelta.
2142 2142 if rawtext != text:
2143 2143 cachedelta = None
2144 2144
2145 2145 if len(rawtext) > _maxentrysize:
2146 2146 raise error.RevlogError(
2147 2147 _(
2148 2148 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2149 2149 )
2150 2150 % (self.indexfile, len(rawtext))
2151 2151 )
2152 2152
2153 2153 node = node or self.hash(rawtext, p1, p2)
2154 2154 rev = self.index.get_rev(node)
2155 2155 if rev is not None:
2156 2156 return rev
2157 2157
2158 2158 if validatehash:
2159 2159 self.checkhash(rawtext, node, p1=p1, p2=p2)
2160 2160
2161 2161 return self.addrawrevision(
2162 2162 rawtext,
2163 2163 transaction,
2164 2164 link,
2165 2165 p1,
2166 2166 p2,
2167 2167 node,
2168 2168 flags,
2169 2169 cachedelta=cachedelta,
2170 2170 deltacomputer=deltacomputer,
2171 2171 sidedata=sidedata,
2172 2172 )
2173 2173
2174 2174 def addrawrevision(
2175 2175 self,
2176 2176 rawtext,
2177 2177 transaction,
2178 2178 link,
2179 2179 p1,
2180 2180 p2,
2181 2181 node,
2182 2182 flags,
2183 2183 cachedelta=None,
2184 2184 deltacomputer=None,
2185 2185 sidedata=None,
2186 2186 ):
2187 2187 """add a raw revision with known flags, node and parents
2188 2188 useful when reusing a revision not stored in this revlog (e.g. received
2189 2189 over the wire, or read from an external bundle).
2190 2190 """
2191 2191 dfh = None
2192 2192 if not self._inline:
2193 2193 dfh = self._datafp(b"a+")
2194 2194 ifh = self._indexfp(b"a+")
2195 2195 try:
2196 2196 return self._addrevision(
2197 2197 node,
2198 2198 rawtext,
2199 2199 transaction,
2200 2200 link,
2201 2201 p1,
2202 2202 p2,
2203 2203 flags,
2204 2204 cachedelta,
2205 2205 ifh,
2206 2206 dfh,
2207 2207 deltacomputer=deltacomputer,
2208 2208 sidedata=sidedata,
2209 2209 )
2210 2210 finally:
2211 2211 if dfh:
2212 2212 dfh.close()
2213 2213 ifh.close()
2214 2214
2215 2215 def compress(self, data):
2216 2216 """Generate a possibly-compressed representation of data."""
2217 2217 if not data:
2218 2218 return b'', data
2219 2219
2220 2220 compressed = self._compressor.compress(data)
2221 2221
2222 2222 if compressed:
2223 2223 # The revlog compressor added the header in the returned data.
2224 2224 return b'', compressed
2225 2225
2226 2226 if data[0:1] == b'\0':
2227 2227 return b'', data
2228 2228 return b'u', data
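# Hedged example (added for illustration, not part of the original file):
# the (header, data) convention produced by compress() pairs with
# decompress(), which dispatches on the first byte of the stored chunk.
# ``rl`` is assumed to be a revlog-like object.
def compression_roundtrip(rl, data):
    header, packed = rl.compress(data)
    # An empty header means the compressor embedded its own header (e.g.
    # zlib's 'x') or the data starts with '\0'; 'u' marks data stored
    # uncompressed.
    return bytes(rl.decompress(header + packed)) == bytes(data)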
2229 2229
2230 2230 def decompress(self, data):
2231 2231 """Decompress a revlog chunk.
2232 2232
2233 2233 The chunk is expected to begin with a header identifying the
2234 2234 format type so it can be routed to an appropriate decompressor.
2235 2235 """
2236 2236 if not data:
2237 2237 return data
2238 2238
2239 2239 # Revlogs are read much more frequently than they are written and many
2240 2240 # chunks only take microseconds to decompress, so performance is
2241 2241 # important here.
2242 2242 #
2243 2243 # We can make a few assumptions about revlogs:
2244 2244 #
2245 2245 # 1) the majority of chunks will be compressed (as opposed to inline
2246 2246 # raw data).
2247 2247 # 2) decompressing *any* data will likely be at least 10x slower than
2248 2248 # returning raw inline data.
2249 2249 # 3) we want to prioritize common and officially supported compression
2250 2250 # engines
2251 2251 #
2252 2252 # It follows that we want to optimize for "decompress compressed data
2253 2253 # when encoded with common and officially supported compression engines"
2254 2254 # case over "raw data" and "data encoded by less common or non-official
2255 2255 # compression engines." That is why we have the inline lookup first
2256 2256 # followed by the compengines lookup.
2257 2257 #
2258 2258 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2259 2259 # compressed chunks. And this matters for changelog and manifest reads.
2260 2260 t = data[0:1]
2261 2261
2262 2262 if t == b'x':
2263 2263 try:
2264 2264 return _zlibdecompress(data)
2265 2265 except zlib.error as e:
2266 2266 raise error.RevlogError(
2267 2267 _(b'revlog decompress error: %s')
2268 2268 % stringutil.forcebytestr(e)
2269 2269 )
2270 2270 # '\0' is more common than 'u' so it goes first.
2271 2271 elif t == b'\0':
2272 2272 return data
2273 2273 elif t == b'u':
2274 2274 return util.buffer(data, 1)
2275 2275
2276 2276 try:
2277 2277 compressor = self._decompressors[t]
2278 2278 except KeyError:
2279 2279 try:
2280 2280 engine = util.compengines.forrevlogheader(t)
2281 2281 compressor = engine.revlogcompressor(self._compengineopts)
2282 2282 self._decompressors[t] = compressor
2283 2283 except KeyError:
2284 2284 raise error.RevlogError(_(b'unknown compression type %r') % t)
2285 2285
2286 2286 return compressor.decompress(data)
2287 2287
2288 2288 def _addrevision(
2289 2289 self,
2290 2290 node,
2291 2291 rawtext,
2292 2292 transaction,
2293 2293 link,
2294 2294 p1,
2295 2295 p2,
2296 2296 flags,
2297 2297 cachedelta,
2298 2298 ifh,
2299 2299 dfh,
2300 2300 alwayscache=False,
2301 2301 deltacomputer=None,
2302 2302 sidedata=None,
2303 2303 ):
2304 2304 """internal function to add revisions to the log
2305 2305
2306 2306 see addrevision for argument descriptions.
2307 2307
2308 2308 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2309 2309
2310 2310 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2311 2311 be used.
2312 2312
2313 2313 invariants:
2314 2314 - rawtext is optional (can be None); if not set, cachedelta must be set.
2315 2315 if both are set, they must correspond to each other.
2316 2316 """
2317 2317 if node == nullid:
2318 2318 raise error.RevlogError(
2319 2319 _(b"%s: attempt to add null revision") % self.indexfile
2320 2320 )
2321 2321 if node == wdirid or node in wdirfilenodeids:
2322 2322 raise error.RevlogError(
2323 2323 _(b"%s: attempt to add wdir revision") % self.indexfile
2324 2324 )
2325 2325
2326 2326 if self._inline:
2327 2327 fh = ifh
2328 2328 else:
2329 2329 fh = dfh
2330 2330
2331 2331 btext = [rawtext]
2332 2332
2333 2333 curr = len(self)
2334 2334 prev = curr - 1
2335 2335
2336 2336 offset = self._get_data_offset(prev)
2337 2337
2338 2338 if self._concurrencychecker:
2339 2339 if self._inline:
2340 2340 # offset is "as if" it were in the .d file, so we need to add on
2341 2341 # the size of the entry metadata.
2342 2342 self._concurrencychecker(
2343 2343 ifh, self.indexfile, offset + curr * self._io.size
2344 2344 )
2345 2345 else:
2346 2346 # Entries in the .i are a consistent size.
2347 2347 self._concurrencychecker(
2348 2348 ifh, self.indexfile, curr * self._io.size
2349 2349 )
2350 2350 self._concurrencychecker(dfh, self.datafile, offset)
2351 2351
2352 2352 p1r, p2r = self.rev(p1), self.rev(p2)
2353 2353
2354 2354 # full versions are inserted when the needed deltas
2355 2355 # become comparable to the uncompressed text
2356 2356 if rawtext is None:
2357 2357 # need rawtext size, before it is changed by flag processors, which is
2358 2358 # the non-raw size. use revlog explicitly to avoid filelog's extra
2359 2359 # logic that might remove metadata size.
2360 2360 textlen = mdiff.patchedsize(
2361 2361 revlog.size(self, cachedelta[0]), cachedelta[1]
2362 2362 )
2363 2363 else:
2364 2364 textlen = len(rawtext)
2365 2365
2366 2366 if deltacomputer is None:
2367 2367 deltacomputer = deltautil.deltacomputer(self)
2368 2368
2369 2369 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2370 2370
2371 2371 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2372 2372
2373 2373 if sidedata:
2374 2374 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2375 2375 sidedata_offset = offset + deltainfo.deltalen
2376 2376 else:
2377 2377 serialized_sidedata = b""
2378 2378 # Don't store the offset if the sidedata is empty, that way
2379 2379 # we can easily detect empty sidedata and they will be no different
2380 2380 # than ones we manually add.
2381 2381 sidedata_offset = 0
2382 2382
2383 2383 e = (
2384 2384 offset_type(offset, flags),
2385 2385 deltainfo.deltalen,
2386 2386 textlen,
2387 2387 deltainfo.base,
2388 2388 link,
2389 2389 p1r,
2390 2390 p2r,
2391 2391 node,
2392 2392 sidedata_offset,
2393 2393 len(serialized_sidedata),
2394 2394 )
2395 2395
2396 2396 if self.version & 0xFFFF != REVLOGV2:
2397 2397 e = e[:8]
2398 2398
2399 2399 self.index.append(e)
2400 2400 entry = self._io.packentry(e, self.node, self.version, curr)
2401 2401 self._writeentry(
2402 2402 transaction,
2403 2403 ifh,
2404 2404 dfh,
2405 2405 entry,
2406 2406 deltainfo.data,
2407 2407 link,
2408 2408 offset,
2409 2409 serialized_sidedata,
2410 2410 )
2411 2411
2412 2412 rawtext = btext[0]
2413 2413
2414 2414 if alwayscache and rawtext is None:
2415 2415 rawtext = deltacomputer.buildtext(revinfo, fh)
2416 2416
2417 2417 if type(rawtext) == bytes: # only accept immutable objects
2418 2418 self._revisioncache = (node, curr, rawtext)
2419 2419 self._chainbasecache[curr] = deltainfo.chainbase
2420 2420 return curr
2421 2421
2422 2422 def _get_data_offset(self, prev):
2423 2423 """Returns the current offset in the (in-transaction) data file.
2424 2424 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
2425 2425 file to store that information: since sidedata can be rewritten to the
2426 2426 end of the data file within a transaction, you can have cases where, for
2427 2427 example, rev `n` does not have sidedata while rev `n - 1` does, leading
2428 2428 to `n - 1`'s sidedata being written after `n`'s data.
2429 2429
2430 2430 TODO cache this in a docket file before getting out of experimental."""
2431 2431 if self.version & 0xFFFF != REVLOGV2:
2432 2432 return self.end(prev)
2433 2433
2434 2434 offset = 0
2435 2435 for rev, entry in enumerate(self.index):
2436 2436 sidedata_end = entry[8] + entry[9]
2437 2437 # Sidedata for a previous rev has potentially been written after
2438 2438 # this rev's end, so take the max.
2439 2439 offset = max(self.end(rev), offset, sidedata_end)
2440 2440 return offset
2441 2441
2442 2442 def _writeentry(
2443 2443 self, transaction, ifh, dfh, entry, data, link, offset, sidedata
2444 2444 ):
2445 2445 # Files opened in a+ mode have inconsistent behavior on various
2446 2446 # platforms. Windows requires that a file positioning call be made
2447 2447 # when the file handle transitions between reads and writes. See
2448 2448 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2449 2449 # platforms, Python or the platform itself can be buggy. Some versions
2450 2450 # of Solaris have been observed to not append at the end of the file
2451 2451 # if the file was seeked to before the end. See issue4943 for more.
2452 2452 #
2453 2453 # We work around this issue by inserting a seek() before writing.
2454 2454 # Note: This is likely not necessary on Python 3. However, because
2455 2455 # the file handle is reused for reads and may be seeked there, we need
2456 2456 # to be careful before changing this.
2457 2457 ifh.seek(0, os.SEEK_END)
2458 2458 if dfh:
2459 2459 dfh.seek(0, os.SEEK_END)
2460 2460
2461 2461 curr = len(self) - 1
2462 2462 if not self._inline:
2463 2463 transaction.add(self.datafile, offset)
2464 2464 transaction.add(self.indexfile, curr * len(entry))
2465 2465 if data[0]:
2466 2466 dfh.write(data[0])
2467 2467 dfh.write(data[1])
2468 2468 if sidedata:
2469 2469 dfh.write(sidedata)
2470 2470 ifh.write(entry)
2471 2471 else:
2472 2472 offset += curr * self._io.size
2473 2473 transaction.add(self.indexfile, offset)
2474 2474 ifh.write(entry)
2475 2475 ifh.write(data[0])
2476 2476 ifh.write(data[1])
2477 2477 if sidedata:
2478 2478 ifh.write(sidedata)
2479 2479 self._enforceinlinesize(transaction, ifh)
2480 2480 nodemaputil.setup_persistent_nodemap(transaction, self)
2481 2481
2482 2482 def addgroup(
2483 2483 self,
2484 2484 deltas,
2485 2485 linkmapper,
2486 2486 transaction,
2487 2487 alwayscache=False,
2488 2488 addrevisioncb=None,
2489 2489 duplicaterevisioncb=None,
2490 2490 ):
2491 2491 """
2492 2492 add a delta group
2493 2493
2494 2494 given a set of deltas, add them to the revision log. the
2495 2495 first delta is against its parent, which should be in our
2496 2496 log, the rest are against the previous delta.
2497 2497
2498 2498 If ``addrevisioncb`` is defined, it will be called with arguments of
2499 2499 this revlog and the node that was added.
2500 2500 """
2501 2501
2502 2502 if self._writinghandles:
2503 2503 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2504 2504
2505 2505 r = len(self)
2506 2506 end = 0
2507 2507 if r:
2508 2508 end = self.end(r - 1)
2509 2509 ifh = self._indexfp(b"a+")
2510 2510 isize = r * self._io.size
2511 2511 if self._inline:
2512 2512 transaction.add(self.indexfile, end + isize)
2513 2513 dfh = None
2514 2514 else:
2515 2515 transaction.add(self.indexfile, isize)
2516 2516 transaction.add(self.datafile, end)
2517 2517 dfh = self._datafp(b"a+")
2518 2518
2519 2519 def flush():
2520 2520 if dfh:
2521 2521 dfh.flush()
2522 2522 ifh.flush()
2523 2523
2524 2524 self._writinghandles = (ifh, dfh)
2525 2525 empty = True
2526 2526
2527 2527 try:
2528 2528 deltacomputer = deltautil.deltacomputer(self)
2529 2529 # loop through our set of deltas
2530 2530 for data in deltas:
2531 2531 node, p1, p2, linknode, deltabase, delta, flags, sidedata = data
2532 2532 link = linkmapper(linknode)
2533 2533 flags = flags or REVIDX_DEFAULT_FLAGS
2534 2534
2535 2535 rev = self.index.get_rev(node)
2536 2536 if rev is not None:
2537 2537 # this can happen if two branches make the same change
2538 2538 self._nodeduplicatecallback(transaction, rev)
2539 2539 if duplicaterevisioncb:
2540 2540 duplicaterevisioncb(self, rev)
2541 2541 empty = False
2542 2542 continue
2543 2543
2544 2544 for p in (p1, p2):
2545 2545 if not self.index.has_node(p):
2546 2546 raise error.LookupError(
2547 2547 p, self.indexfile, _(b'unknown parent')
2548 2548 )
2549 2549
2550 2550 if not self.index.has_node(deltabase):
2551 2551 raise error.LookupError(
2552 2552 deltabase, self.indexfile, _(b'unknown delta base')
2553 2553 )
2554 2554
2555 2555 baserev = self.rev(deltabase)
2556 2556
2557 2557 if baserev != nullrev and self.iscensored(baserev):
2558 2558 # if base is censored, delta must be a full replacement in a
2559 2559 # single patch operation
2560 2560 hlen = struct.calcsize(b">lll")
2561 2561 oldlen = self.rawsize(baserev)
2562 2562 newlen = len(delta) - hlen
2563 2563 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2564 2564 raise error.CensoredBaseError(
2565 2565 self.indexfile, self.node(baserev)
2566 2566 )
2567 2567
2568 2568 if not flags and self._peek_iscensored(baserev, delta, flush):
2569 2569 flags |= REVIDX_ISCENSORED
2570 2570
2571 2571 # We assume consumers of addrevisioncb will want to retrieve
2572 2572 # the added revision, which will require a call to
2573 2573 # revision(). revision() will fast path if there is a cache
2574 2574 # hit. So, we tell _addrevision() to always cache in this case.
2575 2575 # We're only using addgroup() in the context of changegroup
2576 2576 # generation so the revision data can always be handled as raw
2577 2577 # by the flagprocessor.
2578 2578 rev = self._addrevision(
2579 2579 node,
2580 2580 None,
2581 2581 transaction,
2582 2582 link,
2583 2583 p1,
2584 2584 p2,
2585 2585 flags,
2586 2586 (baserev, delta),
2587 2587 ifh,
2588 2588 dfh,
2589 2589 alwayscache=alwayscache,
2590 2590 deltacomputer=deltacomputer,
2591 2591 sidedata=sidedata,
2592 2592 )
2593 2593
2594 2594 if addrevisioncb:
2595 2595 addrevisioncb(self, rev)
2596 2596 empty = False
2597 2597
2598 2598 if not dfh and not self._inline:
2599 2599 # addrevision switched from inline to conventional
2600 2600 # reopen the index
2601 2601 ifh.close()
2602 2602 dfh = self._datafp(b"a+")
2603 2603 ifh = self._indexfp(b"a+")
2604 2604 self._writinghandles = (ifh, dfh)
2605 2605 finally:
2606 2606 self._writinghandles = None
2607 2607
2608 2608 if dfh:
2609 2609 dfh.close()
2610 2610 ifh.close()
2611 2611 return not empty
2612 2612
2613 2613 def iscensored(self, rev):
2614 2614 """Check if a file revision is censored."""
2615 2615 if not self._censorable:
2616 2616 return False
2617 2617
2618 2618 return self.flags(rev) & REVIDX_ISCENSORED
2619 2619
2620 2620 def _peek_iscensored(self, baserev, delta, flush):
2621 2621 """Quickly check if a delta produces a censored revision."""
2622 2622 if not self._censorable:
2623 2623 return False
2624 2624
2625 2625 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2626 2626
2627 2627 def getstrippoint(self, minlink):
2628 2628 """find the minimum rev that must be stripped to strip the linkrev
2629 2629
2630 2630 Returns a tuple containing the minimum rev and a set of all revs that
2631 2631 have linkrevs that will be broken by this strip.
2632 2632 """
2633 2633 return storageutil.resolvestripinfo(
2634 2634 minlink,
2635 2635 len(self) - 1,
2636 2636 self.headrevs(),
2637 2637 self.linkrev,
2638 2638 self.parentrevs,
2639 2639 )
2640 2640
2641 2641 def strip(self, minlink, transaction):
2642 2642 """truncate the revlog on the first revision with a linkrev >= minlink
2643 2643
2644 2644 This function is called when we're stripping revision minlink and
2645 2645 its descendants from the repository.
2646 2646
2647 2647 We have to remove all revisions with linkrev >= minlink, because
2648 2648 the equivalent changelog revisions will be renumbered after the
2649 2649 strip.
2650 2650
2651 2651 So we truncate the revlog on the first of these revisions, and
2652 2652 trust that the caller has saved the revisions that shouldn't be
2653 2653 removed and that it'll re-add them after this truncation.
2654 2654 """
2655 2655 if len(self) == 0:
2656 2656 return
2657 2657
2658 2658 rev, _ = self.getstrippoint(minlink)
2659 2659 if rev == len(self):
2660 2660 return
2661 2661
2662 2662 # first truncate the files on disk
2663 2663 end = self.start(rev)
2664 2664 if not self._inline:
2665 2665 transaction.add(self.datafile, end)
2666 2666 end = rev * self._io.size
2667 2667 else:
2668 2668 end += rev * self._io.size
2669 2669
2670 2670 transaction.add(self.indexfile, end)
2671 2671
2672 2672 # then reset internal state in memory to forget those revisions
2673 2673 self._revisioncache = None
2674 2674 self._chaininfocache = util.lrucachedict(500)
2675 2675 self._chunkclear()
2676 2676
2677 2677 del self.index[rev:-1]
2678 2678
2679 2679 def checksize(self):
2680 2680 """Check size of index and data files
2681 2681
2682 2682 return a (dd, di) tuple.
2683 2683 - dd: extra bytes for the "data" file
2684 2684 - di: extra bytes for the "index" file
2685 2685
2686 2686 A healthy revlog will return (0, 0).
2687 2687 """
2688 2688 expected = 0
2689 2689 if len(self):
2690 2690 expected = max(0, self.end(len(self) - 1))
2691 2691
2692 2692 try:
2693 2693 with self._datafp() as f:
2694 2694 f.seek(0, io.SEEK_END)
2695 2695 actual = f.tell()
2696 2696 dd = actual - expected
2697 2697 except IOError as inst:
2698 2698 if inst.errno != errno.ENOENT:
2699 2699 raise
2700 2700 dd = 0
2701 2701
2702 2702 try:
2703 2703 f = self.opener(self.indexfile)
2704 2704 f.seek(0, io.SEEK_END)
2705 2705 actual = f.tell()
2706 2706 f.close()
2707 2707 s = self._io.size
2708 2708 i = max(0, actual // s)
2709 2709 di = actual - (i * s)
2710 2710 if self._inline:
2711 2711 databytes = 0
2712 2712 for r in self:
2713 2713 databytes += max(0, self.length(r))
2714 2714 dd = 0
2715 2715 di = actual - len(self) * s - databytes
2716 2716 except IOError as inst:
2717 2717 if inst.errno != errno.ENOENT:
2718 2718 raise
2719 2719 di = 0
2720 2720
2721 2721 return (dd, di)
2722 2722
2723 2723 def files(self):
2724 2724 res = [self.indexfile]
2725 2725 if not self._inline:
2726 2726 res.append(self.datafile)
2727 2727 return res
2728 2728
2729 2729 def emitrevisions(
2730 2730 self,
2731 2731 nodes,
2732 2732 nodesorder=None,
2733 2733 revisiondata=False,
2734 2734 assumehaveparentrevisions=False,
2735 2735 deltamode=repository.CG_DELTAMODE_STD,
2736 sidedata_helpers=None,
2736 2737 ):
2737 2738 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2738 2739 raise error.ProgrammingError(
2739 2740 b'unhandled value for nodesorder: %s' % nodesorder
2740 2741 )
2741 2742
2742 2743 if nodesorder is None and not self._generaldelta:
2743 2744 nodesorder = b'storage'
2744 2745
2745 2746 if (
2746 2747 not self._storedeltachains
2747 2748 and deltamode != repository.CG_DELTAMODE_PREV
2748 2749 ):
2749 2750 deltamode = repository.CG_DELTAMODE_FULL
2750 2751
2751 2752 return storageutil.emitrevisions(
2752 2753 self,
2753 2754 nodes,
2754 2755 nodesorder,
2755 2756 revlogrevisiondelta,
2756 2757 deltaparentfn=self.deltaparent,
2757 2758 candeltafn=self.candelta,
2758 2759 rawsizefn=self.rawsize,
2759 2760 revdifffn=self.revdiff,
2760 2761 flagsfn=self.flags,
2761 2762 deltamode=deltamode,
2762 2763 revisiondata=revisiondata,
2763 2764 assumehaveparentrevisions=assumehaveparentrevisions,
2765 sidedata_helpers=sidedata_helpers,
2764 2766 )
2765 2767
2766 2768 DELTAREUSEALWAYS = b'always'
2767 2769 DELTAREUSESAMEREVS = b'samerevs'
2768 2770 DELTAREUSENEVER = b'never'
2769 2771
2770 2772 DELTAREUSEFULLADD = b'fulladd'
2771 2773
2772 2774 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2773 2775
2774 2776 def clone(
2775 2777 self,
2776 2778 tr,
2777 2779 destrevlog,
2778 2780 addrevisioncb=None,
2779 2781 deltareuse=DELTAREUSESAMEREVS,
2780 2782 forcedeltabothparents=None,
2781 2783 sidedatacompanion=None,
2782 2784 ):
2783 2785 """Copy this revlog to another, possibly with format changes.
2784 2786
2785 2787 The destination revlog will contain the same revisions and nodes.
2786 2788 However, it may not be bit-for-bit identical due to e.g. delta encoding
2787 2789 differences.
2788 2790
2789 2791 The ``deltareuse`` argument control how deltas from the existing revlog
2790 2792 are preserved in the destination revlog. The argument can have the
2791 2793 following values:
2792 2794
2793 2795 DELTAREUSEALWAYS
2794 2796 Deltas will always be reused (if possible), even if the destination
2795 2797 revlog would not select the same revisions for the delta. This is the
2796 2798 fastest mode of operation.
2797 2799 DELTAREUSESAMEREVS
2798 2800 Deltas will be reused if the destination revlog would pick the same
2799 2801 revisions for the delta. This mode strikes a balance between speed
2800 2802 and optimization.
2801 2803 DELTAREUSENEVER
2802 2804 Deltas will never be reused. This is the slowest mode of execution.
2803 2805 This mode can be used to recompute deltas (e.g. if the diff/delta
2804 2806 algorithm changes).
2805 2807 DELTAREUSEFULLADD
2806 2808 Revisions will be re-added as if they were new content. This is
2807 2809 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2808 2810 e.g. large file detection and handling.
2809 2811
2810 2812 Delta computation can be slow, so the choice of delta reuse policy can
2811 2813 significantly affect run time.
2812 2814
2813 2815 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2814 2816 two extremes. Deltas will be reused if they are appropriate. But if the
2815 2817 delta could choose a better revision, it will do so. This means if you
2816 2818 are converting a non-generaldelta revlog to a generaldelta revlog,
2817 2819 deltas will be recomputed if the delta's parent isn't a parent of the
2818 2820 revision.
2819 2821
2820 2822 In addition to the delta policy, the ``forcedeltabothparents``
2821 2823 argument controls whether to force compute deltas against both parents
2822 2824 for merges. If unset, the destination revlog's existing setting is kept.
2823 2825
2824 2826 If not None, the `sidedatacompanion` is a callable that accepts two
2825 2827 arguments:
2826 2828
2827 2829 (srcrevlog, rev)
2828 2830
2829 2831 and returns a quintet that controls changes to sidedata content from the
2830 2832 old revision to the new clone result:
2831 2833
2832 2834 (dropall, filterout, update, new_flags, dropped_flags)
2833 2835
2834 2836 * if `dropall` is True, all sidedata should be dropped
2835 2837 * `filterout` is a set of sidedata keys that should be dropped
2836 2838 * `update` is a mapping of additional/new key -> value
2837 2839 * new_flags is a bitfield of new flags that the revision should get
2838 2840 * dropped_flags is a bitfield of flags that the revision should no longer have
2839 2841 """
2840 2842 if deltareuse not in self.DELTAREUSEALL:
2841 2843 raise ValueError(
2842 2844 _(b'value for deltareuse invalid: %s') % deltareuse
2843 2845 )
2844 2846
2845 2847 if len(destrevlog):
2846 2848 raise ValueError(_(b'destination revlog is not empty'))
2847 2849
2848 2850 if getattr(self, 'filteredrevs', None):
2849 2851 raise ValueError(_(b'source revlog has filtered revisions'))
2850 2852 if getattr(destrevlog, 'filteredrevs', None):
2851 2853 raise ValueError(_(b'destination revlog has filtered revisions'))
2852 2854
2853 2855 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2854 2856 # if possible.
2855 2857 oldlazydelta = destrevlog._lazydelta
2856 2858 oldlazydeltabase = destrevlog._lazydeltabase
2857 2859 oldamd = destrevlog._deltabothparents
2858 2860
2859 2861 try:
2860 2862 if deltareuse == self.DELTAREUSEALWAYS:
2861 2863 destrevlog._lazydeltabase = True
2862 2864 destrevlog._lazydelta = True
2863 2865 elif deltareuse == self.DELTAREUSESAMEREVS:
2864 2866 destrevlog._lazydeltabase = False
2865 2867 destrevlog._lazydelta = True
2866 2868 elif deltareuse == self.DELTAREUSENEVER:
2867 2869 destrevlog._lazydeltabase = False
2868 2870 destrevlog._lazydelta = False
2869 2871
2870 2872 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2871 2873
2872 2874 self._clone(
2873 2875 tr,
2874 2876 destrevlog,
2875 2877 addrevisioncb,
2876 2878 deltareuse,
2877 2879 forcedeltabothparents,
2878 2880 sidedatacompanion,
2879 2881 )
2880 2882
2881 2883 finally:
2882 2884 destrevlog._lazydelta = oldlazydelta
2883 2885 destrevlog._lazydeltabase = oldlazydeltabase
2884 2886 destrevlog._deltabothparents = oldamd
2885 2887
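For illustration, a minimal sketch of a ``sidedatacompanion`` callable honoring the quintet contract documented in ``clone`` above; the sidedata key and the "leave flags alone" values are made up for the example:

def example_sidedatacompanion(srcrevlog, rev):
    # hypothetical policy: keep everything except one made-up sidedata key,
    # add nothing new, and leave the revision flags untouched
    dropall = False
    filterout = {b'example-obsolete-key'}
    update = {}
    new_flags = 0
    dropped_flags = 0
    return dropall, filterout, update, new_flags, dropped_flags

# usage sketch:
#   srcrevlog.clone(tr, destrevlog,
#                   sidedatacompanion=example_sidedatacompanion)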
2886 2888 def _clone(
2887 2889 self,
2888 2890 tr,
2889 2891 destrevlog,
2890 2892 addrevisioncb,
2891 2893 deltareuse,
2892 2894 forcedeltabothparents,
2893 2895 sidedatacompanion,
2894 2896 ):
2895 2897 """perform the core duty of `revlog.clone` after parameter processing"""
2896 2898 deltacomputer = deltautil.deltacomputer(destrevlog)
2897 2899 index = self.index
2898 2900 for rev in self:
2899 2901 entry = index[rev]
2900 2902
2901 2903 # Some classes override linkrev to take filtered revs into
2902 2904 # account. Use raw entry from index.
2903 2905 flags = entry[0] & 0xFFFF
2904 2906 linkrev = entry[4]
2905 2907 p1 = index[entry[5]][7]
2906 2908 p2 = index[entry[6]][7]
2907 2909 node = entry[7]
2908 2910
2909 2911 sidedataactions = (False, [], {}, 0, 0)
2910 2912 if sidedatacompanion is not None:
2911 2913 sidedataactions = sidedatacompanion(self, rev)
2912 2914
2913 2915 # (Possibly) reuse the delta from the revlog if allowed and
2914 2916 # the revlog chunk is a delta.
2915 2917 cachedelta = None
2916 2918 rawtext = None
2917 2919 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2918 2920 dropall = sidedataactions[0]
2919 2921 filterout = sidedataactions[1]
2920 2922 update = sidedataactions[2]
2921 2923 new_flags = sidedataactions[3]
2922 2924 dropped_flags = sidedataactions[4]
2923 2925 text, sidedata = self._revisiondata(rev)
2924 2926 if dropall:
2925 2927 sidedata = {}
2926 2928 for key in filterout:
2927 2929 sidedata.pop(key, None)
2928 2930 sidedata.update(update)
2929 2931 if not sidedata:
2930 2932 sidedata = None
2931 2933
2932 2934 flags |= new_flags
2933 2935 flags &= ~dropped_flags
2934 2936
2935 2937 destrevlog.addrevision(
2936 2938 text,
2937 2939 tr,
2938 2940 linkrev,
2939 2941 p1,
2940 2942 p2,
2941 2943 cachedelta=cachedelta,
2942 2944 node=node,
2943 2945 flags=flags,
2944 2946 deltacomputer=deltacomputer,
2945 2947 sidedata=sidedata,
2946 2948 )
2947 2949 else:
2948 2950 if destrevlog._lazydelta:
2949 2951 dp = self.deltaparent(rev)
2950 2952 if dp != nullrev:
2951 2953 cachedelta = (dp, bytes(self._chunk(rev)))
2952 2954
2953 2955 if not cachedelta:
2954 2956 rawtext = self.rawdata(rev)
2955 2957
2956 2958 ifh = destrevlog.opener(
2957 2959 destrevlog.indexfile, b'a+', checkambig=False
2958 2960 )
2959 2961 dfh = None
2960 2962 if not destrevlog._inline:
2961 2963 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2962 2964 try:
2963 2965 destrevlog._addrevision(
2964 2966 node,
2965 2967 rawtext,
2966 2968 tr,
2967 2969 linkrev,
2968 2970 p1,
2969 2971 p2,
2970 2972 flags,
2971 2973 cachedelta,
2972 2974 ifh,
2973 2975 dfh,
2974 2976 deltacomputer=deltacomputer,
2975 2977 )
2976 2978 finally:
2977 2979 if dfh:
2978 2980 dfh.close()
2979 2981 ifh.close()
2980 2982
2981 2983 if addrevisioncb:
2982 2984 addrevisioncb(self, rev, node)
2983 2985
2984 2986 def censorrevision(self, tr, censornode, tombstone=b''):
2985 2987 if (self.version & 0xFFFF) == REVLOGV0:
2986 2988 raise error.RevlogError(
2987 2989 _(b'cannot censor with version %d revlogs') % self.version
2988 2990 )
2989 2991
2990 2992 censorrev = self.rev(censornode)
2991 2993 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2992 2994
2993 2995 if len(tombstone) > self.rawsize(censorrev):
2994 2996 raise error.Abort(
2995 2997 _(b'censor tombstone must be no longer than censored data')
2996 2998 )
2997 2999
2998 3000 # Rewriting the revlog in place is hard. Our strategy for censoring is
2999 3001 # to create a new revlog, copy all revisions to it, then replace the
3000 3002 # revlogs on transaction close.
3001 3003
3002 3004 newindexfile = self.indexfile + b'.tmpcensored'
3003 3005 newdatafile = self.datafile + b'.tmpcensored'
3004 3006
3005 3007 # This is a bit dangerous. We could easily have a mismatch of state.
3006 3008 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
3007 3009 newrl.version = self.version
3008 3010 newrl._generaldelta = self._generaldelta
3009 3011 newrl._io = self._io
3010 3012
3011 3013 for rev in self.revs():
3012 3014 node = self.node(rev)
3013 3015 p1, p2 = self.parents(node)
3014 3016
3015 3017 if rev == censorrev:
3016 3018 newrl.addrawrevision(
3017 3019 tombstone,
3018 3020 tr,
3019 3021 self.linkrev(censorrev),
3020 3022 p1,
3021 3023 p2,
3022 3024 censornode,
3023 3025 REVIDX_ISCENSORED,
3024 3026 )
3025 3027
3026 3028 if newrl.deltaparent(rev) != nullrev:
3027 3029 raise error.Abort(
3028 3030 _(
3029 3031 b'censored revision stored as delta; '
3030 3032 b'cannot censor'
3031 3033 ),
3032 3034 hint=_(
3033 3035 b'censoring of revlogs is not '
3034 3036 b'fully implemented; please report '
3035 3037 b'this bug'
3036 3038 ),
3037 3039 )
3038 3040 continue
3039 3041
3040 3042 if self.iscensored(rev):
3041 3043 if self.deltaparent(rev) != nullrev:
3042 3044 raise error.Abort(
3043 3045 _(
3044 3046 b'cannot censor due to censored '
3045 3047 b'revision having delta stored'
3046 3048 )
3047 3049 )
3048 3050 rawtext = self._chunk(rev)
3049 3051 else:
3050 3052 rawtext = self.rawdata(rev)
3051 3053
3052 3054 newrl.addrawrevision(
3053 3055 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
3054 3056 )
3055 3057
3056 3058 tr.addbackup(self.indexfile, location=b'store')
3057 3059 if not self._inline:
3058 3060 tr.addbackup(self.datafile, location=b'store')
3059 3061
3060 3062 self.opener.rename(newrl.indexfile, self.indexfile)
3061 3063 if not self._inline:
3062 3064 self.opener.rename(newrl.datafile, self.datafile)
3063 3065
3064 3066 self.clearcaches()
3065 3067 self._loadindex()
3066 3068
3067 3069 def verifyintegrity(self, state):
3068 3070 """Verifies the integrity of the revlog.
3069 3071
3070 3072 Yields ``revlogproblem`` instances describing problems that are
3071 3073 found.
3072 3074 """
3073 3075 dd, di = self.checksize()
3074 3076 if dd:
3075 3077 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3076 3078 if di:
3077 3079 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3078 3080
3079 3081 version = self.version & 0xFFFF
3080 3082
3081 3083 # The verifier tells us what version revlog we should be.
3082 3084 if version != state[b'expectedversion']:
3083 3085 yield revlogproblem(
3084 3086 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3085 3087 % (self.indexfile, version, state[b'expectedversion'])
3086 3088 )
3087 3089
3088 3090 state[b'skipread'] = set()
3089 3091 state[b'safe_renamed'] = set()
3090 3092
3091 3093 for rev in self:
3092 3094 node = self.node(rev)
3093 3095
3094 3096 # Verify contents. 4 cases to care about:
3095 3097 #
3096 3098 # common: the most common case
3097 3099 # rename: with a rename
3098 3100 # meta: file content starts with b'\1\n', the metadata
3099 3101 # header defined in filelog.py, but without a rename
3100 3102 # ext: content stored externally
3101 3103 #
3102 3104 # More formally, their differences are shown below:
3103 3105 #
3104 3106 # | common | rename | meta | ext
3105 3107 # -------------------------------------------------------
3106 3108 # flags() | 0 | 0 | 0 | not 0
3107 3109 # renamed() | False | True | False | ?
3108 3110 # rawtext[0:2]=='\1\n'| False | True | True | ?
3109 3111 #
3110 3112 # "rawtext" means the raw text stored in revlog data, which
3111 3113 # could be retrieved by "rawdata(rev)". "text"
3112 3114 # mentioned below is "revision(rev)".
3113 3115 #
3114 3116 # There are 3 different lengths stored physically:
3115 3117 # 1. L1: rawsize, stored in revlog index
3116 3118 # 2. L2: len(rawtext), stored in revlog data
3117 3119 # 3. L3: len(text), stored in revlog data if flags==0, or
3118 3120 # possibly somewhere else if flags!=0
3119 3121 #
3120 3122 # L1 should be equal to L2. L3 could be different from them.
3121 3123 # "text" may or may not affect commit hash depending on flag
3122 3124 # processors (see flagutil.addflagprocessor).
3123 3125 #
3124 3126 # | common | rename | meta | ext
3125 3127 # -------------------------------------------------
3126 3128 # rawsize() | L1 | L1 | L1 | L1
3127 3129 # size() | L1 | L2-LM | L1(*) | L1 (?)
3128 3130 # len(rawtext) | L2 | L2 | L2 | L2
3129 3131 # len(text) | L2 | L2 | L2 | L3
3130 3132 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3131 3133 #
3132 3134 # LM: length of metadata, depending on rawtext
3133 3135 # (*): not ideal, see comment in filelog.size
3134 3136 # (?): could be "- len(meta)" if the resolved content has
3135 3137 # rename metadata
3136 3138 #
3137 3139 # Checks needed to be done:
3138 3140 # 1. length check: L1 == L2, in all cases.
3139 3141 # 2. hash check: depending on flag processor, we may need to
3140 3142 # use either "text" (external), or "rawtext" (in revlog).
3141 3143
3142 3144 try:
3143 3145 skipflags = state.get(b'skipflags', 0)
3144 3146 if skipflags:
3145 3147 skipflags &= self.flags(rev)
3146 3148
3147 3149 _verify_revision(self, skipflags, state, node)
3148 3150
3149 3151 l1 = self.rawsize(rev)
3150 3152 l2 = len(self.rawdata(node))
3151 3153
3152 3154 if l1 != l2:
3153 3155 yield revlogproblem(
3154 3156 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3155 3157 node=node,
3156 3158 )
3157 3159
3158 3160 except error.CensoredNodeError:
3159 3161 if state[b'erroroncensored']:
3160 3162 yield revlogproblem(
3161 3163 error=_(b'censored file data'), node=node
3162 3164 )
3163 3165 state[b'skipread'].add(node)
3164 3166 except Exception as e:
3165 3167 yield revlogproblem(
3166 3168 error=_(b'unpacking %s: %s')
3167 3169 % (short(node), stringutil.forcebytestr(e)),
3168 3170 node=node,
3169 3171 )
3170 3172 state[b'skipread'].add(node)
3171 3173
3172 3174 def storageinfo(
3173 3175 self,
3174 3176 exclusivefiles=False,
3175 3177 sharedfiles=False,
3176 3178 revisionscount=False,
3177 3179 trackedsize=False,
3178 3180 storedsize=False,
3179 3181 ):
3180 3182 d = {}
3181 3183
3182 3184 if exclusivefiles:
3183 3185 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3184 3186 if not self._inline:
3185 3187 d[b'exclusivefiles'].append((self.opener, self.datafile))
3186 3188
3187 3189 if sharedfiles:
3188 3190 d[b'sharedfiles'] = []
3189 3191
3190 3192 if revisionscount:
3191 3193 d[b'revisionscount'] = len(self)
3192 3194
3193 3195 if trackedsize:
3194 3196 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3195 3197
3196 3198 if storedsize:
3197 3199 d[b'storedsize'] = sum(
3198 3200 self.opener.stat(path).st_size for path in self.files()
3199 3201 )
3200 3202
3201 3203 return d
@@ -1,513 +1,560 b''
1 1 # storageutil.py - Storage functionality agnostic of backend implementation.
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import re
11 11 import struct
12 12
13 13 from ..i18n import _
14 14 from ..node import (
15 15 bin,
16 16 nullid,
17 17 nullrev,
18 18 )
19 19 from .. import (
20 20 dagop,
21 21 error,
22 22 mdiff,
23 23 pycompat,
24 24 )
25 25 from ..interfaces import repository
26 from ..revlogutils import sidedata as sidedatamod
26 27 from ..utils import hashutil
27 28
28 29 _nullhash = hashutil.sha1(nullid)
29 30
30 31
31 32 def hashrevisionsha1(text, p1, p2):
32 33 """Compute the SHA-1 for revision data and its parents.
33 34
34 35 This hash combines both the current file contents and its history
35 36 in a manner that makes it easy to distinguish nodes with the same
36 37 content in the revision graph.
37 38 """
38 39 # As of now, if one of the parent nodes is null, p2 is null
39 40 if p2 == nullid:
40 41 # deep copy of a hash is faster than creating one
41 42 s = _nullhash.copy()
42 43 s.update(p1)
43 44 else:
44 45 # none of the parent nodes are nullid
45 46 if p1 < p2:
46 47 a = p1
47 48 b = p2
48 49 else:
49 50 a = p2
50 51 b = p1
51 52 s = hashutil.sha1(a)
52 53 s.update(b)
53 54 s.update(text)
54 55 return s.digest()
55 56
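As a quick sanity-check sketch of the rule above: for a root revision (both parents null), the node is simply the SHA-1 of ``nullid + nullid + text``:

import hashlib

from mercurial.node import nullid
from mercurial.utils import storageutil

text = b'hello\n'
expected = hashlib.sha1(nullid + nullid + text).digest()
assert storageutil.hashrevisionsha1(text, nullid, nullid) == expected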
56 57
57 58 METADATA_RE = re.compile(b'\x01\n')
58 59
59 60
60 61 def parsemeta(text):
61 62 """Parse metadata header from revision data.
62 63
63 64 Returns a 2-tuple of (metadata, offset), where both can be None if there
64 65 is no metadata.
65 66 """
66 67 # text can be buffer, so we can't use .startswith or .index
67 68 if text[:2] != b'\x01\n':
68 69 return None, None
69 70 s = METADATA_RE.search(text, 2).start()
70 71 mtext = text[2:s]
71 72 meta = {}
72 73 for l in mtext.splitlines():
73 74 k, v = l.split(b': ', 1)
74 75 meta[k] = v
75 76 return meta, s + 2
76 77
77 78
78 79 def packmeta(meta, text):
79 80 """Add metadata to fulltext to produce revision text."""
80 81 keys = sorted(meta)
81 82 metatext = b''.join(b'%s: %s\n' % (k, meta[k]) for k in keys)
82 83 return b'\x01\n%s\x01\n%s' % (metatext, text)
83 84
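A round-trip sketch of the metadata header format handled by ``parsemeta()`` and ``packmeta()`` above (the copy metadata values are made up):

from mercurial.utils import storageutil

meta = {b'copy': b'foo.txt', b'copyrev': b'0' * 40}
text = storageutil.packmeta(meta, b'file content\n')
# text == b'\x01\ncopy: foo.txt\ncopyrev: 00...00\n\x01\nfile content\n'
parsed, offset = storageutil.parsemeta(text)
assert parsed == meta
assert text[offset:] == b'file content\n'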
84 85
85 86 def iscensoredtext(text):
86 87 meta = parsemeta(text)[0]
87 88 return meta and b'censored' in meta
88 89
89 90
90 91 def filtermetadata(text):
91 92 """Extract just the revision data from source text.
92 93
93 94 Returns ``text`` unless it has a metadata header, in which case we return
93 94 a new buffer without the metadata.
95 96 """
96 97 if not text.startswith(b'\x01\n'):
97 98 return text
98 99
99 100 offset = text.index(b'\x01\n', 2)
100 101 return text[offset + 2 :]
101 102
102 103
103 104 def filerevisioncopied(store, node):
104 105 """Resolve file revision copy metadata.
105 106
106 107 Returns ``False`` if the file has no copy metadata. Otherwise a
107 108 2-tuple of the source filename and node.
108 109 """
109 110 if store.parents(node)[0] != nullid:
110 111 return False
111 112
112 113 meta = parsemeta(store.revision(node))[0]
113 114
114 115 # copy and copyrev occur in pairs. In rare cases due to old bugs,
115 116 # one can occur without the other. So ensure both are present to flag
116 117 # as a copy.
117 118 if meta and b'copy' in meta and b'copyrev' in meta:
118 119 return meta[b'copy'], bin(meta[b'copyrev'])
119 120
120 121 return False
121 122
122 123
123 124 def filedataequivalent(store, node, filedata):
124 125 """Determines whether file data is equivalent to a stored node.
125 126
126 127 Returns True if the passed file data would hash to the same value
127 128 as a stored revision and False otherwise.
128 129
129 130 When a stored revision is censored, filedata must be empty to have
130 131 equivalence.
131 132
132 133 When a stored revision has copy metadata, it is ignored as part
133 134 of the compare.
134 135 """
135 136
136 137 if filedata.startswith(b'\x01\n'):
137 138 revisiontext = b'\x01\n\x01\n' + filedata
138 139 else:
139 140 revisiontext = filedata
140 141
141 142 p1, p2 = store.parents(node)
142 143
143 144 computednode = hashrevisionsha1(revisiontext, p1, p2)
144 145
145 146 if computednode == node:
146 147 return True
147 148
148 149 # Censored files compare against the empty file.
149 150 if store.iscensored(store.rev(node)):
150 151 return filedata == b''
151 152
152 153 # Renaming a file produces a different hash, even if the data
153 154 # remains unchanged. Check if that's the case.
154 155 if store.renamed(node):
155 156 return store.read(node) == filedata
156 157
157 158 return False
158 159
159 160
160 161 def iterrevs(storelen, start=0, stop=None):
161 162 """Iterate over revision numbers in a store."""
162 163 step = 1
163 164
164 165 if stop is not None:
165 166 if start > stop:
166 167 step = -1
167 168 stop += step
168 169 if stop > storelen:
169 170 stop = storelen
170 171 else:
171 172 stop = storelen
172 173
173 174 return pycompat.xrange(start, stop, step)
174 175
175 176
176 177 def fileidlookup(store, fileid, identifier):
177 178 """Resolve the file node for a value.
178 179
179 180 ``store`` is an object implementing the ``ifileindex`` interface.
180 181
181 182 ``fileid`` can be:
182 183
183 184 * A 20 or 32 byte binary node.
184 185 * An integer revision number
185 186 * A 40 or 64 byte hex node.
186 187 * A bytes that can be parsed as an integer representing a revision number.
187 188
188 189 ``identifier`` is used to populate ``error.LookupError`` with an identifier
189 190 for the store.
190 191
191 192 Raises ``error.LookupError`` on failure.
192 193 """
193 194 if isinstance(fileid, int):
194 195 try:
195 196 return store.node(fileid)
196 197 except IndexError:
197 198 raise error.LookupError(
198 199 b'%d' % fileid, identifier, _(b'no match found')
199 200 )
200 201
201 202 if len(fileid) in (20, 32):
202 203 try:
203 204 store.rev(fileid)
204 205 return fileid
205 206 except error.LookupError:
206 207 pass
207 208
208 209 if len(fileid) in (40, 64):
209 210 try:
210 211 rawnode = bin(fileid)
211 212 store.rev(rawnode)
212 213 return rawnode
213 214 except TypeError:
214 215 pass
215 216
216 217 try:
217 218 rev = int(fileid)
218 219
219 220 if b'%d' % rev != fileid:
220 221 raise ValueError
221 222
222 223 try:
223 224 return store.node(rev)
224 225 except (IndexError, TypeError):
225 226 pass
226 227 except (ValueError, OverflowError):
227 228 pass
228 229
229 230 raise error.LookupError(fileid, identifier, _(b'no match found'))
230 231
231 232
232 233 def resolvestripinfo(minlinkrev, tiprev, headrevs, linkrevfn, parentrevsfn):
233 234 """Resolve information needed to strip revisions.
234 235
235 236 Finds the minimum revision number that must be stripped in order to
236 237 strip ``minlinkrev``.
237 238
238 239 Returns a 2-tuple of the minimum revision number to do that and a set
239 240 of all revision numbers that have linkrevs that would be broken
240 241 by that strip.
241 242
242 243 ``tiprev`` is the current tip-most revision. It is ``len(store) - 1``.
243 244 ``headrevs`` is an iterable of head revisions.
244 245 ``linkrevfn`` is a callable that receives a revision and returns a linked
245 246 revision.
246 247 ``parentrevsfn`` is a callable that receives a revision number and returns
247 248 an iterable of its parent revision numbers.
248 249 """
249 250 brokenrevs = set()
250 251 strippoint = tiprev + 1
251 252
252 253 heads = {}
253 254 futurelargelinkrevs = set()
254 255 for head in headrevs:
255 256 headlinkrev = linkrevfn(head)
256 257 heads[head] = headlinkrev
257 258 if headlinkrev >= minlinkrev:
258 259 futurelargelinkrevs.add(headlinkrev)
259 260
260 261 # This algorithm involves walking down the rev graph, starting at the
261 262 # heads. Since the revs are topologically sorted according to linkrev,
262 263 # once all head linkrevs are below the minlink, we know there are
263 264 # no more revs that could have a linkrev greater than minlink.
264 265 # So we can stop walking.
265 266 while futurelargelinkrevs:
266 267 strippoint -= 1
267 268 linkrev = heads.pop(strippoint)
268 269
269 270 if linkrev < minlinkrev:
270 271 brokenrevs.add(strippoint)
271 272 else:
272 273 futurelargelinkrevs.remove(linkrev)
273 274
274 275 for p in parentrevsfn(strippoint):
275 276 if p != nullrev:
276 277 plinkrev = linkrevfn(p)
277 278 heads[p] = plinkrev
278 279 if plinkrev >= minlinkrev:
279 280 futurelargelinkrevs.add(plinkrev)
280 281
281 282 return strippoint, brokenrevs
282 283
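A toy example of that contract: in a three-revision linear store where every linkrev equals its rev, stripping linkrev 1 means truncating at rev 1 with no broken linkrevs:

from mercurial.node import nullrev
from mercurial.utils import storageutil

strippoint, broken = storageutil.resolvestripinfo(
    1,                               # minlinkrev
    2,                               # tiprev
    [2],                             # headrevs
    lambda rev: rev,                 # linkrevfn
    lambda rev: (rev - 1, nullrev),  # parentrevsfn
)
assert strippoint == 1 and broken == set()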
283 284
284 285 def emitrevisions(
285 286 store,
286 287 nodes,
287 288 nodesorder,
288 289 resultcls,
289 290 deltaparentfn=None,
290 291 candeltafn=None,
291 292 rawsizefn=None,
292 293 revdifffn=None,
293 294 flagsfn=None,
294 295 deltamode=repository.CG_DELTAMODE_STD,
295 296 revisiondata=False,
296 297 assumehaveparentrevisions=False,
298 sidedata_helpers=None,
297 299 ):
298 300 """Generic implementation of ifiledata.emitrevisions().
299 301
300 302 Emitting revision data is subtly complex. This function attempts to
301 303 encapsulate all the logic for doing so in a backend-agnostic way.
302 304
303 305 ``store``
304 306 Object conforming to ``ifilestorage`` interface.
305 307
306 308 ``nodes``
307 309 List of revision nodes whose data to emit.
308 310
309 311 ``resultcls``
310 312 A type implementing the ``irevisiondelta`` interface that will be
311 313 constructed and returned.
312 314
313 315 ``deltaparentfn`` (optional)
314 316 Callable receiving a revision number and returning the revision number
315 317 of a revision that the internal delta is stored against. This delta
316 318 will be preferred over computing a new arbitrary delta.
317 319
318 320 If not defined, a delta will always be computed from raw revision
319 321 data.
320 322
321 323 ``candeltafn`` (optional)
322 324 Callable receiving a pair of revision numbers that returns a bool
323 325 indicating whether a delta between them can be produced.
324 326
325 327 If not defined, it is assumed that any two revisions can delta with
326 328 each other.
327 329
328 330 ``rawsizefn`` (optional)
329 331 Callable receiving a revision number and returning the length of the
330 332 ``store.rawdata(rev)``.
331 333
332 334 If not defined, ``len(store.rawdata(rev))`` will be called.
333 335
334 336 ``revdifffn`` (optional)
335 337 Callable receiving a pair of revision numbers that returns a delta
336 338 between them.
337 339
338 340 If not defined, a delta will be computed by invoking mdiff code
339 341 on ``store.revision()`` results.
340 342
341 343 Defining this function allows a precomputed or stored delta to be
342 344 used without having to compute one.
343 345
344 346 ``flagsfn`` (optional)
345 347 Callable receiving a revision number and returns the integer flags
346 348 value for it. If not defined, flags value will be 0.
347 349
348 350 ``deltamode``
349 351 constraint on the delta to be sent:
350 352 * CG_DELTAMODE_STD - normal mode, try to reuse storage deltas,
351 353 * CG_DELTAMODE_PREV - only delta against "prev",
352 354 * CG_DELTAMODE_FULL - only issue full snapshot.
353 355
354 356 Whether to send fulltext revisions instead of deltas, if allowed.
355 357
356 358 ``nodesorder``
357 359 ``revisiondata``
358 360 ``assumehaveparentrevisions``
361 ``sidedata_helpers`` (optional)
362 If not None, means that sidedata should be included.
363 A dictionary of revlog type to tuples of `(repo, computers, removers)`:
364 * `repo` is used as an argument for computers
365 * `computers` is a list of `(category, (keys, computer))` that
366 compute the missing sidedata categories that were requested:
367 * `category` is the sidedata category
368 * `keys` are the sidedata keys to be affected
369 * `computer` is the function `(repo, store, rev, sidedata)` that
370 returns a new sidedata dict.
371 * `removers` will remove the keys corresponding to the categories
372 that are present, but not needed.
373 If both `computers` and `removers` are empty, sidedata are simply not
374 transformed.
375 Revlog types are `changelog`, `manifest` or `filelog`.
359 376 """
360 377
361 378 fnode = store.node
362 379 frev = store.rev
363 380
364 381 if nodesorder == b'nodes':
365 382 revs = [frev(n) for n in nodes]
366 383 elif nodesorder == b'linear':
367 384 revs = {frev(n) for n in nodes}
368 385 revs = dagop.linearize(revs, store.parentrevs)
369 386 else: # storage and default
370 387 revs = sorted(frev(n) for n in nodes)
371 388
372 389 prevrev = None
373 390
374 391 if deltamode == repository.CG_DELTAMODE_PREV or assumehaveparentrevisions:
375 392 prevrev = store.parentrevs(revs[0])[0]
376 393
377 394 # Set of revs available to delta against.
378 395 available = set()
379 396
380 397 for rev in revs:
381 398 if rev == nullrev:
382 399 continue
383 400
384 401 node = fnode(rev)
385 402 p1rev, p2rev = store.parentrevs(rev)
386 403
387 404 if deltaparentfn:
388 405 deltaparentrev = deltaparentfn(rev)
389 406 else:
390 407 deltaparentrev = nullrev
391 408
392 409 # Forced delta against previous mode.
393 410 if deltamode == repository.CG_DELTAMODE_PREV:
394 411 baserev = prevrev
395 412
396 413 # We're instructed to send fulltext. Honor that.
397 414 elif deltamode == repository.CG_DELTAMODE_FULL:
398 415 baserev = nullrev
399 416 # We're instructed to use p1. Honor that
400 417 elif deltamode == repository.CG_DELTAMODE_P1:
401 418 baserev = p1rev
402 419
403 420 # There is a delta in storage. We try to use that because it
404 421 # amounts to effectively copying data from storage and is
405 422 # therefore the fastest.
406 423 elif deltaparentrev != nullrev:
407 424 # Base revision was already emitted in this group. We can
408 425 # always safely use the delta.
409 426 if deltaparentrev in available:
410 427 baserev = deltaparentrev
411 428
412 429 # Base revision is a parent that hasn't been emitted already.
413 430 # Use it if we can assume the receiver has the parent revision.
414 431 elif assumehaveparentrevisions and deltaparentrev in (p1rev, p2rev):
415 432 baserev = deltaparentrev
416 433
417 434 # No guarantee the receiver has the delta parent. Send delta
418 435 # against last revision (if possible), which in the common case
419 436 # should be similar enough to this revision that the delta is
420 437 # reasonable.
421 438 elif prevrev is not None:
422 439 baserev = prevrev
423 440 else:
424 441 baserev = nullrev
425 442
426 443 # Storage has a fulltext revision.
427 444
428 445 # Let's use the previous revision, which is as good a guess as any.
429 446 # There is definitely room to improve this logic.
430 447 elif prevrev is not None:
431 448 baserev = prevrev
432 449 else:
433 450 baserev = nullrev
434 451
435 452 # But we can't actually use our chosen delta base for whatever
436 453 # reason. Reset to fulltext.
437 454 if baserev != nullrev and (candeltafn and not candeltafn(baserev, rev)):
438 455 baserev = nullrev
439 456
440 457 revision = None
441 458 delta = None
442 459 baserevisionsize = None
443 460
444 461 if revisiondata:
445 462 if store.iscensored(baserev) or store.iscensored(rev):
446 463 try:
447 464 revision = store.rawdata(node)
448 465 except error.CensoredNodeError as e:
449 466 revision = e.tombstone
450 467
451 468 if baserev != nullrev:
452 469 if rawsizefn:
453 470 baserevisionsize = rawsizefn(baserev)
454 471 else:
455 472 baserevisionsize = len(store.rawdata(baserev))
456 473
457 474 elif (
458 475 baserev == nullrev and deltamode != repository.CG_DELTAMODE_PREV
459 476 ):
460 477 revision = store.rawdata(node)
461 478 available.add(rev)
462 479 else:
463 480 if revdifffn:
464 481 delta = revdifffn(baserev, rev)
465 482 else:
466 483 delta = mdiff.textdiff(
467 484 store.rawdata(baserev), store.rawdata(rev)
468 485 )
469 486
470 487 available.add(rev)
471 488
489 sidedata = None
490 if sidedata_helpers:
491 sidedata = store.sidedata(rev)
492 sidedata = run_sidedata_helpers(
493 store=store,
494 sidedata_helpers=sidedata_helpers,
495 sidedata=sidedata,
496 rev=rev,
497 )
498 sidedata = sidedatamod.serialize_sidedata(sidedata)
499
472 500 yield resultcls(
473 501 node=node,
474 502 p1node=fnode(p1rev),
475 503 p2node=fnode(p2rev),
476 504 basenode=fnode(baserev),
477 505 flags=flagsfn(rev) if flagsfn else 0,
478 506 baserevisionsize=baserevisionsize,
479 507 revision=revision,
480 508 delta=delta,
481 509 sidedata=sidedata,
482 510 )
483 511
484 512 prevrev = rev
485 513
486 514
515 def run_sidedata_helpers(store, sidedata_helpers, sidedata, rev):
516 """Returns the sidedata for the given revision after running through
517 the given helpers.
518 - `store`: the revlog this applies to (changelog, manifest, or filelog
519 instance)
520 - `sidedata_helpers`: see `storageutil.emitrevisions`
521 - `sidedata`: previous sidedata at the given rev, if any
522 - `rev`: affected rev of `store`
523 """
524 repo, sd_computers, sd_removers = sidedata_helpers
525 kind = store.revlog_kind
526 for _keys, sd_computer in sd_computers.get(kind, []):
527 sidedata = sd_computer(repo, store, rev, sidedata)
528 for keys, _computer in sd_removers.get(kind, []):
529 for key in keys:
530 sidedata.pop(key, None)
531 return sidedata
532
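A minimal sketch (with a made-up sidedata category) of the ``sidedata_helpers`` structure consumed by ``run_sidedata_helpers()`` above; ``repo`` and ``store`` stand for a repository and a changelog-like revlog exposing ``revlog_kind``:

def add_example_category(repo, store, rev, sidedata):
    # computer: return a new sidedata dict containing the extra category
    sidedata = dict(sidedata)
    sidedata[b'example-category'] = b'example-value'
    return sidedata

sidedata_helpers = (
    repo,  # forwarded to every computer
    # computers: revlog kind -> [(keys, computer), ...]
    {b'changelog': [((b'example-category',), add_example_category)]},
    # removers: revlog kind -> [(keys, computer), ...]; listed keys get dropped
    {b'changelog': []},
)

new_sidedata = run_sidedata_helpers(
    store=store, sidedata_helpers=sidedata_helpers, sidedata={}, rev=0
)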
533
487 534 def deltaiscensored(delta, baserev, baselenfn):
488 535 """Determine if a delta represents censored revision data.
489 536
490 537 ``baserev`` is the base revision this delta is encoded against.
491 538 ``baselenfn`` is a callable receiving a revision number that resolves the
492 539 length of the revision fulltext.
493 540
494 541 Returns a bool indicating if the result of the delta represents a censored
495 542 revision.
496 543 """
497 544 # Fragile heuristic: unless new file meta keys are added alphabetically
498 545 # preceding "censored", all censored revisions are prefixed by
499 546 # "\1\ncensored:". A delta producing such a censored revision must be a
500 547 # full-replacement delta, so we inspect the first and only patch in the
501 548 # delta for this prefix.
502 549 hlen = struct.calcsize(b">lll")
503 550 if len(delta) <= hlen:
504 551 return False
505 552
506 553 oldlen = baselenfn(baserev)
507 554 newlen = len(delta) - hlen
508 555 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
509 556 return False
510 557
511 558 add = b"\1\ncensored:"
512 559 addlen = len(add)
513 560 return newlen >= addlen and delta[hlen : hlen + addlen] == add
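To make the heuristic concrete, a small sketch (assuming a base revision of length 10): a censoring delta is a single full-replacement hunk whose payload starts with the `\1\ncensored:` prefix, and ``deltaiscensored()`` flags it:

from mercurial import mdiff
from mercurial.utils import storageutil

tombstone = storageutil.packmeta({b'censored': b'redacted'}, b'')
oldlen = 10  # assumed length of the uncensored base revision
delta = mdiff.replacediffheader(oldlen, len(tombstone)) + tombstone

assert storageutil.deltaiscensored(delta, 0, lambda rev: oldlen)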
@@ -1,738 +1,740 b''
1 1 # simplestorerepo.py - Extension that swaps in alternate repository storage.
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 # To use this with the test suite:
9 9 #
10 10 # $ HGREPOFEATURES="simplestore" ./run-tests.py \
11 11 # --extra-config-opt extensions.simplestore=`pwd`/simplestorerepo.py
12 12
13 13 from __future__ import absolute_import
14 14
15 15 import stat
16 16
17 17 from mercurial.i18n import _
18 18 from mercurial.node import (
19 19 bin,
20 20 hex,
21 21 nullid,
22 22 nullrev,
23 23 )
24 24 from mercurial.thirdparty import attr
25 25 from mercurial import (
26 26 ancestor,
27 27 bundlerepo,
28 28 error,
29 29 extensions,
30 30 localrepo,
31 31 mdiff,
32 32 pycompat,
33 33 revlog,
34 34 store,
35 35 verify,
36 36 )
37 37 from mercurial.interfaces import (
38 38 repository,
39 39 util as interfaceutil,
40 40 )
41 41 from mercurial.utils import (
42 42 cborutil,
43 43 storageutil,
44 44 )
45 45 from mercurial.revlogutils import flagutil
46 46
47 47 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
48 48 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
49 49 # be specifying the version(s) of Mercurial they are tested with, or
50 50 # leave the attribute unspecified.
51 51 testedwith = b'ships-with-hg-core'
52 52
53 53 REQUIREMENT = b'testonly-simplestore'
54 54
55 55
56 56 def validatenode(node):
57 57 if isinstance(node, int):
58 58 raise ValueError('expected node; got int')
59 59
60 60 if len(node) != 20:
61 61 raise ValueError('expected 20 byte node')
62 62
63 63
64 64 def validaterev(rev):
65 65 if not isinstance(rev, int):
66 66 raise ValueError('expected int')
67 67
68 68
69 69 class simplestoreerror(error.StorageError):
70 70 pass
71 71
72 72
73 73 @interfaceutil.implementer(repository.irevisiondelta)
74 74 @attr.s(slots=True)
75 75 class simplestorerevisiondelta(object):
76 76 node = attr.ib()
77 77 p1node = attr.ib()
78 78 p2node = attr.ib()
79 79 basenode = attr.ib()
80 80 flags = attr.ib()
81 81 baserevisionsize = attr.ib()
82 82 revision = attr.ib()
83 83 delta = attr.ib()
84 84 linknode = attr.ib(default=None)
85 85
86 86
87 87 @interfaceutil.implementer(repository.iverifyproblem)
88 88 @attr.s(frozen=True)
89 89 class simplefilestoreproblem(object):
90 90 warning = attr.ib(default=None)
91 91 error = attr.ib(default=None)
92 92 node = attr.ib(default=None)
93 93
94 94
95 95 @interfaceutil.implementer(repository.ifilestorage)
96 96 class filestorage(object):
97 97 """Implements storage for a tracked path.
98 98
99 99 Data is stored in the VFS in a directory corresponding to the tracked
100 100 path.
101 101
102 102 Index data is stored in an ``index`` file using CBOR.
103 103
104 104 Fulltext data is stored in files named after the hex node of each revision.
105 105 """
106 106
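    # On-disk layout sketch (per the class docstring above), assuming a
    # hypothetical tracked path of b'dir/file.txt':
    #
    #   data/dir/file.txt/index       -> CBOR stream of index entries
    #   data/dir/file.txt/<hex node>  -> raw fulltext of each revision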
107 107 _flagserrorclass = simplestoreerror
108 108
109 109 def __init__(self, svfs, path):
110 110 self._svfs = svfs
111 111 self._path = path
112 112
113 113 self._storepath = b'/'.join([b'data', path])
114 114 self._indexpath = b'/'.join([self._storepath, b'index'])
115 115
116 116 indexdata = self._svfs.tryread(self._indexpath)
117 117 if indexdata:
118 118 indexdata = cborutil.decodeall(indexdata)
119 119
120 120 self._indexdata = indexdata or []
121 121 self._indexbynode = {}
122 122 self._indexbyrev = {}
123 123 self._index = []
124 124 self._refreshindex()
125 125
126 126 self._flagprocessors = dict(flagutil.flagprocessors)
127 127
128 128 def _refreshindex(self):
129 129 self._indexbynode.clear()
130 130 self._indexbyrev.clear()
131 131 self._index = []
132 132
133 133 for i, entry in enumerate(self._indexdata):
134 134 self._indexbynode[entry[b'node']] = entry
135 135 self._indexbyrev[i] = entry
136 136
137 137 self._indexbynode[nullid] = {
138 138 b'node': nullid,
139 139 b'p1': nullid,
140 140 b'p2': nullid,
141 141 b'linkrev': nullrev,
142 142 b'flags': 0,
143 143 }
144 144
145 145 self._indexbyrev[nullrev] = {
146 146 b'node': nullid,
147 147 b'p1': nullid,
148 148 b'p2': nullid,
149 149 b'linkrev': nullrev,
150 150 b'flags': 0,
151 151 }
152 152
153 153 for i, entry in enumerate(self._indexdata):
154 154 p1rev, p2rev = self.parentrevs(self.rev(entry[b'node']))
155 155
156 156 # start, length, rawsize, chainbase, linkrev, p1, p2, node
157 157 self._index.append(
158 158 (0, 0, 0, -1, entry[b'linkrev'], p1rev, p2rev, entry[b'node'])
159 159 )
160 160
161 161 self._index.append((0, 0, 0, -1, -1, -1, -1, nullid))
162 162
163 163 def __len__(self):
164 164 return len(self._indexdata)
165 165
166 166 def __iter__(self):
167 167 return iter(range(len(self)))
168 168
169 169 def revs(self, start=0, stop=None):
170 170 step = 1
171 171 if stop is not None:
172 172 if start > stop:
173 173 step = -1
174 174
175 175 stop += step
176 176 else:
177 177 stop = len(self)
178 178
179 179 return range(start, stop, step)
180 180
181 181 def parents(self, node):
182 182 validatenode(node)
183 183
184 184 if node not in self._indexbynode:
185 185 raise KeyError('unknown node')
186 186
187 187 entry = self._indexbynode[node]
188 188
189 189 return entry[b'p1'], entry[b'p2']
190 190
191 191 def parentrevs(self, rev):
192 192 p1, p2 = self.parents(self._indexbyrev[rev][b'node'])
193 193 return self.rev(p1), self.rev(p2)
194 194
195 195 def rev(self, node):
196 196 validatenode(node)
197 197
198 198 try:
199 199 self._indexbynode[node]
200 200 except KeyError:
201 201 raise error.LookupError(node, self._indexpath, _('no node'))
202 202
203 203 for rev, entry in self._indexbyrev.items():
204 204 if entry[b'node'] == node:
205 205 return rev
206 206
207 207 raise error.ProgrammingError(b'this should not occur')
208 208
209 209 def node(self, rev):
210 210 validaterev(rev)
211 211
212 212 return self._indexbyrev[rev][b'node']
213 213
214 214 def hasnode(self, node):
215 215 validatenode(node)
216 216 return node in self._indexbynode
217 217
218 218 def censorrevision(self, tr, censornode, tombstone=b''):
219 219 raise NotImplementedError('TODO')
220 220
221 221 def lookup(self, node):
222 222 if isinstance(node, int):
223 223 return self.node(node)
224 224
225 225 if len(node) == 20:
226 226 self.rev(node)
227 227 return node
228 228
229 229 try:
230 230 rev = int(node)
231 231 if '%d' % rev != node:
232 232 raise ValueError
233 233
234 234 if rev < 0:
235 235 rev = len(self) + rev
236 236 if rev < 0 or rev >= len(self):
237 237 raise ValueError
238 238
239 239 return self.node(rev)
240 240 except (ValueError, OverflowError):
241 241 pass
242 242
243 243 if len(node) == 40:
244 244 try:
245 245 rawnode = bin(node)
246 246 self.rev(rawnode)
247 247 return rawnode
248 248 except TypeError:
249 249 pass
250 250
251 251 raise error.LookupError(node, self._path, _('invalid lookup input'))
252 252
253 253 def linkrev(self, rev):
254 254 validaterev(rev)
255 255
256 256 return self._indexbyrev[rev][b'linkrev']
257 257
258 258 def _flags(self, rev):
259 259 validaterev(rev)
260 260
261 261 return self._indexbyrev[rev][b'flags']
262 262
263 263 def _candelta(self, baserev, rev):
264 264 validaterev(baserev)
265 265 validaterev(rev)
266 266
267 267 if (self._flags(baserev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS) or (
268 268 self._flags(rev) & revlog.REVIDX_RAWTEXT_CHANGING_FLAGS
269 269 ):
270 270 return False
271 271
272 272 return True
273 273
274 274 def checkhash(self, text, node, p1=None, p2=None, rev=None):
275 275 if p1 is None and p2 is None:
276 276 p1, p2 = self.parents(node)
277 277 if node != storageutil.hashrevisionsha1(text, p1, p2):
278 278 raise simplestoreerror(
279 279 _("integrity check failed on %s") % self._path
280 280 )
281 281
282 282 def revision(self, nodeorrev, raw=False):
283 283 if isinstance(nodeorrev, int):
284 284 node = self.node(nodeorrev)
285 285 else:
286 286 node = nodeorrev
287 287 validatenode(node)
288 288
289 289 if node == nullid:
290 290 return b''
291 291
292 292 rev = self.rev(node)
293 293 flags = self._flags(rev)
294 294
295 295 path = b'/'.join([self._storepath, hex(node)])
296 296 rawtext = self._svfs.read(path)
297 297
298 298 if raw:
299 299 validatehash = flagutil.processflagsraw(self, rawtext, flags)
300 300 text = rawtext
301 301 else:
302 302 r = flagutil.processflagsread(self, rawtext, flags)
303 303 text, validatehash = r
304 304 if validatehash:
305 305 self.checkhash(text, node, rev=rev)
306 306
307 307 return text
308 308
309 309 def rawdata(self, nodeorrev):
310 310 return self.revision(nodeorrev, raw=True)
311 311
312 312 def read(self, node):
313 313 validatenode(node)
314 314
315 315 revision = self.revision(node)
316 316
317 317 if not revision.startswith(b'\1\n'):
318 318 return revision
319 319
320 320 start = revision.index(b'\1\n', 2)
321 321 return revision[start + 2 :]
322 322
323 323 def renamed(self, node):
324 324 validatenode(node)
325 325
326 326 if self.parents(node)[0] != nullid:
327 327 return False
328 328
329 329 fulltext = self.revision(node)
330 330 m = storageutil.parsemeta(fulltext)[0]
331 331
332 332 if m and 'copy' in m:
333 333 return m['copy'], bin(m['copyrev'])
334 334
335 335 return False
336 336
337 337 def cmp(self, node, text):
338 338 validatenode(node)
339 339
340 340 t = text
341 341
342 342 if text.startswith(b'\1\n'):
343 343 t = b'\1\n\1\n' + text
344 344
345 345 p1, p2 = self.parents(node)
346 346
347 347 if storageutil.hashrevisionsha1(t, p1, p2) == node:
348 348 return False
349 349
350 350 if self.iscensored(self.rev(node)):
351 351 return text != b''
352 352
353 353 if self.renamed(node):
354 354 t2 = self.read(node)
355 355 return t2 != text
356 356
357 357 return True
358 358
359 359 def size(self, rev):
360 360 validaterev(rev)
361 361
362 362 node = self._indexbyrev[rev][b'node']
363 363
364 364 if self.renamed(node):
365 365 return len(self.read(node))
366 366
367 367 if self.iscensored(rev):
368 368 return 0
369 369
370 370 return len(self.revision(node))
371 371
372 372 def iscensored(self, rev):
373 373 validaterev(rev)
374 374
375 375 return self._flags(rev) & repository.REVISION_FLAG_CENSORED
376 376
377 377 def commonancestorsheads(self, a, b):
378 378 validatenode(a)
379 379 validatenode(b)
380 380
381 381 a = self.rev(a)
382 382 b = self.rev(b)
383 383
384 384 ancestors = ancestor.commonancestorsheads(self.parentrevs, a, b)
385 385 return pycompat.maplist(self.node, ancestors)
386 386
387 387 def descendants(self, revs):
388 388 # This is a copy of revlog.descendants()
389 389 first = min(revs)
390 390 if first == nullrev:
391 391 for i in self:
392 392 yield i
393 393 return
394 394
395 395 seen = set(revs)
396 396 for i in self.revs(start=first + 1):
397 397 for x in self.parentrevs(i):
398 398 if x != nullrev and x in seen:
399 399 seen.add(i)
400 400 yield i
401 401 break
402 402
403 403 # Required by verify.
404 404 def files(self):
405 405 entries = self._svfs.listdir(self._storepath)
406 406
407 407 # Strip out undo.backup.* files created as part of transaction
408 408 # recording.
409 409 entries = [f for f in entries if not f.startswith('undo.backup.')]
410 410
411 411 return [b'/'.join((self._storepath, f)) for f in entries]
412 412
413 413 def storageinfo(
414 414 self,
415 415 exclusivefiles=False,
416 416 sharedfiles=False,
417 417 revisionscount=False,
418 418 trackedsize=False,
419 419 storedsize=False,
420 420 ):
421 421 # TODO do a real implementation of this
422 422 return {
423 423 'exclusivefiles': [],
424 424 'sharedfiles': [],
425 425 'revisionscount': len(self),
426 426 'trackedsize': 0,
427 427 'storedsize': None,
428 428 }
429 429
430 430 def verifyintegrity(self, state):
431 431 state['skipread'] = set()
432 432 for rev in self:
433 433 node = self.node(rev)
434 434 try:
435 435 self.revision(node)
436 436 except Exception as e:
437 437 yield simplefilestoreproblem(
438 438 error='unpacking %s: %s' % (node, e), node=node
439 439 )
440 440 state['skipread'].add(node)
441 441
442 442 def emitrevisions(
443 443 self,
444 444 nodes,
445 445 nodesorder=None,
446 446 revisiondata=False,
447 447 assumehaveparentrevisions=False,
448 448 deltamode=repository.CG_DELTAMODE_STD,
449 sidedata_helpers=None,
449 450 ):
450 451 # TODO this will probably break on some ordering options.
451 452 nodes = [n for n in nodes if n != nullid]
452 453 if not nodes:
453 454 return
454 455 for delta in storageutil.emitrevisions(
455 456 self,
456 457 nodes,
457 458 nodesorder,
458 459 simplestorerevisiondelta,
459 460 revisiondata=revisiondata,
460 461 assumehaveparentrevisions=assumehaveparentrevisions,
461 462 deltamode=deltamode,
463 sidedata_helpers=sidedata_helpers,
462 464 ):
463 465 yield delta
464 466
465 467 def add(self, text, meta, transaction, linkrev, p1, p2):
466 468 if meta or text.startswith(b'\1\n'):
467 469 text = storageutil.packmeta(meta, text)
468 470
469 471 return self.addrevision(text, transaction, linkrev, p1, p2)
470 472
471 473 def addrevision(
472 474 self,
473 475 text,
474 476 transaction,
475 477 linkrev,
476 478 p1,
477 479 p2,
478 480 node=None,
479 481 flags=revlog.REVIDX_DEFAULT_FLAGS,
480 482 cachedelta=None,
481 483 ):
482 484 validatenode(p1)
483 485 validatenode(p2)
484 486
485 487 if flags:
486 488 node = node or storageutil.hashrevisionsha1(text, p1, p2)
487 489
488 490 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
489 491
490 492 node = node or storageutil.hashrevisionsha1(text, p1, p2)
491 493
492 494 if node in self._indexbynode:
493 495 return node
494 496
495 497 if validatehash:
496 498 self.checkhash(rawtext, node, p1=p1, p2=p2)
497 499
498 500 return self._addrawrevision(
499 501 node, rawtext, transaction, linkrev, p1, p2, flags
500 502 )
501 503
502 504 def _addrawrevision(self, node, rawtext, transaction, link, p1, p2, flags):
503 505 transaction.addbackup(self._indexpath)
504 506
505 507 path = b'/'.join([self._storepath, hex(node)])
506 508
507 509 self._svfs.write(path, rawtext)
508 510
509 511 self._indexdata.append(
510 512 {
511 513 b'node': node,
512 514 b'p1': p1,
513 515 b'p2': p2,
514 516 b'linkrev': link,
515 517 b'flags': flags,
516 518 }
517 519 )
518 520
519 521 self._reflectindexupdate()
520 522
521 523 return node
522 524
523 525 def _reflectindexupdate(self):
524 526 self._refreshindex()
525 527 self._svfs.write(
526 528 self._indexpath, ''.join(cborutil.streamencode(self._indexdata))
527 529 )
528 530
529 531 def addgroup(
530 532 self,
531 533 deltas,
532 534 linkmapper,
533 535 transaction,
534 536 addrevisioncb=None,
535 537 duplicaterevisioncb=None,
536 538 maybemissingparents=False,
537 539 ):
538 540 if maybemissingparents:
539 541 raise error.Abort(
540 542 _('simple store does not support missing parents ' 'write mode')
541 543 )
542 544
543 545 empty = True
544 546
545 547 transaction.addbackup(self._indexpath)
546 548
547 549 for node, p1, p2, linknode, deltabase, delta, flags in deltas:
548 550 linkrev = linkmapper(linknode)
549 551 flags = flags or revlog.REVIDX_DEFAULT_FLAGS
550 552
551 553 if node in self._indexbynode:
552 554 if duplicaterevisioncb:
553 555 duplicaterevisioncb(self, self.rev(node))
554 556 empty = False
555 557 continue
556 558
557 559 # Need to resolve the fulltext from the delta base.
558 560 if deltabase == nullid:
559 561 text = mdiff.patch(b'', delta)
560 562 else:
561 563 text = mdiff.patch(self.revision(deltabase), delta)
562 564
563 565 rev = self._addrawrevision(
564 566 node, text, transaction, linkrev, p1, p2, flags
565 567 )
566 568
567 569 if addrevisioncb:
568 570 addrevisioncb(self, rev)
569 571 empty = False
570 572 return not empty
571 573
572 574 def _headrevs(self):
573 575 # Assume all revisions are heads by default.
574 576 revishead = {rev: True for rev in self._indexbyrev}
575 577
576 578 for rev, entry in self._indexbyrev.items():
577 579 # Unset head flag for all seen parents.
578 580 revishead[self.rev(entry[b'p1'])] = False
579 581 revishead[self.rev(entry[b'p2'])] = False
580 582
581 583 return [rev for rev, ishead in sorted(revishead.items()) if ishead]
582 584
583 585 def heads(self, start=None, stop=None):
584 586 # This is copied from revlog.py.
585 587 if start is None and stop is None:
586 588 if not len(self):
587 589 return [nullid]
588 590 return [self.node(r) for r in self._headrevs()]
589 591
590 592 if start is None:
591 593 start = nullid
592 594 if stop is None:
593 595 stop = []
594 596 stoprevs = {self.rev(n) for n in stop}
595 597 startrev = self.rev(start)
596 598 reachable = {startrev}
597 599 heads = {startrev}
598 600
599 601 parentrevs = self.parentrevs
600 602 for r in self.revs(start=startrev + 1):
601 603 for p in parentrevs(r):
602 604 if p in reachable:
603 605 if r not in stoprevs:
604 606 reachable.add(r)
605 607 heads.add(r)
606 608 if p in heads and p not in stoprevs:
607 609 heads.remove(p)
608 610
609 611 return [self.node(r) for r in heads]
610 612
611 613 def children(self, node):
612 614 validatenode(node)
613 615
614 616 # This is a copy of revlog.children().
615 617 c = []
616 618 p = self.rev(node)
617 619 for r in self.revs(start=p + 1):
618 620 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
619 621 if prevs:
620 622 for pr in prevs:
621 623 if pr == p:
622 624 c.append(self.node(r))
623 625 elif p == nullrev:
624 626 c.append(self.node(r))
625 627 return c
626 628
627 629 def getstrippoint(self, minlink):
628 630 return storageutil.resolvestripinfo(
629 631 minlink,
630 632 len(self) - 1,
631 633 self._headrevs(),
632 634 self.linkrev,
633 635 self.parentrevs,
634 636 )
635 637
636 638 def strip(self, minlink, transaction):
637 639 if not len(self):
638 640 return
639 641
640 642 rev, _ignored = self.getstrippoint(minlink)
641 643 if rev == len(self):
642 644 return
643 645
644 646 # Purge index data starting at the requested revision.
645 647 self._indexdata[rev:] = []
646 648 self._reflectindexupdate()
647 649
648 650
649 651 def issimplestorefile(f, kind, st):
650 652 if kind != stat.S_IFREG:
651 653 return False
652 654
653 655 if store.isrevlog(f, kind, st):
654 656 return False
655 657
656 658 # Ignore transaction undo files.
657 659 if f.startswith('undo.'):
658 660 return False
659 661
660 662 # Otherwise assume it belongs to the simple store.
661 663 return True
662 664
663 665
664 666 class simplestore(store.encodedstore):
665 667 def datafiles(self):
666 668 for x in super(simplestore, self).datafiles():
667 669 yield x
668 670
669 671 # Supplement with non-revlog files.
670 672 extrafiles = self._walk('data', True, filefilter=issimplestorefile)
671 673
672 674 for unencoded, encoded, size in extrafiles:
673 675 try:
674 676 unencoded = store.decodefilename(unencoded)
675 677 except KeyError:
676 678 unencoded = None
677 679
678 680 yield unencoded, encoded, size
679 681
680 682
681 683 def reposetup(ui, repo):
682 684 if not repo.local():
683 685 return
684 686
685 687 if isinstance(repo, bundlerepo.bundlerepository):
686 688 raise error.Abort(_('cannot use simple store with bundlerepo'))
687 689
688 690 class simplestorerepo(repo.__class__):
689 691 def file(self, f):
690 692 return filestorage(self.svfs, f)
691 693
692 694 repo.__class__ = simplestorerepo
693 695
694 696
695 697 def featuresetup(ui, supported):
696 698 supported.add(REQUIREMENT)
697 699
698 700
699 701 def newreporequirements(orig, ui, createopts):
700 702 """Modifies default requirements for new repos to use the simple store."""
701 703 requirements = orig(ui, createopts)
702 704
703 705 # These requirements are only used to affect creation of the store
704 706 # object. We have our own store. So we can remove them.
705 707 # TODO do this once we feel like taking the test hit.
706 708 # if 'fncache' in requirements:
707 709 # requirements.remove('fncache')
708 710 # if 'dotencode' in requirements:
709 711 # requirements.remove('dotencode')
710 712
711 713 requirements.add(REQUIREMENT)
712 714
713 715 return requirements
714 716
715 717
716 718 def makestore(orig, requirements, path, vfstype):
717 719 if REQUIREMENT not in requirements:
718 720 return orig(requirements, path, vfstype)
719 721
720 722 return simplestore(path, vfstype)
721 723
722 724
723 725 def verifierinit(orig, self, *args, **kwargs):
724 726 orig(self, *args, **kwargs)
725 727
726 728 # We don't care that files in the store don't align with what is
727 729 # advertised. So suppress these warnings.
728 730 self.warnorphanstorefiles = False
729 731
730 732
731 733 def extsetup(ui):
732 734 localrepo.featuresetupfuncs.add(featuresetup)
733 735
734 736 extensions.wrapfunction(
735 737 localrepo, 'newreporequirements', newreporequirements
736 738 )
737 739 extensions.wrapfunction(localrepo, 'makestore', makestore)
738 740 extensions.wrapfunction(verify.verifier, '__init__', verifierinit)