revlog: change addrevision to return the new revision, not node...
Joerg Sonnenberger
r47236:b38ac143 default draft
@@ -1,1299 +1,1301 b''
1 1 # sqlitestore.py - Storage backend that uses SQLite
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """store repository data in SQLite (EXPERIMENTAL)
9 9
10 10 The sqlitestore extension enables the storage of repository data in SQLite.
11 11
12 12 This extension is HIGHLY EXPERIMENTAL. There are NO BACKWARDS COMPATIBILITY
13 13 GUARANTEES. This means that repositories created with this extension may
14 14 only be usable with the exact version of this extension/Mercurial that was
15 15 used. The extension attempts to enforce this in order to prevent repository
16 16 corruption.
17 17
18 18 In addition, several features are not yet supported or have known bugs:
19 19
20 20 * Only some data is stored in SQLite. Changeset, manifest, and other repository
21 21 data is not yet stored in SQLite.
22 22 * Transactions are not robust. If the process is aborted at the right time
23 23 during transaction close/rollback, the repository could be in an inconsistent
24 24 state. This problem will diminish once all repository data is tracked by
25 25 SQLite.
26 26 * Bundle repositories do not work (the ability to use e.g.
27 27 `hg -R <bundle-file> log` to automatically overlay a bundle on top of the
28 28 existing repository).
29 29 * Various other features don't work.
30 30
31 31 This extension should work for basic clone/pull, update, and commit workflows.
32 32 Some history rewriting operations may fail due to lack of support for bundle
33 33 repositories.
34 34
35 35 To use, activate the extension and set the ``storage.new-repo-backend`` config
36 36 option to ``sqlite`` to enable new repositories to use SQLite for storage.
37 37 """
38 38
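# A minimal example configuration enabling this backend for newly created
# repositories (illustrative; repositories that already exist keep whatever
# storage backend they were created with):
#
#   [extensions]
#   sqlitestore =
#
#   [storage]
#   new-repo-backend = sqlite
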
39 39 # To run the test suite with repos using SQLite by default, execute the
40 40 # following:
41 41 #
42 42 # HGREPOFEATURES="sqlitestore" run-tests.py \
43 43 # --extra-config-opt extensions.sqlitestore= \
44 44 # --extra-config-opt storage.new-repo-backend=sqlite
45 45
46 46 from __future__ import absolute_import
47 47
48 48 import sqlite3
49 49 import struct
50 50 import threading
51 51 import zlib
52 52
53 53 from mercurial.i18n import _
54 54 from mercurial.node import (
55 55 nullid,
56 56 nullrev,
57 57 short,
58 58 )
59 59 from mercurial.thirdparty import attr
60 60 from mercurial import (
61 61 ancestor,
62 62 dagop,
63 63 encoding,
64 64 error,
65 65 extensions,
66 66 localrepo,
67 67 mdiff,
68 68 pycompat,
69 69 registrar,
70 70 requirements,
71 71 util,
72 72 verify,
73 73 )
74 74 from mercurial.interfaces import (
75 75 repository,
76 76 util as interfaceutil,
77 77 )
78 78 from mercurial.utils import (
79 79 hashutil,
80 80 storageutil,
81 81 )
82 82
83 83 try:
84 84 from mercurial import zstd
85 85
86 86 zstd.__version__
87 87 except ImportError:
88 88 zstd = None
89 89
90 90 configtable = {}
91 91 configitem = registrar.configitem(configtable)
92 92
93 93 # experimental config: storage.sqlite.compression
94 94 configitem(
95 95 b'storage',
96 96 b'sqlite.compression',
97 97 default=b'zstd' if zstd else b'zlib',
98 98 experimental=True,
99 99 )
100 100
101 101 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
102 102 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
103 103 # be specifying the version(s) of Mercurial they are tested with, or
104 104 # leave the attribute unspecified.
105 105 testedwith = b'ships-with-hg-core'
106 106
107 107 REQUIREMENT = b'exp-sqlite-001'
108 108 REQUIREMENT_ZSTD = b'exp-sqlite-comp-001=zstd'
109 109 REQUIREMENT_ZLIB = b'exp-sqlite-comp-001=zlib'
110 110 REQUIREMENT_NONE = b'exp-sqlite-comp-001=none'
111 111 REQUIREMENT_SHALLOW_FILES = b'exp-sqlite-shallow-files'
112 112
113 113 CURRENT_SCHEMA_VERSION = 1
114 114
115 115 COMPRESSION_NONE = 1
116 116 COMPRESSION_ZSTD = 2
117 117 COMPRESSION_ZLIB = 3
118 118
119 119 FLAG_CENSORED = 1
120 120 FLAG_MISSING_P1 = 2
121 121 FLAG_MISSING_P2 = 4
122 122
123 123 CREATE_SCHEMA = [
124 124 # Deltas are stored as content-indexed blobs.
125 125 # compression column holds COMPRESSION_* constant for how the
126 126 # delta is encoded.
127 127 'CREATE TABLE delta ('
128 128 ' id INTEGER PRIMARY KEY, '
129 129 ' compression INTEGER NOT NULL, '
130 130 ' hash BLOB UNIQUE ON CONFLICT ABORT, '
131 131 ' delta BLOB NOT NULL '
132 132 ')',
133 133 # Tracked paths are denormalized to integers to avoid redundant
134 134 # storage of the path name.
135 135 'CREATE TABLE filepath ('
136 136 ' id INTEGER PRIMARY KEY, '
137 137 ' path BLOB NOT NULL '
138 138 ')',
139 139 'CREATE UNIQUE INDEX filepath_path ON filepath (path)',
140 140 # We have a single table for all file revision data.
141 141 # Each file revision is uniquely described by a (path, rev) and
142 142 # (path, node).
143 143 #
144 144 # Revision data is stored as a pointer to the delta producing this
145 145 # revision and the file revision whose delta should be applied before
146 146 # that one. One can reconstruct the delta chain by recursively following
147 147 # the delta base revision pointers until one encounters NULL (see the sketch after this list).
148 148 #
149 149 # flags column holds bitwise integer flags controlling storage options.
150 150 # These flags are defined by the FLAG_* constants.
151 151 'CREATE TABLE fileindex ('
152 152 ' id INTEGER PRIMARY KEY, '
153 153 ' pathid INTEGER REFERENCES filepath(id), '
154 154 ' revnum INTEGER NOT NULL, '
155 155 ' p1rev INTEGER NOT NULL, '
156 156 ' p2rev INTEGER NOT NULL, '
157 157 ' linkrev INTEGER NOT NULL, '
158 158 ' flags INTEGER NOT NULL, '
159 159 ' deltaid INTEGER REFERENCES delta(id), '
160 160 ' deltabaseid INTEGER REFERENCES fileindex(id), '
161 161 ' node BLOB NOT NULL '
162 162 ')',
163 163 'CREATE UNIQUE INDEX fileindex_pathrevnum '
164 164 ' ON fileindex (pathid, revnum)',
165 165 'CREATE UNIQUE INDEX fileindex_pathnode ON fileindex (pathid, node)',
166 166 # Provide a view over all file data for convenience.
167 167 'CREATE VIEW filedata AS '
168 168 'SELECT '
169 169 ' fileindex.id AS id, '
170 170 ' filepath.id AS pathid, '
171 171 ' filepath.path AS path, '
172 172 ' fileindex.revnum AS revnum, '
173 173 ' fileindex.node AS node, '
174 174 ' fileindex.p1rev AS p1rev, '
175 175 ' fileindex.p2rev AS p2rev, '
176 176 ' fileindex.linkrev AS linkrev, '
177 177 ' fileindex.flags AS flags, '
178 178 ' fileindex.deltaid AS deltaid, '
179 179 ' fileindex.deltabaseid AS deltabaseid '
180 180 'FROM filepath, fileindex '
181 181 'WHERE fileindex.pathid=filepath.id',
182 182 'PRAGMA user_version=%d' % CURRENT_SCHEMA_VERSION,
183 183 ]
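
# Illustrative sketch (not part of the schema) of how a fulltext is
# reconstructed from the tables above, assuming uncompressed deltas
# (COMPRESSION_NONE): start from a fileindex row, follow deltabaseid until it
# is NULL, collect each referenced delta, then apply the deltas oldest-first
# on top of the chain root.
#
#   def _example_fulltext(db, pathid, node):
#       chain = []
#       row = db.execute(
#           'SELECT deltaid, deltabaseid FROM fileindex '
#           'WHERE pathid=? AND node=?', (pathid, node)).fetchone()
#       while row is not None:
#           deltaid, baseid = row
#           chain.append(db.execute(
#               'SELECT delta FROM delta WHERE id=?',
#               (deltaid,)).fetchone()[0])
#           if baseid is None:
#               break
#           row = db.execute(
#               'SELECT deltaid, deltabaseid FROM fileindex WHERE id=?',
#               (baseid,)).fetchone()
#       chain.reverse()
#       return mdiff.patches(chain[0], chain[1:])
#
# resolvedeltachain() below performs the same walk in a single recursive SQL
# query and additionally handles decompression and cached fulltexts.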
184 184
185 185
186 186 def resolvedeltachain(db, pathid, node, revisioncache, stoprids, zstddctx=None):
187 187 """Resolve a delta chain for a file node."""
188 188
189 189 # TODO the "not in ({stops})" here is possibly slowing down the query
190 190 # because it needs to perform the lookup on every recursive invocation.
191 191 # This could possibly be faster if we created a temporary query with
192 192 # baseid "poisoned" to null and limited the recursive filter to
193 193 # "is not null".
194 194 res = db.execute(
195 195 'WITH RECURSIVE '
196 196 ' deltachain(deltaid, baseid) AS ('
197 197 ' SELECT deltaid, deltabaseid FROM fileindex '
198 198 ' WHERE pathid=? AND node=? '
199 199 ' UNION ALL '
200 200 ' SELECT fileindex.deltaid, deltabaseid '
201 201 ' FROM fileindex, deltachain '
202 202 ' WHERE '
203 203 ' fileindex.id=deltachain.baseid '
204 204 ' AND deltachain.baseid IS NOT NULL '
205 205 ' AND fileindex.id NOT IN ({stops}) '
206 206 ' ) '
207 207 'SELECT deltachain.baseid, compression, delta '
208 208 'FROM deltachain, delta '
209 209 'WHERE delta.id=deltachain.deltaid'.format(
210 210 stops=','.join(['?'] * len(stoprids))
211 211 ),
212 212 tuple([pathid, node] + list(stoprids.keys())),
213 213 )
214 214
215 215 deltas = []
216 216 lastdeltabaseid = None
217 217
218 218 for deltabaseid, compression, delta in res:
219 219 lastdeltabaseid = deltabaseid
220 220
221 221 if compression == COMPRESSION_ZSTD:
222 222 delta = zstddctx.decompress(delta)
223 223 elif compression == COMPRESSION_NONE:
224 224 delta = delta
225 225 elif compression == COMPRESSION_ZLIB:
226 226 delta = zlib.decompress(delta)
227 227 else:
228 228 raise SQLiteStoreError(
229 229 b'unhandled compression type: %d' % compression
230 230 )
231 231
232 232 deltas.append(delta)
233 233
234 234 if lastdeltabaseid in stoprids:
235 235 basetext = revisioncache[stoprids[lastdeltabaseid]]
236 236 else:
237 237 basetext = deltas.pop()
238 238
239 239 deltas.reverse()
240 240 fulltext = mdiff.patches(basetext, deltas)
241 241
242 242 # SQLite returns buffer instances for blob columns on Python 2. This
243 243 # type can propagate through the delta application layer. Because
244 244 # downstream callers assume revisions are bytes, cast as needed.
245 245 if not isinstance(fulltext, bytes):
246 246 fulltext = bytes(fulltext)
247 247
248 248 return fulltext
249 249
250 250
251 251 def insertdelta(db, compression, hash, delta):
252 252 try:
253 253 return db.execute(
254 254 'INSERT INTO delta (compression, hash, delta) VALUES (?, ?, ?)',
255 255 (compression, hash, delta),
256 256 ).lastrowid
257 257 except sqlite3.IntegrityError:
258 258 return db.execute(
259 259 'SELECT id FROM delta WHERE hash=?', (hash,)
260 260 ).fetchone()[0]
261 261
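# Illustrative use of insertdelta() (assuming a connection initialized with
# CREATE_SCHEMA above): inserting a blob whose content hash is already stored
# trips the UNIQUE constraint, so the existing row id is returned and
# identical deltas are stored only once.
#
#   db = sqlite3.connect(':memory:')
#   for stmt in CREATE_SCHEMA:
#       db.execute(stmt)
#   blob = b'example delta'
#   h = hashutil.sha1(blob).digest()
#   first = insertdelta(db, COMPRESSION_NONE, h, blob)
#   again = insertdelta(db, COMPRESSION_NONE, h, blob)
#   assert first == again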
262 262
263 263 class SQLiteStoreError(error.StorageError):
264 264 pass
265 265
266 266
267 267 @attr.s
268 268 class revisionentry(object):
269 269 rid = attr.ib()
270 270 rev = attr.ib()
271 271 node = attr.ib()
272 272 p1rev = attr.ib()
273 273 p2rev = attr.ib()
274 274 p1node = attr.ib()
275 275 p2node = attr.ib()
276 276 linkrev = attr.ib()
277 277 flags = attr.ib()
278 278
279 279
280 280 @interfaceutil.implementer(repository.irevisiondelta)
281 281 @attr.s(slots=True)
282 282 class sqliterevisiondelta(object):
283 283 node = attr.ib()
284 284 p1node = attr.ib()
285 285 p2node = attr.ib()
286 286 basenode = attr.ib()
287 287 flags = attr.ib()
288 288 baserevisionsize = attr.ib()
289 289 revision = attr.ib()
290 290 delta = attr.ib()
291 291 linknode = attr.ib(default=None)
292 292
293 293
294 294 @interfaceutil.implementer(repository.iverifyproblem)
295 295 @attr.s(frozen=True)
296 296 class sqliteproblem(object):
297 297 warning = attr.ib(default=None)
298 298 error = attr.ib(default=None)
299 299 node = attr.ib(default=None)
300 300
301 301
302 302 @interfaceutil.implementer(repository.ifilestorage)
303 303 class sqlitefilestore(object):
304 304 """Implements storage for an individual tracked path."""
305 305
306 306 def __init__(self, db, path, compression):
307 307 self._db = db
308 308 self._path = path
309 309
310 310 self._pathid = None
311 311
312 312 # revnum -> node
313 313 self._revtonode = {}
314 314 # node -> revnum
315 315 self._nodetorev = {}
316 316 # node -> data structure
317 317 self._revisions = {}
318 318
319 319 self._revisioncache = util.lrucachedict(10)
320 320
321 321 self._compengine = compression
322 322
323 323 if compression == b'zstd':
324 324 self._cctx = zstd.ZstdCompressor(level=3)
325 325 self._dctx = zstd.ZstdDecompressor()
326 326 else:
327 327 self._cctx = None
328 328 self._dctx = None
329 329
330 330 self._refreshindex()
331 331
332 332 def _refreshindex(self):
333 333 self._revtonode = {}
334 334 self._nodetorev = {}
335 335 self._revisions = {}
336 336
337 337 res = list(
338 338 self._db.execute(
339 339 'SELECT id FROM filepath WHERE path=?', (self._path,)
340 340 )
341 341 )
342 342
343 343 if not res:
344 344 self._pathid = None
345 345 return
346 346
347 347 self._pathid = res[0][0]
348 348
349 349 res = self._db.execute(
350 350 'SELECT id, revnum, node, p1rev, p2rev, linkrev, flags '
351 351 'FROM fileindex '
352 352 'WHERE pathid=? '
353 353 'ORDER BY revnum ASC',
354 354 (self._pathid,),
355 355 )
356 356
357 357 for i, row in enumerate(res):
358 358 rid, rev, node, p1rev, p2rev, linkrev, flags = row
359 359
360 360 if i != rev:
361 361 raise SQLiteStoreError(
362 362 _(b'sqlite database has inconsistent revision numbers')
363 363 )
364 364
365 365 if p1rev == nullrev:
366 366 p1node = nullid
367 367 else:
368 368 p1node = self._revtonode[p1rev]
369 369
370 370 if p2rev == nullrev:
371 371 p2node = nullid
372 372 else:
373 373 p2node = self._revtonode[p2rev]
374 374
375 375 entry = revisionentry(
376 376 rid=rid,
377 377 rev=rev,
378 378 node=node,
379 379 p1rev=p1rev,
380 380 p2rev=p2rev,
381 381 p1node=p1node,
382 382 p2node=p2node,
383 383 linkrev=linkrev,
384 384 flags=flags,
385 385 )
386 386
387 387 self._revtonode[rev] = node
388 388 self._nodetorev[node] = rev
389 389 self._revisions[node] = entry
390 390
391 391 # Start of ifileindex interface.
392 392
393 393 def __len__(self):
394 394 return len(self._revisions)
395 395
396 396 def __iter__(self):
397 397 return iter(pycompat.xrange(len(self._revisions)))
398 398
399 399 def hasnode(self, node):
400 400 if node == nullid:
401 401 return False
402 402
403 403 return node in self._nodetorev
404 404
405 405 def revs(self, start=0, stop=None):
406 406 return storageutil.iterrevs(
407 407 len(self._revisions), start=start, stop=stop
408 408 )
409 409
410 410 def parents(self, node):
411 411 if node == nullid:
412 412 return nullid, nullid
413 413
414 414 if node not in self._revisions:
415 415 raise error.LookupError(node, self._path, _(b'no node'))
416 416
417 417 entry = self._revisions[node]
418 418 return entry.p1node, entry.p2node
419 419
420 420 def parentrevs(self, rev):
421 421 if rev == nullrev:
422 422 return nullrev, nullrev
423 423
424 424 if rev not in self._revtonode:
425 425 raise IndexError(rev)
426 426
427 427 entry = self._revisions[self._revtonode[rev]]
428 428 return entry.p1rev, entry.p2rev
429 429
430 430 def rev(self, node):
431 431 if node == nullid:
432 432 return nullrev
433 433
434 434 if node not in self._nodetorev:
435 435 raise error.LookupError(node, self._path, _(b'no node'))
436 436
437 437 return self._nodetorev[node]
438 438
439 439 def node(self, rev):
440 440 if rev == nullrev:
441 441 return nullid
442 442
443 443 if rev not in self._revtonode:
444 444 raise IndexError(rev)
445 445
446 446 return self._revtonode[rev]
447 447
448 448 def lookup(self, node):
449 449 return storageutil.fileidlookup(self, node, self._path)
450 450
451 451 def linkrev(self, rev):
452 452 if rev == nullrev:
453 453 return nullrev
454 454
455 455 if rev not in self._revtonode:
456 456 raise IndexError(rev)
457 457
458 458 entry = self._revisions[self._revtonode[rev]]
459 459 return entry.linkrev
460 460
461 461 def iscensored(self, rev):
462 462 if rev == nullrev:
463 463 return False
464 464
465 465 if rev not in self._revtonode:
466 466 raise IndexError(rev)
467 467
468 468 return self._revisions[self._revtonode[rev]].flags & FLAG_CENSORED
469 469
470 470 def commonancestorsheads(self, node1, node2):
471 471 rev1 = self.rev(node1)
472 472 rev2 = self.rev(node2)
473 473
474 474 ancestors = ancestor.commonancestorsheads(self.parentrevs, rev1, rev2)
475 475 return pycompat.maplist(self.node, ancestors)
476 476
477 477 def descendants(self, revs):
478 478 # TODO we could implement this using a recursive SQL query, which
479 479 # might be faster.
480 480 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
481 481
482 482 def heads(self, start=None, stop=None):
483 483 if start is None and stop is None:
484 484 if not len(self):
485 485 return [nullid]
486 486
487 487 startrev = self.rev(start) if start is not None else nullrev
488 488 stoprevs = {self.rev(n) for n in stop or []}
489 489
490 490 revs = dagop.headrevssubset(
491 491 self.revs, self.parentrevs, startrev=startrev, stoprevs=stoprevs
492 492 )
493 493
494 494 return [self.node(rev) for rev in revs]
495 495
496 496 def children(self, node):
497 497 rev = self.rev(node)
498 498
499 499 res = self._db.execute(
500 500 'SELECT'
501 501 ' node '
502 502 ' FROM filedata '
503 503 ' WHERE path=? AND (p1rev=? OR p2rev=?) '
504 504 ' ORDER BY revnum ASC',
505 505 (self._path, rev, rev),
506 506 )
507 507
508 508 return [row[0] for row in res]
509 509
510 510 # End of ifileindex interface.
511 511
512 512 # Start of ifiledata interface.
513 513
514 514 def size(self, rev):
515 515 if rev == nullrev:
516 516 return 0
517 517
518 518 if rev not in self._revtonode:
519 519 raise IndexError(rev)
520 520
521 521 node = self._revtonode[rev]
522 522
523 523 if self.renamed(node):
524 524 return len(self.read(node))
525 525
526 526 return len(self.revision(node))
527 527
528 528 def revision(self, node, raw=False, _verifyhash=True):
529 529 if node in (nullid, nullrev):
530 530 return b''
531 531
532 532 if isinstance(node, int):
533 533 node = self.node(node)
534 534
535 535 if node not in self._nodetorev:
536 536 raise error.LookupError(node, self._path, _(b'no node'))
537 537
538 538 if node in self._revisioncache:
539 539 return self._revisioncache[node]
540 540
541 541 # Because we have a fulltext revision cache, we are able to
542 542 # short-circuit delta chain traversal and decompression as soon as
543 543 # we encounter a revision in the cache.
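# stoprids below maps fileindex row id -> node for every cached revision;
# resolvedeltachain() stops walking the chain when it reaches one of these
# rows and uses revisioncache[node] as the base text. The -1 sentinel means
# no cached stop point exists and the walk continues to the chain root.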
544 544
545 545 stoprids = {self._revisions[n].rid: n for n in self._revisioncache}
546 546
547 547 if not stoprids:
548 548 stoprids[-1] = None
549 549
550 550 fulltext = resolvedeltachain(
551 551 self._db,
552 552 self._pathid,
553 553 node,
554 554 self._revisioncache,
555 555 stoprids,
556 556 zstddctx=self._dctx,
557 557 )
558 558
559 559 # Don't verify hashes if parent nodes were rewritten, as the hash
560 560 # wouldn't verify.
561 561 if self._revisions[node].flags & (FLAG_MISSING_P1 | FLAG_MISSING_P2):
562 562 _verifyhash = False
563 563
564 564 if _verifyhash:
565 565 self._checkhash(fulltext, node)
566 566 self._revisioncache[node] = fulltext
567 567
568 568 return fulltext
569 569
570 570 def rawdata(self, *args, **kwargs):
571 571 return self.revision(*args, **kwargs)
572 572
573 573 def read(self, node):
574 574 return storageutil.filtermetadata(self.revision(node))
575 575
576 576 def renamed(self, node):
577 577 return storageutil.filerevisioncopied(self, node)
578 578
579 579 def cmp(self, node, fulltext):
580 580 return not storageutil.filedataequivalent(self, node, fulltext)
581 581
582 582 def emitrevisions(
583 583 self,
584 584 nodes,
585 585 nodesorder=None,
586 586 revisiondata=False,
587 587 assumehaveparentrevisions=False,
588 588 deltamode=repository.CG_DELTAMODE_STD,
589 589 ):
590 590 if nodesorder not in (b'nodes', b'storage', b'linear', None):
591 591 raise error.ProgrammingError(
592 592 b'unhandled value for nodesorder: %s' % nodesorder
593 593 )
594 594
595 595 nodes = [n for n in nodes if n != nullid]
596 596
597 597 if not nodes:
598 598 return
599 599
600 600 # TODO perform in a single query.
601 601 res = self._db.execute(
602 602 'SELECT revnum, deltaid FROM fileindex '
603 603 'WHERE pathid=? '
604 604 ' AND node in (%s)' % (','.join(['?'] * len(nodes))),
605 605 tuple([self._pathid] + nodes),
606 606 )
607 607
608 608 deltabases = {}
609 609
610 610 for rev, deltaid in res:
611 611 res = self._db.execute(
612 612 'SELECT revnum from fileindex WHERE pathid=? AND deltaid=?',
613 613 (self._pathid, deltaid),
614 614 )
615 615 deltabases[rev] = res.fetchone()[0]
616 616
617 617 # TODO define revdifffn so we can use delta from storage.
618 618 for delta in storageutil.emitrevisions(
619 619 self,
620 620 nodes,
621 621 nodesorder,
622 622 sqliterevisiondelta,
623 623 deltaparentfn=deltabases.__getitem__,
624 624 revisiondata=revisiondata,
625 625 assumehaveparentrevisions=assumehaveparentrevisions,
626 626 deltamode=deltamode,
627 627 ):
628 628
629 629 yield delta
630 630
631 631 # End of ifiledata interface.
632 632
633 633 # Start of ifilemutation interface.
634 634
635 635 def add(self, filedata, meta, transaction, linkrev, p1, p2):
636 636 if meta or filedata.startswith(b'\x01\n'):
637 637 filedata = storageutil.packmeta(meta, filedata)
638 638
639 return self.addrevision(filedata, transaction, linkrev, p1, p2)
639 rev = self.addrevision(filedata, transaction, linkrev, p1, p2)
640 return self.node(rev)
640 641
641 642 def addrevision(
642 643 self,
643 644 revisiondata,
644 645 transaction,
645 646 linkrev,
646 647 p1,
647 648 p2,
648 649 node=None,
649 650 flags=0,
650 651 cachedelta=None,
651 652 ):
652 653 if flags:
653 654 raise SQLiteStoreError(_(b'flags not supported on revisions'))
654 655
655 656 validatehash = node is not None
656 657 node = node or storageutil.hashrevisionsha1(revisiondata, p1, p2)
657 658
658 659 if validatehash:
659 660 self._checkhash(revisiondata, node, p1, p2)
660 661
661 if node in self._nodetorev:
662 return node
662 rev = self._nodetorev.get(node)
663 if rev is not None:
664 return rev
663 665
664 node = self._addrawrevision(
666 rev = self._addrawrevision(
665 667 node, revisiondata, transaction, linkrev, p1, p2
666 668 )
667 669
668 670 self._revisioncache[node] = revisiondata
669 return node
671 return rev
670 672
671 673 def addgroup(
672 674 self,
673 675 deltas,
674 676 linkmapper,
675 677 transaction,
676 678 addrevisioncb=None,
677 679 duplicaterevisioncb=None,
678 680 maybemissingparents=False,
679 681 ):
680 682 empty = True
681 683
682 684 for node, p1, p2, linknode, deltabase, delta, wireflags in deltas:
683 685 storeflags = 0
684 686
685 687 if wireflags & repository.REVISION_FLAG_CENSORED:
686 688 storeflags |= FLAG_CENSORED
687 689
688 690 if wireflags & ~repository.REVISION_FLAG_CENSORED:
689 691 raise SQLiteStoreError(b'unhandled revision flag')
690 692
691 693 if maybemissingparents:
692 694 if p1 != nullid and not self.hasnode(p1):
693 695 p1 = nullid
694 696 storeflags |= FLAG_MISSING_P1
695 697
696 698 if p2 != nullid and not self.hasnode(p2):
697 699 p2 = nullid
698 700 storeflags |= FLAG_MISSING_P2
699 701
700 702 baserev = self.rev(deltabase)
701 703
702 704 # If base is censored, delta must be full replacement in a single
703 705 # patch operation.
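# (An mdiff patch is a sequence of hunks, each headed by three big-endian
# 32-bit integers -- start offset, end offset, replacement length -- so a
# single-hunk full replacement carries the fixed 12-byte header that is
# compared against mdiff.replacediffheader() below.)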
704 706 if baserev != nullrev and self.iscensored(baserev):
705 707 hlen = struct.calcsize(b'>lll')
706 708 oldlen = len(self.rawdata(deltabase, _verifyhash=False))
707 709 newlen = len(delta) - hlen
708 710
709 711 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
710 712 raise error.CensoredBaseError(self._path, deltabase)
711 713
712 714 if not (storeflags & FLAG_CENSORED) and storageutil.deltaiscensored(
713 715 delta, baserev, lambda x: len(self.rawdata(x))
714 716 ):
715 717 storeflags |= FLAG_CENSORED
716 718
717 719 linkrev = linkmapper(linknode)
718 720
719 721 if node in self._revisions:
720 722 # Possibly reset parents to make them proper.
721 723 entry = self._revisions[node]
722 724
723 725 if entry.flags & FLAG_MISSING_P1 and p1 != nullid:
724 726 entry.p1node = p1
725 727 entry.p1rev = self._nodetorev[p1]
726 728 entry.flags &= ~FLAG_MISSING_P1
727 729
728 730 self._db.execute(
729 731 'UPDATE fileindex SET p1rev=?, flags=? WHERE id=?',
730 732 (self._nodetorev[p1], entry.flags, entry.rid),
731 733 )
732 734
733 735 if entry.flags & FLAG_MISSING_P2 and p2 != nullid:
734 736 entry.p2node = p2
735 737 entry.p2rev = self._nodetorev[p2]
736 738 entry.flags &= ~FLAG_MISSING_P2
737 739
738 740 self._db.execute(
739 741 'UPDATE fileindex SET p2rev=?, flags=? WHERE id=?',
740 742 (self._nodetorev[p2], entry.flags, entry.rid),
741 743 )
742 744
743 745 if duplicaterevisioncb:
744 746 duplicaterevisioncb(self, node)
745 747 empty = False
746 748 continue
747 749
748 750 if deltabase == nullid:
749 751 text = mdiff.patch(b'', delta)
750 752 storedelta = None
751 753 else:
752 754 text = None
753 755 storedelta = (deltabase, delta)
754 756
755 757 self._addrawrevision(
756 758 node,
757 759 text,
758 760 transaction,
759 761 linkrev,
760 762 p1,
761 763 p2,
762 764 storedelta=storedelta,
763 765 flags=storeflags,
764 766 )
765 767
766 768 if addrevisioncb:
767 769 addrevisioncb(self, node)
768 770 empty = False
769 771
770 772 return not empty
771 773
772 774 def censorrevision(self, tr, censornode, tombstone=b''):
773 775 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
774 776
775 777 # This restriction is cargo culted from revlogs and makes no sense for
776 778 # SQLite, since columns can be resized at will.
777 779 if len(tombstone) > len(self.rawdata(censornode)):
778 780 raise error.Abort(
779 781 _(b'censor tombstone must be no longer than censored data')
780 782 )
781 783
782 784 # We need to replace the censored revision's data with the tombstone.
783 785 # But replacing that data will have implications for delta chains that
784 786 # reference it.
785 787 #
786 788 # While "better," more complex strategies are possible, we do something
787 789 # simple: we find delta chain children of the censored revision and we
788 790 # replace those incremental deltas with fulltexts of their corresponding
789 791 # revision. Then we delete the now-unreferenced delta and original
790 792 # revision and insert a replacement.
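# For example, with a delta chain A <- B <- C where B is censored: C is
# rewritten to store its own fulltext (deltabaseid set to NULL), B's delta
# is replaced by the tombstone, and the now-unreferenced original delta for
# B is deleted from the delta table.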
791 793
792 794 # Find the delta to be censored.
793 795 censoreddeltaid = self._db.execute(
794 796 'SELECT deltaid FROM fileindex WHERE id=?',
795 797 (self._revisions[censornode].rid,),
796 798 ).fetchone()[0]
797 799
798 800 # Find all its delta chain children.
799 801 # TODO once we support storing deltas for !files, we'll need to look
800 802 # for those delta chains too.
801 803 rows = list(
802 804 self._db.execute(
803 805 'SELECT id, pathid, node FROM fileindex '
804 806 'WHERE deltabaseid=? OR deltaid=?',
805 807 (censoreddeltaid, censoreddeltaid),
806 808 )
807 809 )
808 810
809 811 for row in rows:
810 812 rid, pathid, node = row
811 813
812 814 fulltext = resolvedeltachain(
813 815 self._db, pathid, node, {}, {-1: None}, zstddctx=self._dctx
814 816 )
815 817
816 818 deltahash = hashutil.sha1(fulltext).digest()
817 819
818 820 if self._compengine == b'zstd':
819 821 deltablob = self._cctx.compress(fulltext)
820 822 compression = COMPRESSION_ZSTD
821 823 elif self._compengine == b'zlib':
822 824 deltablob = zlib.compress(fulltext)
823 825 compression = COMPRESSION_ZLIB
824 826 elif self._compengine == b'none':
825 827 deltablob = fulltext
826 828 compression = COMPRESSION_NONE
827 829 else:
828 830 raise error.ProgrammingError(
829 831 b'unhandled compression engine: %s' % self._compengine
830 832 )
831 833
832 834 if len(deltablob) >= len(fulltext):
833 835 deltablob = fulltext
834 836 compression = COMPRESSION_NONE
835 837
836 838 deltaid = insertdelta(self._db, compression, deltahash, deltablob)
837 839
838 840 self._db.execute(
839 841 'UPDATE fileindex SET deltaid=?, deltabaseid=NULL '
840 842 'WHERE id=?',
841 843 (deltaid, rid),
842 844 )
843 845
844 846 # Now create the tombstone delta and replace the delta on the censored
845 847 # node.
846 848 deltahash = hashutil.sha1(tombstone).digest()
847 849 tombstonedeltaid = insertdelta(
848 850 self._db, COMPRESSION_NONE, deltahash, tombstone
849 851 )
850 852
851 853 flags = self._revisions[censornode].flags
852 854 flags |= FLAG_CENSORED
853 855
854 856 self._db.execute(
855 857 'UPDATE fileindex SET flags=?, deltaid=?, deltabaseid=NULL '
856 858 'WHERE pathid=? AND node=?',
857 859 (flags, tombstonedeltaid, self._pathid, censornode),
858 860 )
859 861
860 862 self._db.execute('DELETE FROM delta WHERE id=?', (censoreddeltaid,))
861 863
862 864 self._refreshindex()
863 865 self._revisioncache.clear()
864 866
865 867 def getstrippoint(self, minlink):
866 868 return storageutil.resolvestripinfo(
867 869 minlink,
868 870 len(self) - 1,
869 871 [self.rev(n) for n in self.heads()],
870 872 self.linkrev,
871 873 self.parentrevs,
872 874 )
873 875
874 876 def strip(self, minlink, transaction):
875 877 if not len(self):
876 878 return
877 879
878 880 rev, _ignored = self.getstrippoint(minlink)
879 881
880 882 if rev == len(self):
881 883 return
882 884
883 885 for rev in self.revs(rev):
884 886 self._db.execute(
885 887 'DELETE FROM fileindex WHERE pathid=? AND node=?',
886 888 (self._pathid, self.node(rev)),
887 889 )
888 890
889 891 # TODO how should we garbage collect data in delta table?
890 892
891 893 self._refreshindex()
892 894
893 895 # End of ifilemutation interface.
894 896
895 897 # Start of ifilestorage interface.
896 898
897 899 def files(self):
898 900 return []
899 901
900 902 def storageinfo(
901 903 self,
902 904 exclusivefiles=False,
903 905 sharedfiles=False,
904 906 revisionscount=False,
905 907 trackedsize=False,
906 908 storedsize=False,
907 909 ):
908 910 d = {}
909 911
910 912 if exclusivefiles:
911 913 d[b'exclusivefiles'] = []
912 914
913 915 if sharedfiles:
914 916 # TODO list sqlite file(s) here.
915 917 d[b'sharedfiles'] = []
916 918
917 919 if revisionscount:
918 920 d[b'revisionscount'] = len(self)
919 921
920 922 if trackedsize:
921 923 d[b'trackedsize'] = sum(
922 924 len(self.revision(node)) for node in self._nodetorev
923 925 )
924 926
925 927 if storedsize:
926 928 # TODO implement this?
927 929 d[b'storedsize'] = None
928 930
929 931 return d
930 932
931 933 def verifyintegrity(self, state):
932 934 state[b'skipread'] = set()
933 935
934 936 for rev in self:
935 937 node = self.node(rev)
936 938
937 939 try:
938 940 self.revision(node)
939 941 except Exception as e:
940 942 yield sqliteproblem(
941 943 error=_(b'unpacking %s: %s') % (short(node), e), node=node
942 944 )
943 945
944 946 state[b'skipread'].add(node)
945 947
946 948 # End of ifilestorage interface.
947 949
948 950 def _checkhash(self, fulltext, node, p1=None, p2=None):
949 951 if p1 is None and p2 is None:
950 952 p1, p2 = self.parents(node)
951 953
952 954 if node == storageutil.hashrevisionsha1(fulltext, p1, p2):
953 955 return
954 956
955 957 try:
956 958 del self._revisioncache[node]
957 959 except KeyError:
958 960 pass
959 961
960 962 if storageutil.iscensoredtext(fulltext):
961 963 raise error.CensoredNodeError(self._path, node, fulltext)
962 964
963 965 raise SQLiteStoreError(_(b'integrity check failed on %s') % self._path)
964 966
965 967 def _addrawrevision(
966 968 self,
967 969 node,
968 970 revisiondata,
969 971 transaction,
970 972 linkrev,
971 973 p1,
972 974 p2,
973 975 storedelta=None,
974 976 flags=0,
975 977 ):
976 978 if self._pathid is None:
977 979 res = self._db.execute(
978 980 'INSERT INTO filepath (path) VALUES (?)', (self._path,)
979 981 )
980 982 self._pathid = res.lastrowid
981 983
982 984 # For simplicity, always store a delta against p1.
983 985 # TODO we need a lot more logic here to make behavior reasonable.
984 986
985 987 if storedelta:
986 988 deltabase, delta = storedelta
987 989
988 990 if isinstance(deltabase, int):
989 991 deltabase = self.node(deltabase)
990 992
991 993 else:
992 994 assert revisiondata is not None
993 995 deltabase = p1
994 996
995 997 if deltabase == nullid:
996 998 delta = revisiondata
997 999 else:
998 1000 delta = mdiff.textdiff(
999 1001 self.revision(self.rev(deltabase)), revisiondata
1000 1002 )
1001 1003
1002 1004 # File index stores a pointer to its delta and the parent delta.
1003 1005 # The parent delta is stored via a pointer to the fileindex PK.
1004 1006 if deltabase == nullid:
1005 1007 baseid = None
1006 1008 else:
1007 1009 baseid = self._revisions[deltabase].rid
1008 1010
1009 1011 # Deltas are stored with a hash of their content. This allows
1010 1012 # us to de-duplicate. The table is configured to ignore conflicts
1011 1013 # and it is faster to just insert and silently noop than to look
1012 1014 # first.
1013 1015 deltahash = hashutil.sha1(delta).digest()
1014 1016
1015 1017 if self._compengine == b'zstd':
1016 1018 deltablob = self._cctx.compress(delta)
1017 1019 compression = COMPRESSION_ZSTD
1018 1020 elif self._compengine == b'zlib':
1019 1021 deltablob = zlib.compress(delta)
1020 1022 compression = COMPRESSION_ZLIB
1021 1023 elif self._compengine == b'none':
1022 1024 deltablob = delta
1023 1025 compression = COMPRESSION_NONE
1024 1026 else:
1025 1027 raise error.ProgrammingError(
1026 1028 b'unhandled compression engine: %s' % self._compengine
1027 1029 )
1028 1030
1029 1031 # Don't store compressed data if it isn't practical.
1030 1032 if len(deltablob) >= len(delta):
1031 1033 deltablob = delta
1032 1034 compression = COMPRESSION_NONE
1033 1035
1034 1036 deltaid = insertdelta(self._db, compression, deltahash, deltablob)
1035 1037
1036 1038 rev = len(self)
1037 1039
1038 1040 if p1 == nullid:
1039 1041 p1rev = nullrev
1040 1042 else:
1041 1043 p1rev = self._nodetorev[p1]
1042 1044
1043 1045 if p2 == nullid:
1044 1046 p2rev = nullrev
1045 1047 else:
1046 1048 p2rev = self._nodetorev[p2]
1047 1049
1048 1050 rid = self._db.execute(
1049 1051 'INSERT INTO fileindex ('
1050 1052 ' pathid, revnum, node, p1rev, p2rev, linkrev, flags, '
1051 1053 ' deltaid, deltabaseid) '
1052 1054 ' VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)',
1053 1055 (
1054 1056 self._pathid,
1055 1057 rev,
1056 1058 node,
1057 1059 p1rev,
1058 1060 p2rev,
1059 1061 linkrev,
1060 1062 flags,
1061 1063 deltaid,
1062 1064 baseid,
1063 1065 ),
1064 1066 ).lastrowid
1065 1067
1066 1068 entry = revisionentry(
1067 1069 rid=rid,
1068 1070 rev=rev,
1069 1071 node=node,
1070 1072 p1rev=p1rev,
1071 1073 p2rev=p2rev,
1072 1074 p1node=p1,
1073 1075 p2node=p2,
1074 1076 linkrev=linkrev,
1075 1077 flags=flags,
1076 1078 )
1077 1079
1078 1080 self._nodetorev[node] = rev
1079 1081 self._revtonode[rev] = node
1080 1082 self._revisions[node] = entry
1081 1083
1082 return node
1084 return rev
1083 1085
1084 1086
1085 1087 class sqliterepository(localrepo.localrepository):
1086 1088 def cancopy(self):
1087 1089 return False
1088 1090
1089 1091 def transaction(self, *args, **kwargs):
1090 1092 current = self.currenttransaction()
1091 1093
1092 1094 tr = super(sqliterepository, self).transaction(*args, **kwargs)
1093 1095
1094 1096 if current:
1095 1097 return tr
1096 1098
1097 1099 self._dbconn.execute('BEGIN TRANSACTION')
1098 1100
1099 1101 def committransaction(_):
1100 1102 self._dbconn.commit()
1101 1103
1102 1104 tr.addfinalize(b'sqlitestore', committransaction)
1103 1105
1104 1106 return tr
1105 1107
1106 1108 @property
1107 1109 def _dbconn(self):
1108 1110 # SQLite connections can only be used on the thread that created
1109 1111 # them. In most cases, this "just works." However, hgweb uses
1110 1112 # multiple threads.
1111 1113 tid = threading.current_thread().ident
1112 1114
1113 1115 if self._db:
1114 1116 if self._db[0] == tid:
1115 1117 return self._db[1]
1116 1118
1117 1119 db = makedb(self.svfs.join(b'db.sqlite'))
1118 1120 self._db = (tid, db)
1119 1121
1120 1122 return db
1121 1123
1122 1124
1123 1125 def makedb(path):
1124 1126 """Construct a database handle for a database at path."""
1125 1127
1126 1128 db = sqlite3.connect(encoding.strfromlocal(path))
1127 1129 db.text_factory = bytes
1128 1130
1129 1131 res = db.execute('PRAGMA user_version').fetchone()[0]
1130 1132
1131 1133 # New database.
1132 1134 if res == 0:
1133 1135 for statement in CREATE_SCHEMA:
1134 1136 db.execute(statement)
1135 1137
1136 1138 db.commit()
1137 1139
1138 1140 elif res == CURRENT_SCHEMA_VERSION:
1139 1141 pass
1140 1142
1141 1143 else:
1142 1144 raise error.Abort(_(b'sqlite database has unrecognized version'))
1143 1145
1144 1146 db.execute('PRAGMA journal_mode=WAL')
1145 1147
1146 1148 return db
1147 1149
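# Illustrative check of an existing store database (the path assumes the
# default store layout used by this extension):
#
#   $ sqlite3 .hg/store/db.sqlite 'PRAGMA user_version'
#   1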
1148 1150
1149 1151 def featuresetup(ui, supported):
1150 1152 supported.add(REQUIREMENT)
1151 1153
1152 1154 if zstd:
1153 1155 supported.add(REQUIREMENT_ZSTD)
1154 1156
1155 1157 supported.add(REQUIREMENT_ZLIB)
1156 1158 supported.add(REQUIREMENT_NONE)
1157 1159 supported.add(REQUIREMENT_SHALLOW_FILES)
1158 1160 supported.add(requirements.NARROW_REQUIREMENT)
1159 1161
1160 1162
1161 1163 def newreporequirements(orig, ui, createopts):
1162 1164 if createopts[b'backend'] != b'sqlite':
1163 1165 return orig(ui, createopts)
1164 1166
1165 1167 # This restriction can be lifted once we have more confidence.
1166 1168 if b'sharedrepo' in createopts:
1167 1169 raise error.Abort(
1168 1170 _(b'shared repositories not supported with SQLite store')
1169 1171 )
1170 1172
1171 1173 # This filtering is out of an abundance of caution: we want to ensure
1172 1174 # we honor creation options and we do that by annotating exactly the
1173 1175 # creation options we recognize.
1174 1176 known = {
1175 1177 b'narrowfiles',
1176 1178 b'backend',
1177 1179 b'shallowfilestore',
1178 1180 }
1179 1181
1180 1182 unsupported = set(createopts) - known
1181 1183 if unsupported:
1182 1184 raise error.Abort(
1183 1185 _(b'SQLite store does not support repo creation option: %s')
1184 1186 % b', '.join(sorted(unsupported))
1185 1187 )
1186 1188
1187 1189 # Since we're a hybrid store that still relies on revlogs, we fall back
1188 1190 # to using the revlogv1 backend's storage requirements then adding our
1189 1191 # own requirement.
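# (For example, a new repository created with zstd compression ends up with
# the usual revlogv1 requirements plus b'exp-sqlite-001' and
# b'exp-sqlite-comp-001=zstd' recorded in its requires file.)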
1190 1192 createopts[b'backend'] = b'revlogv1'
1191 1193 requirements = orig(ui, createopts)
1192 1194 requirements.add(REQUIREMENT)
1193 1195
1194 1196 compression = ui.config(b'storage', b'sqlite.compression')
1195 1197
1196 1198 if compression == b'zstd' and not zstd:
1197 1199 raise error.Abort(
1198 1200 _(
1199 1201 b'storage.sqlite.compression set to "zstd" but '
1200 1202 b'zstandard compression not available to this '
1201 1203 b'Mercurial install'
1202 1204 )
1203 1205 )
1204 1206
1205 1207 if compression == b'zstd':
1206 1208 requirements.add(REQUIREMENT_ZSTD)
1207 1209 elif compression == b'zlib':
1208 1210 requirements.add(REQUIREMENT_ZLIB)
1209 1211 elif compression == b'none':
1210 1212 requirements.add(REQUIREMENT_NONE)
1211 1213 else:
1212 1214 raise error.Abort(
1213 1215 _(
1214 1216 b'unknown compression engine defined in '
1215 1217 b'storage.sqlite.compression: %s'
1216 1218 )
1217 1219 % compression
1218 1220 )
1219 1221
1220 1222 if createopts.get(b'shallowfilestore'):
1221 1223 requirements.add(REQUIREMENT_SHALLOW_FILES)
1222 1224
1223 1225 return requirements
1224 1226
1225 1227
1226 1228 @interfaceutil.implementer(repository.ilocalrepositoryfilestorage)
1227 1229 class sqlitefilestorage(object):
1228 1230 """Repository file storage backed by SQLite."""
1229 1231
1230 1232 def file(self, path):
1231 1233 if path[0] == b'/':
1232 1234 path = path[1:]
1233 1235
1234 1236 if REQUIREMENT_ZSTD in self.requirements:
1235 1237 compression = b'zstd'
1236 1238 elif REQUIREMENT_ZLIB in self.requirements:
1237 1239 compression = b'zlib'
1238 1240 elif REQUIREMENT_NONE in self.requirements:
1239 1241 compression = b'none'
1240 1242 else:
1241 1243 raise error.Abort(
1242 1244 _(
1243 1245 b'unable to determine what compression engine '
1244 1246 b'to use for SQLite storage'
1245 1247 )
1246 1248 )
1247 1249
1248 1250 return sqlitefilestore(self._dbconn, path, compression)
1249 1251
1250 1252
1251 1253 def makefilestorage(orig, requirements, features, **kwargs):
1252 1254 """Produce a type conforming to ``ilocalrepositoryfilestorage``."""
1253 1255 if REQUIREMENT in requirements:
1254 1256 if REQUIREMENT_SHALLOW_FILES in requirements:
1255 1257 features.add(repository.REPO_FEATURE_SHALLOW_FILE_STORAGE)
1256 1258
1257 1259 return sqlitefilestorage
1258 1260 else:
1259 1261 return orig(requirements=requirements, features=features, **kwargs)
1260 1262
1261 1263
1262 1264 def makemain(orig, ui, requirements, **kwargs):
1263 1265 if REQUIREMENT in requirements:
1264 1266 if REQUIREMENT_ZSTD in requirements and not zstd:
1265 1267 raise error.Abort(
1266 1268 _(
1267 1269 b'repository uses zstandard compression, which '
1268 1270 b'is not available to this Mercurial install'
1269 1271 )
1270 1272 )
1271 1273
1272 1274 return sqliterepository
1273 1275
1274 1276 return orig(requirements=requirements, **kwargs)
1275 1277
1276 1278
1277 1279 def verifierinit(orig, self, *args, **kwargs):
1278 1280 orig(self, *args, **kwargs)
1279 1281
1280 1282 # We don't care that files in the store don't align with what is
1281 1283 # advertised. So suppress these warnings.
1282 1284 self.warnorphanstorefiles = False
1283 1285
1284 1286
1285 1287 def extsetup(ui):
1286 1288 localrepo.featuresetupfuncs.add(featuresetup)
1287 1289 extensions.wrapfunction(
1288 1290 localrepo, b'newreporequirements', newreporequirements
1289 1291 )
1290 1292 extensions.wrapfunction(localrepo, b'makefilestorage', makefilestorage)
1291 1293 extensions.wrapfunction(localrepo, b'makemain', makemain)
1292 1294 extensions.wrapfunction(verify.verifier, b'__init__', verifierinit)
1293 1295
1294 1296
1295 1297 def reposetup(ui, repo):
1296 1298 if isinstance(repo, sqliterepository):
1297 1299 repo._db = None
1298 1300
1299 1301 # TODO check for bundlerepository?
@@ -1,617 +1,618 b''
1 1 # changelog.py - changelog class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from .i18n import _
11 11 from .node import (
12 12 bin,
13 13 hex,
14 14 nullid,
15 15 )
16 16 from .thirdparty import attr
17 17
18 18 from . import (
19 19 encoding,
20 20 error,
21 21 metadata,
22 22 pycompat,
23 23 revlog,
24 24 )
25 25 from .utils import (
26 26 dateutil,
27 27 stringutil,
28 28 )
29 29 from .revlogutils import flagutil
30 30
31 31 _defaultextra = {b'branch': b'default'}
32 32
33 33
34 34 def _string_escape(text):
35 35 """
36 36 >>> from .pycompat import bytechr as chr
37 37 >>> d = {b'nl': chr(10), b'bs': chr(92), b'cr': chr(13), b'nul': chr(0)}
38 38 >>> s = b"ab%(nl)scd%(bs)s%(bs)sn%(nul)s12ab%(cr)scd%(bs)s%(nl)s" % d
39 39 >>> s
40 40 'ab\\ncd\\\\\\\\n\\x0012ab\\rcd\\\\\\n'
41 41 >>> res = _string_escape(s)
42 42 >>> s == _string_unescape(res)
43 43 True
44 44 """
45 45 # subset of the string_escape codec
46 46 text = (
47 47 text.replace(b'\\', b'\\\\')
48 48 .replace(b'\n', b'\\n')
49 49 .replace(b'\r', b'\\r')
50 50 )
51 51 return text.replace(b'\0', b'\\0')
52 52
53 53
54 54 def _string_unescape(text):
55 55 if b'\\0' in text:
56 56 # fix up \0 without getting into trouble with \\0
57 57 text = text.replace(b'\\\\', b'\\\\\n')
58 58 text = text.replace(b'\\0', b'\0')
59 59 text = text.replace(b'\n', b'')
60 60 return stringutil.unescapestr(text)
61 61
62 62
63 63 def decodeextra(text):
64 64 """
65 65 >>> from .pycompat import bytechr as chr
66 66 >>> sorted(decodeextra(encodeextra({b'foo': b'bar', b'baz': chr(0) + b'2'})
67 67 ... ).items())
68 68 [('baz', '\\x002'), ('branch', 'default'), ('foo', 'bar')]
69 69 >>> sorted(decodeextra(encodeextra({b'foo': b'bar',
70 70 ... b'baz': chr(92) + chr(0) + b'2'})
71 71 ... ).items())
72 72 [('baz', '\\\\\\x002'), ('branch', 'default'), ('foo', 'bar')]
73 73 """
74 74 extra = _defaultextra.copy()
75 75 for l in text.split(b'\0'):
76 76 if l:
77 77 k, v = _string_unescape(l).split(b':', 1)
78 78 extra[k] = v
79 79 return extra
80 80
81 81
82 82 def encodeextra(d):
83 83 # keys must be sorted to produce a deterministic changelog entry
84 84 items = [_string_escape(b'%s:%s' % (k, d[k])) for k in sorted(d)]
85 85 return b"\0".join(items)
86 86
87 87
88 88 def stripdesc(desc):
89 89 """strip trailing whitespace and leading and trailing empty lines"""
90 90 return b'\n'.join([l.rstrip() for l in desc.splitlines()]).strip(b'\n')
91 91
92 92
93 93 class appender(object):
94 94 """the changelog index must be updated last on disk, so we use this class
95 95 to delay writes to it"""
96 96
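# The real on-disk file plus the in-memory buffer 'buf' are presented as one
# virtual file: tell()/seek()/read() work on the combined offset, while
# write() only appends to the buffer, so the index on disk is left untouched
# until the changelog writes the buffered data out itself.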
97 97 def __init__(self, vfs, name, mode, buf):
98 98 self.data = buf
99 99 fp = vfs(name, mode)
100 100 self.fp = fp
101 101 self.offset = fp.tell()
102 102 self.size = vfs.fstat(fp).st_size
103 103 self._end = self.size
104 104
105 105 def end(self):
106 106 return self._end
107 107
108 108 def tell(self):
109 109 return self.offset
110 110
111 111 def flush(self):
112 112 pass
113 113
114 114 @property
115 115 def closed(self):
116 116 return self.fp.closed
117 117
118 118 def close(self):
119 119 self.fp.close()
120 120
121 121 def seek(self, offset, whence=0):
122 122 '''virtual file offset spans real file and data'''
123 123 if whence == 0:
124 124 self.offset = offset
125 125 elif whence == 1:
126 126 self.offset += offset
127 127 elif whence == 2:
128 128 self.offset = self.end() + offset
129 129 if self.offset < self.size:
130 130 self.fp.seek(self.offset)
131 131
132 132 def read(self, count=-1):
133 133 '''only trick here is reads that span real file and data'''
134 134 ret = b""
135 135 if self.offset < self.size:
136 136 s = self.fp.read(count)
137 137 ret = s
138 138 self.offset += len(s)
139 139 if count > 0:
140 140 count -= len(s)
141 141 if count != 0:
142 142 doff = self.offset - self.size
143 143 self.data.insert(0, b"".join(self.data))
144 144 del self.data[1:]
145 145 s = self.data[0][doff : doff + count]
146 146 self.offset += len(s)
147 147 ret += s
148 148 return ret
149 149
150 150 def write(self, s):
151 151 self.data.append(bytes(s))
152 152 self.offset += len(s)
153 153 self._end += len(s)
154 154
155 155 def __enter__(self):
156 156 self.fp.__enter__()
157 157 return self
158 158
159 159 def __exit__(self, *args):
160 160 return self.fp.__exit__(*args)
161 161
162 162
163 163 class _divertopener(object):
164 164 def __init__(self, opener, target):
165 165 self._opener = opener
166 166 self._target = target
167 167
168 168 def __call__(self, name, mode=b'r', checkambig=False, **kwargs):
169 169 if name != self._target:
170 170 return self._opener(name, mode, **kwargs)
171 171 return self._opener(name + b".a", mode, **kwargs)
172 172
173 173 def __getattr__(self, attr):
174 174 return getattr(self._opener, attr)
175 175
176 176
177 177 def _delayopener(opener, target, buf):
178 178 """build an opener that stores chunks in 'buf' instead of 'target'"""
179 179
180 180 def _delay(name, mode=b'r', checkambig=False, **kwargs):
181 181 if name != target:
182 182 return opener(name, mode, **kwargs)
183 183 assert not kwargs
184 184 return appender(opener, name, mode, buf)
185 185
186 186 return _delay
187 187
188 188
189 189 @attr.s
190 190 class _changelogrevision(object):
191 191 # Extensions might modify _defaultextra, so let the constructor below pass
192 192 # it in
193 193 extra = attr.ib()
194 194 manifest = attr.ib(default=nullid)
195 195 user = attr.ib(default=b'')
196 196 date = attr.ib(default=(0, 0))
197 197 files = attr.ib(default=attr.Factory(list))
198 198 filesadded = attr.ib(default=None)
199 199 filesremoved = attr.ib(default=None)
200 200 p1copies = attr.ib(default=None)
201 201 p2copies = attr.ib(default=None)
202 202 description = attr.ib(default=b'')
203 203 branchinfo = attr.ib(default=(_defaultextra[b'branch'], False))
204 204
205 205
206 206 class changelogrevision(object):
207 207 """Holds results of a parsed changelog revision.
208 208
209 209 Changelog revisions consist of multiple pieces of data, including
210 210 the manifest node, user, and date. This object exposes a view into
211 211 the parsed object.
212 212 """
213 213
214 214 __slots__ = (
215 215 '_offsets',
216 216 '_text',
217 217 '_sidedata',
218 218 '_cpsd',
219 219 '_changes',
220 220 )
221 221
222 222 def __new__(cls, text, sidedata, cpsd):
223 223 if not text:
224 224 return _changelogrevision(extra=_defaultextra)
225 225
226 226 self = super(changelogrevision, cls).__new__(cls)
227 227 # We could return here and implement the following as an __init__.
228 228 # But doing it here is equivalent and saves an extra function call.
229 229
230 230 # format used:
231 231 # nodeid\n : manifest node in ascii
232 232 # user\n : user, no \n or \r allowed
233 233 # time tz extra\n : date (time is int or float, timezone is int)
234 234 # : extra is metadata, encoded and separated by '\0'
235 235 # : older versions ignore it
236 236 # files\n\n : files modified by the cset, no \n or \r allowed
237 237 # (.*) : comment (free text, ideally utf-8)
238 238 #
239 239 # changelog v0 doesn't use extra
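# An illustrative (made-up) entry in this format; multiple extras would be
# NUL-separated on the date line:
#
#   0123456789abcdef0123456789abcdef01234567
#   Alice <alice@example.org>
#   1500000000 0 branch:stable
#   a/file.txt
#   b/other.txt
#
#   commit message text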
240 240
241 241 nl1 = text.index(b'\n')
242 242 nl2 = text.index(b'\n', nl1 + 1)
243 243 nl3 = text.index(b'\n', nl2 + 1)
244 244
245 245 # The list of files may be empty. Which means nl3 is the first of the
246 246 # double newline that precedes the description.
247 247 if text[nl3 + 1 : nl3 + 2] == b'\n':
248 248 doublenl = nl3
249 249 else:
250 250 doublenl = text.index(b'\n\n', nl3 + 1)
251 251
252 252 self._offsets = (nl1, nl2, nl3, doublenl)
253 253 self._text = text
254 254 self._sidedata = sidedata
255 255 self._cpsd = cpsd
256 256 self._changes = None
257 257
258 258 return self
259 259
260 260 @property
261 261 def manifest(self):
262 262 return bin(self._text[0 : self._offsets[0]])
263 263
264 264 @property
265 265 def user(self):
266 266 off = self._offsets
267 267 return encoding.tolocal(self._text[off[0] + 1 : off[1]])
268 268
269 269 @property
270 270 def _rawdate(self):
271 271 off = self._offsets
272 272 dateextra = self._text[off[1] + 1 : off[2]]
273 273 return dateextra.split(b' ', 2)[0:2]
274 274
275 275 @property
276 276 def _rawextra(self):
277 277 off = self._offsets
278 278 dateextra = self._text[off[1] + 1 : off[2]]
279 279 fields = dateextra.split(b' ', 2)
280 280 if len(fields) != 3:
281 281 return None
282 282
283 283 return fields[2]
284 284
285 285 @property
286 286 def date(self):
287 287 raw = self._rawdate
288 288 time = float(raw[0])
289 289 # Various tools did silly things with the timezone.
290 290 try:
291 291 timezone = int(raw[1])
292 292 except ValueError:
293 293 timezone = 0
294 294
295 295 return time, timezone
296 296
297 297 @property
298 298 def extra(self):
299 299 raw = self._rawextra
300 300 if raw is None:
301 301 return _defaultextra
302 302
303 303 return decodeextra(raw)
304 304
305 305 @property
306 306 def changes(self):
307 307 if self._changes is not None:
308 308 return self._changes
309 309 if self._cpsd:
310 310 changes = metadata.decode_files_sidedata(self._sidedata)
311 311 else:
312 312 changes = metadata.ChangingFiles(
313 313 touched=self.files or (),
314 314 added=self.filesadded or (),
315 315 removed=self.filesremoved or (),
316 316 p1_copies=self.p1copies or {},
317 317 p2_copies=self.p2copies or {},
318 318 )
319 319 self._changes = changes
320 320 return changes
321 321
322 322 @property
323 323 def files(self):
324 324 if self._cpsd:
325 325 return sorted(self.changes.touched)
326 326 off = self._offsets
327 327 if off[2] == off[3]:
328 328 return []
329 329
330 330 return self._text[off[2] + 1 : off[3]].split(b'\n')
331 331
332 332 @property
333 333 def filesadded(self):
334 334 if self._cpsd:
335 335 return self.changes.added
336 336 else:
337 337 rawindices = self.extra.get(b'filesadded')
338 338 if rawindices is None:
339 339 return None
340 340 return metadata.decodefileindices(self.files, rawindices)
341 341
342 342 @property
343 343 def filesremoved(self):
344 344 if self._cpsd:
345 345 return self.changes.removed
346 346 else:
347 347 rawindices = self.extra.get(b'filesremoved')
348 348 if rawindices is None:
349 349 return None
350 350 return metadata.decodefileindices(self.files, rawindices)
351 351
352 352 @property
353 353 def p1copies(self):
354 354 if self._cpsd:
355 355 return self.changes.copied_from_p1
356 356 else:
357 357 rawcopies = self.extra.get(b'p1copies')
358 358 if rawcopies is None:
359 359 return None
360 360 return metadata.decodecopies(self.files, rawcopies)
361 361
362 362 @property
363 363 def p2copies(self):
364 364 if self._cpsd:
365 365 return self.changes.copied_from_p2
366 366 else:
367 367 rawcopies = self.extra.get(b'p2copies')
368 368 if rawcopies is None:
369 369 return None
370 370 return metadata.decodecopies(self.files, rawcopies)
371 371
372 372 @property
373 373 def description(self):
374 374 return encoding.tolocal(self._text[self._offsets[3] + 2 :])
375 375
376 376 @property
377 377 def branchinfo(self):
378 378 extra = self.extra
379 379 return encoding.tolocal(extra.get(b"branch")), b'close' in extra
380 380
381 381
382 382 class changelog(revlog.revlog):
383 383 def __init__(self, opener, trypending=False):
384 384 """Load a changelog revlog using an opener.
385 385
386 386 If ``trypending`` is true, we attempt to load the index from a
387 387 ``00changelog.i.a`` file instead of the default ``00changelog.i``.
388 388 The ``00changelog.i.a`` file contains index (and possibly inline
389 389 revision) data for a transaction that hasn't been finalized yet.
390 390 It exists in a separate file to facilitate readers (such as
391 391 hooks processes) accessing data before a transaction is finalized.
392 392 """
393 393 if trypending and opener.exists(b'00changelog.i.a'):
394 394 indexfile = b'00changelog.i.a'
395 395 else:
396 396 indexfile = b'00changelog.i'
397 397
398 398 datafile = b'00changelog.d'
399 399 revlog.revlog.__init__(
400 400 self,
401 401 opener,
402 402 indexfile,
403 403 datafile=datafile,
404 404 checkambig=True,
405 405 mmaplargeindex=True,
406 406 persistentnodemap=opener.options.get(b'persistent-nodemap', False),
407 407 )
408 408
409 409 if self._initempty and (self.version & 0xFFFF == revlog.REVLOGV1):
410 410 # changelogs don't benefit from generaldelta.
411 411
412 412 self.version &= ~revlog.FLAG_GENERALDELTA
413 413 self._generaldelta = False
414 414
415 415 # Delta chains for changelogs tend to be very small because entries
416 416 # tend to be small and don't delta well with each other. So disable delta
417 417 # chains.
418 418 self._storedeltachains = False
419 419
420 420 self._realopener = opener
421 421 self._delayed = False
422 422 self._delaybuf = None
423 423 self._divert = False
424 424 self._filteredrevs = frozenset()
425 425 self._filteredrevs_hashcache = {}
426 426 self._copiesstorage = opener.options.get(b'copies-storage')
427 427
428 428 @property
429 429 def filteredrevs(self):
430 430 return self._filteredrevs
431 431
432 432 @filteredrevs.setter
433 433 def filteredrevs(self, val):
434 434 # Ensure all updates go through this function
435 435 assert isinstance(val, frozenset)
436 436 self._filteredrevs = val
437 437 self._filteredrevs_hashcache = {}
438 438
439 439 def delayupdate(self, tr):
440 440 """delay visibility of index updates to other readers"""
441 441
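# Two strategies are used below: for a still-empty changelog, index writes
# are diverted straight to a separate <indexfile>.a file that _finalize()
# later renames into place; otherwise new data is buffered in memory in
# _delaybuf, copied into a pending .a file by _writepending() so that hook
# processes can see the transaction, and appended to the real index by
# _finalize().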
442 442 if not self._delayed:
443 443 if len(self) == 0:
444 444 self._divert = True
445 445 if self._realopener.exists(self.indexfile + b'.a'):
446 446 self._realopener.unlink(self.indexfile + b'.a')
447 447 self.opener = _divertopener(self._realopener, self.indexfile)
448 448 else:
449 449 self._delaybuf = []
450 450 self.opener = _delayopener(
451 451 self._realopener, self.indexfile, self._delaybuf
452 452 )
453 453 self._delayed = True
454 454 tr.addpending(b'cl-%i' % id(self), self._writepending)
455 455 tr.addfinalize(b'cl-%i' % id(self), self._finalize)
456 456
457 457 def _finalize(self, tr):
458 458 """finalize index updates"""
459 459 self._delayed = False
460 460 self.opener = self._realopener
461 461 # move redirected index data back into place
462 462 if self._divert:
463 463 assert not self._delaybuf
464 464 tmpname = self.indexfile + b".a"
465 465 nfile = self.opener.open(tmpname)
466 466 nfile.close()
467 467 self.opener.rename(tmpname, self.indexfile, checkambig=True)
468 468 elif self._delaybuf:
469 469 fp = self.opener(self.indexfile, b'a', checkambig=True)
470 470 fp.write(b"".join(self._delaybuf))
471 471 fp.close()
472 472 self._delaybuf = None
473 473 self._divert = False
474 474 # split when we're done
475 475 self._enforceinlinesize(tr)
476 476
477 477 def _writepending(self, tr):
478 478 """create a file containing the unfinalized state for
479 479 pretxnchangegroup"""
480 480 if self._delaybuf:
481 481 # make a temporary copy of the index
482 482 fp1 = self._realopener(self.indexfile)
483 483 pendingfilename = self.indexfile + b".a"
484 484 # register as a temp file to ensure cleanup on failure
485 485 tr.registertmp(pendingfilename)
486 486 # write existing data
487 487 fp2 = self._realopener(pendingfilename, b"w")
488 488 fp2.write(fp1.read())
489 489 # add pending data
490 490 fp2.write(b"".join(self._delaybuf))
491 491 fp2.close()
492 492 # switch modes so finalize can simply rename
493 493 self._delaybuf = None
494 494 self._divert = True
495 495 self.opener = _divertopener(self._realopener, self.indexfile)
496 496
497 497 if self._divert:
498 498 return True
499 499
500 500 return False
501 501
502 502 def _enforceinlinesize(self, tr, fp=None):
503 503 if not self._delayed:
504 504 revlog.revlog._enforceinlinesize(self, tr, fp)
505 505
506 506 def read(self, node):
507 507 """Obtain data from a parsed changelog revision.
508 508
509 509 Returns a 6-tuple of:
510 510
511 511 - manifest node in binary
512 512 - author/user as a localstr
513 513 - date as a 2-tuple of (time, timezone)
514 514 - list of files
515 515 - commit message as a localstr
516 516 - dict of extra metadata
517 517
518 518 Unless you need to access all fields, consider calling
519 519 ``changelogrevision`` instead, as it is faster for partial object
520 520 access.
521 521 """
522 522 d, s = self._revisiondata(node)
523 523 c = changelogrevision(
524 524 d, s, self._copiesstorage == b'changeset-sidedata'
525 525 )
526 526 return (c.manifest, c.user, c.date, c.files, c.description, c.extra)
527 527
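For callers that genuinely need every field, the 6-tuple unpacks in the order documented above. A minimal sketch, assuming ``cl`` is a changelog instance and ``node`` a known changeset node (both placeholders):

    # Full parse; prefer changelogrevision() when only some fields are needed.
    mf_node, user, (timestamp, tzoffset), files, desc, extra = cl.read(node)
    branch = extra.get(b'branch', b'default')  # extra is a dict of bytes keys/values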
528 528 def changelogrevision(self, nodeorrev):
529 529 """Obtain a ``changelogrevision`` for a node or revision."""
530 530 text, sidedata = self._revisiondata(nodeorrev)
531 531 return changelogrevision(
532 532 text, sidedata, self._copiesstorage == b'changeset-sidedata'
533 533 )
534 534
535 535 def readfiles(self, node):
536 536 """
537 537 short version of read that only returns the files modified by the cset
538 538 """
539 539 text = self.revision(node)
540 540 if not text:
541 541 return []
542 542 last = text.index(b"\n\n")
543 543 l = text[:last].split(b'\n')
544 544 return l[3:]
545 545
546 546 def add(
547 547 self,
548 548 manifest,
549 549 files,
550 550 desc,
551 551 transaction,
552 552 p1,
553 553 p2,
554 554 user,
555 555 date=None,
556 556 extra=None,
557 557 ):
558 558 # Convert to UTF-8 encoded bytestrings as the very first
559 559 # thing: calling any method on a localstr object will turn it
560 560 # into a str object and the cached UTF-8 string is thus lost.
561 561 user, desc = encoding.fromlocal(user), encoding.fromlocal(desc)
562 562
563 563 user = user.strip()
564 564 # An empty username or a username with a "\n" will make the
565 565 # revision text contain two "\n\n" sequences -> corrupt
566 566 # repository since read cannot unpack the revision.
567 567 if not user:
568 568 raise error.StorageError(_(b"empty username"))
569 569 if b"\n" in user:
570 570 raise error.StorageError(
571 571 _(b"username %r contains a newline") % pycompat.bytestr(user)
572 572 )
573 573
574 574 desc = stripdesc(desc)
575 575
576 576 if date:
577 577 parseddate = b"%d %d" % dateutil.parsedate(date)
578 578 else:
579 579 parseddate = b"%d %d" % dateutil.makedate()
580 580 if extra:
581 581 branch = extra.get(b"branch")
582 582 if branch in (b"default", b""):
583 583 del extra[b"branch"]
584 584 elif branch in (b".", b"null", b"tip"):
585 585 raise error.StorageError(
586 586 _(b'the name \'%s\' is reserved') % branch
587 587 )
588 588 sortedfiles = sorted(files.touched)
589 589 flags = 0
590 590 sidedata = None
591 591 if self._copiesstorage == b'changeset-sidedata':
592 592 if files.has_copies_info:
593 593 flags |= flagutil.REVIDX_HASCOPIESINFO
594 594 sidedata = metadata.encode_files_sidedata(files)
595 595
596 596 if extra:
597 597 extra = encodeextra(extra)
598 598 parseddate = b"%s %s" % (parseddate, extra)
599 599 l = [hex(manifest), user, parseddate] + sortedfiles + [b"", desc]
600 600 text = b"\n".join(l)
601 return self.addrevision(
601 rev = self.addrevision(
602 602 text, transaction, len(self), p1, p2, sidedata=sidedata, flags=flags
603 603 )
604 return self.node(rev)
604 605
605 606 def branchinfo(self, rev):
606 607 """return the branch name and open/close state of a revision
607 608
608 609 This function exists because creating a changectx object
609 610 just to access this is costly."""
610 611 return self.changelogrevision(rev).branchinfo
611 612
612 613 def _nodeduplicatecallback(self, transaction, node):
613 614 # keep track of revisions that got "re-added", e.g. unbundle of a known rev.
614 615 #
615 616 # We track them in a list to preserve their order from the source bundle
616 617 duplicates = transaction.changes.setdefault(b'revduplicates', [])
617 618 duplicates.append(self.rev(node))
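The net effect of the change in ``add()`` above is that ``addrevision()`` now reports the new revision number while ``add()`` keeps returning a node. A minimal caller-side sketch, assuming ``cl`` exposes the methods shown in this diff and ``text``, ``tr``, ``p1``, ``p2`` are placeholders:

    rev = cl.addrevision(text, tr, len(cl), p1, p2)   # new contract: revision number
    node = cl.node(rev)                               # map back to the 20-byte node
    assert cl.rev(node) == rev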
@@ -1,292 +1,293 b''
1 1 # filelog.py - file history class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from .i18n import _
11 11 from .node import (
12 12 nullid,
13 13 nullrev,
14 14 )
15 15 from . import (
16 16 error,
17 17 revlog,
18 18 )
19 19 from .interfaces import (
20 20 repository,
21 21 util as interfaceutil,
22 22 )
23 23 from .utils import storageutil
24 24
25 25
26 26 @interfaceutil.implementer(repository.ifilestorage)
27 27 class filelog(object):
28 28 def __init__(self, opener, path):
29 29 self._revlog = revlog.revlog(
30 30 opener, b'/'.join((b'data', path + b'.i')), censorable=True
31 31 )
32 32 # Full name of the user visible file, relative to the repository root.
33 33 # Used by LFS.
34 34 self._revlog.filename = path
35 35
36 36 def __len__(self):
37 37 return len(self._revlog)
38 38
39 39 def __iter__(self):
40 40 return self._revlog.__iter__()
41 41
42 42 def hasnode(self, node):
43 43 if node in (nullid, nullrev):
44 44 return False
45 45
46 46 try:
47 47 self._revlog.rev(node)
48 48 return True
49 49 except (TypeError, ValueError, IndexError, error.LookupError):
50 50 return False
51 51
52 52 def revs(self, start=0, stop=None):
53 53 return self._revlog.revs(start=start, stop=stop)
54 54
55 55 def parents(self, node):
56 56 return self._revlog.parents(node)
57 57
58 58 def parentrevs(self, rev):
59 59 return self._revlog.parentrevs(rev)
60 60
61 61 def rev(self, node):
62 62 return self._revlog.rev(node)
63 63
64 64 def node(self, rev):
65 65 return self._revlog.node(rev)
66 66
67 67 def lookup(self, node):
68 68 return storageutil.fileidlookup(
69 69 self._revlog, node, self._revlog.indexfile
70 70 )
71 71
72 72 def linkrev(self, rev):
73 73 return self._revlog.linkrev(rev)
74 74
75 75 def commonancestorsheads(self, node1, node2):
76 76 return self._revlog.commonancestorsheads(node1, node2)
77 77
78 78 # Used by dagop.blockdescendants().
79 79 def descendants(self, revs):
80 80 return self._revlog.descendants(revs)
81 81
82 82 def heads(self, start=None, stop=None):
83 83 return self._revlog.heads(start, stop)
84 84
85 85 # Used by hgweb, children extension.
86 86 def children(self, node):
87 87 return self._revlog.children(node)
88 88
89 89 def iscensored(self, rev):
90 90 return self._revlog.iscensored(rev)
91 91
92 92 def revision(self, node, _df=None, raw=False):
93 93 return self._revlog.revision(node, _df=_df, raw=raw)
94 94
95 95 def rawdata(self, node, _df=None):
96 96 return self._revlog.rawdata(node, _df=_df)
97 97
98 98 def emitrevisions(
99 99 self,
100 100 nodes,
101 101 nodesorder=None,
102 102 revisiondata=False,
103 103 assumehaveparentrevisions=False,
104 104 deltamode=repository.CG_DELTAMODE_STD,
105 105 ):
106 106 return self._revlog.emitrevisions(
107 107 nodes,
108 108 nodesorder=nodesorder,
109 109 revisiondata=revisiondata,
110 110 assumehaveparentrevisions=assumehaveparentrevisions,
111 111 deltamode=deltamode,
112 112 )
113 113
114 114 def addrevision(
115 115 self,
116 116 revisiondata,
117 117 transaction,
118 118 linkrev,
119 119 p1,
120 120 p2,
121 121 node=None,
122 122 flags=revlog.REVIDX_DEFAULT_FLAGS,
123 123 cachedelta=None,
124 124 ):
125 125 return self._revlog.addrevision(
126 126 revisiondata,
127 127 transaction,
128 128 linkrev,
129 129 p1,
130 130 p2,
131 131 node=node,
132 132 flags=flags,
133 133 cachedelta=cachedelta,
134 134 )
135 135
136 136 def addgroup(
137 137 self,
138 138 deltas,
139 139 linkmapper,
140 140 transaction,
141 141 addrevisioncb=None,
142 142 duplicaterevisioncb=None,
143 143 maybemissingparents=False,
144 144 ):
145 145 if maybemissingparents:
146 146 raise error.Abort(
147 147 _(
148 148 b'revlog storage does not support missing '
149 149 b'parents write mode'
150 150 )
151 151 )
152 152
153 153 return self._revlog.addgroup(
154 154 deltas,
155 155 linkmapper,
156 156 transaction,
157 157 addrevisioncb=addrevisioncb,
158 158 duplicaterevisioncb=duplicaterevisioncb,
159 159 )
160 160
161 161 def getstrippoint(self, minlink):
162 162 return self._revlog.getstrippoint(minlink)
163 163
164 164 def strip(self, minlink, transaction):
165 165 return self._revlog.strip(minlink, transaction)
166 166
167 167 def censorrevision(self, tr, node, tombstone=b''):
168 168 return self._revlog.censorrevision(tr, node, tombstone=tombstone)
169 169
170 170 def files(self):
171 171 return self._revlog.files()
172 172
173 173 def read(self, node):
174 174 return storageutil.filtermetadata(self.revision(node))
175 175
176 176 def add(self, text, meta, transaction, link, p1=None, p2=None):
177 177 if meta or text.startswith(b'\1\n'):
178 178 text = storageutil.packmeta(meta, text)
179 return self.addrevision(text, transaction, link, p1, p2)
179 rev = self.addrevision(text, transaction, link, p1, p2)
180 return self.node(rev)
180 181
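``filelog.add()`` keeps its node-returning contract by performing the same mapping internally. A sketch of the caller-visible behaviour, assuming ``fl`` is a filelog and ``filedata``, ``tr``, ``linkrev``, ``p1``, ``p2`` are placeholders:

    node = fl.add(filedata, {}, tr, linkrev, p1, p2)  # still returns a binary node
    # For a text that was not already stored, the node resolves to the last revision.
    assert fl.rev(node) == len(fl) - 1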
181 182 def renamed(self, node):
182 183 return storageutil.filerevisioncopied(self, node)
183 184
184 185 def size(self, rev):
185 186 """return the size of a given revision"""
186 187
187 188 # for revisions with renames, we have to go the slow way
188 189 node = self.node(rev)
189 190 if self.renamed(node):
190 191 return len(self.read(node))
191 192 if self.iscensored(rev):
192 193 return 0
193 194
194 195 # XXX if self.read(node).startswith("\1\n"), this returns (size+4)
195 196 return self._revlog.size(rev)
196 197
197 198 def cmp(self, node, text):
198 199 """compare text with a given file revision
199 200
200 201 returns True if text is different than what is stored.
201 202 """
202 203 return not storageutil.filedataequivalent(self, node, text)
203 204
204 205 def verifyintegrity(self, state):
205 206 return self._revlog.verifyintegrity(state)
206 207
207 208 def storageinfo(
208 209 self,
209 210 exclusivefiles=False,
210 211 sharedfiles=False,
211 212 revisionscount=False,
212 213 trackedsize=False,
213 214 storedsize=False,
214 215 ):
215 216 return self._revlog.storageinfo(
216 217 exclusivefiles=exclusivefiles,
217 218 sharedfiles=sharedfiles,
218 219 revisionscount=revisionscount,
219 220 trackedsize=trackedsize,
220 221 storedsize=storedsize,
221 222 )
222 223
223 224 # TODO these aren't part of the interface and aren't internal methods.
224 225 # Callers should be fixed to not use them.
225 226
226 227 # Used by bundlefilelog, unionfilelog.
227 228 @property
228 229 def indexfile(self):
229 230 return self._revlog.indexfile
230 231
231 232 @indexfile.setter
232 233 def indexfile(self, value):
233 234 self._revlog.indexfile = value
234 235
235 236 # Used by repo upgrade.
236 237 def clone(self, tr, destrevlog, **kwargs):
237 238 if not isinstance(destrevlog, filelog):
238 239 raise error.ProgrammingError(b'expected filelog to clone()')
239 240
240 241 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
241 242
242 243
243 244 class narrowfilelog(filelog):
244 245 """Filelog variation to be used with narrow stores."""
245 246
246 247 def __init__(self, opener, path, narrowmatch):
247 248 super(narrowfilelog, self).__init__(opener, path)
248 249 self._narrowmatch = narrowmatch
249 250
250 251 def renamed(self, node):
251 252 res = super(narrowfilelog, self).renamed(node)
252 253
253 254 # Renames that come from outside the narrowspec are problematic
254 255 # because we may lack the base text for the rename. This can result
255 256 # in code attempting to walk the ancestry or compute a diff
256 257 # encountering a missing revision. We address this by silently
257 258 # removing rename metadata if the source file is outside the
258 259 # narrow spec.
259 260 #
260 261 # A better solution would be to see if the base revision is available,
261 262 # rather than assuming it isn't.
262 263 #
263 264 # An even better solution would be to teach all consumers of rename
264 265 # metadata that the base revision may not be available.
265 266 #
266 267 # TODO consider better ways of doing this.
267 268 if res and not self._narrowmatch(res[0]):
268 269 return None
269 270
270 271 return res
271 272
272 273 def size(self, rev):
273 274 # Because we have a custom renamed() that may lie, we need to call
274 275 # the base renamed() to report accurate results.
275 276 node = self.node(rev)
276 277 if super(narrowfilelog, self).renamed(node):
277 278 return len(self.read(node))
278 279 else:
279 280 return super(narrowfilelog, self).size(rev)
280 281
281 282 def cmp(self, node, text):
282 283 different = super(narrowfilelog, self).cmp(node, text)
283 284
284 285 # Because renamed() may lie, we may get false positives for
285 286 # different content. Check for this by comparing against the original
286 287 # renamed() implementation.
287 288 if different:
288 289 if super(narrowfilelog, self).renamed(node):
289 290 t2 = self.read(node)
290 291 return t2 != text
291 292
292 293 return different
@@ -1,1993 +1,1993 b''
1 1 # repository.py - Interfaces and base classes for repositories and peers.
2 2 #
3 3 # Copyright 2017 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from ..i18n import _
11 11 from .. import error
12 12 from . import util as interfaceutil
13 13
14 14 # Local repository feature string.
15 15
16 16 # Revlogs are being used for file storage.
17 17 REPO_FEATURE_REVLOG_FILE_STORAGE = b'revlogfilestorage'
18 18 # The storage part of the repository is shared from an external source.
19 19 REPO_FEATURE_SHARED_STORAGE = b'sharedstore'
20 20 # LFS supported for backing file storage.
21 21 REPO_FEATURE_LFS = b'lfs'
22 22 # Repository supports being stream cloned.
23 23 REPO_FEATURE_STREAM_CLONE = b'streamclone'
24 24 # Files storage may lack data for all ancestors.
25 25 REPO_FEATURE_SHALLOW_FILE_STORAGE = b'shallowfilestorage'
26 26
27 27 REVISION_FLAG_CENSORED = 1 << 15
28 28 REVISION_FLAG_ELLIPSIS = 1 << 14
29 29 REVISION_FLAG_EXTSTORED = 1 << 13
30 30 REVISION_FLAG_SIDEDATA = 1 << 12
31 31 REVISION_FLAG_HASCOPIESINFO = 1 << 11
32 32
33 33 REVISION_FLAGS_KNOWN = (
34 34 REVISION_FLAG_CENSORED
35 35 | REVISION_FLAG_ELLIPSIS
36 36 | REVISION_FLAG_EXTSTORED
37 37 | REVISION_FLAG_SIDEDATA
38 38 | REVISION_FLAG_HASCOPIESINFO
39 39 )
40 40
41 41 CG_DELTAMODE_STD = b'default'
42 42 CG_DELTAMODE_PREV = b'previous'
43 43 CG_DELTAMODE_FULL = b'fulltext'
44 44 CG_DELTAMODE_P1 = b'p1'
45 45
46 46
47 47 class ipeerconnection(interfaceutil.Interface):
48 48 """Represents a "connection" to a repository.
49 49
50 50 This is the base interface for representing a connection to a repository.
51 51 It holds basic properties and methods applicable to all peer types.
52 52
53 53 This is not a complete interface definition and should not be used
54 54 outside of this module.
55 55 """
56 56
57 57 ui = interfaceutil.Attribute("""ui.ui instance""")
58 58
59 59 def url():
60 60 """Returns a URL string representing this peer.
61 61
62 62 Currently, implementations expose the raw URL used to construct the
63 63 instance. It may contain credentials as part of the URL. The
64 64 expectations of the value aren't well-defined and this could lead to
65 65 data leakage.
66 66
67 67 TODO audit/clean consumers and more clearly define the contents of this
68 68 value.
69 69 """
70 70
71 71 def local():
72 72 """Returns a local repository instance.
73 73
74 74 If the peer represents a local repository, returns an object that
75 75 can be used to interface with it. Otherwise returns ``None``.
76 76 """
77 77
78 78 def peer():
79 79 """Returns an object conforming to this interface.
80 80
81 81 Most implementations will ``return self``.
82 82 """
83 83
84 84 def canpush():
85 85 """Returns a boolean indicating if this peer can be pushed to."""
86 86
87 87 def close():
88 88 """Close the connection to this peer.
89 89
90 90 This is called when the peer will no longer be used. Resources
91 91 associated with the peer should be cleaned up.
92 92 """
93 93
94 94
95 95 class ipeercapabilities(interfaceutil.Interface):
96 96 """Peer sub-interface related to capabilities."""
97 97
98 98 def capable(name):
99 99 """Determine support for a named capability.
100 100
101 101 Returns ``False`` if capability not supported.
102 102
103 103 Returns ``True`` if boolean capability is supported. Returns a string
104 104 if capability support is non-boolean.
105 105
106 106 Capability strings may or may not map to wire protocol capabilities.
107 107 """
108 108
109 109 def requirecap(name, purpose):
110 110 """Require a capability to be present.
111 111
112 112 Raises a ``CapabilityError`` if the capability isn't present.
113 113 """
114 114
115 115
116 116 class ipeercommands(interfaceutil.Interface):
117 117 """Client-side interface for communicating over the wire protocol.
118 118
119 119 This interface is used as a gateway to the Mercurial wire protocol.
120 120 Methods commonly call wire protocol commands of the same name.
121 121 """
122 122
123 123 def branchmap():
124 124 """Obtain heads in named branches.
125 125
126 126 Returns a dict mapping branch name to an iterable of nodes that are
127 127 heads on that branch.
128 128 """
129 129
130 130 def capabilities():
131 131 """Obtain capabilities of the peer.
132 132
133 133 Returns a set of string capabilities.
134 134 """
135 135
136 136 def clonebundles():
137 137 """Obtains the clone bundles manifest for the repo.
138 138
139 139 Returns the manifest as unparsed bytes.
140 140 """
141 141
142 142 def debugwireargs(one, two, three=None, four=None, five=None):
143 143 """Used to facilitate debugging of arguments passed over the wire."""
144 144
145 145 def getbundle(source, **kwargs):
146 146 """Obtain remote repository data as a bundle.
147 147
148 148 This command is how the bulk of repository data is transferred from
149 149 the peer to the local repository
150 150
151 151 Returns a generator of bundle data.
152 152 """
153 153
154 154 def heads():
155 155 """Determine all known head revisions in the peer.
156 156
157 157 Returns an iterable of binary nodes.
158 158 """
159 159
160 160 def known(nodes):
161 161 """Determine whether multiple nodes are known.
162 162
163 163 Accepts an iterable of nodes whose presence to check for.
164 164
165 165 Returns an iterable of booleans indicating whether the corresponding node
166 166 at that index is known to the peer.
167 167 """
168 168
169 169 def listkeys(namespace):
170 170 """Obtain all keys in a pushkey namespace.
171 171
172 172 Returns an iterable of key names.
173 173 """
174 174
175 175 def lookup(key):
176 176 """Resolve a value to a known revision.
177 177
178 178 Returns a binary node of the resolved revision on success.
179 179 """
180 180
181 181 def pushkey(namespace, key, old, new):
182 182 """Set a value using the ``pushkey`` protocol.
183 183
184 184 Arguments correspond to the pushkey namespace and key to operate on and
185 185 the old and new values for that key.
186 186
187 187 Returns a string with the peer result. The value inside varies by the
188 188 namespace.
189 189 """
190 190
191 191 def stream_out():
192 192 """Obtain streaming clone data.
193 193
194 194 Successful result should be a generator of data chunks.
195 195 """
196 196
197 197 def unbundle(bundle, heads, url):
198 198 """Transfer repository data to the peer.
199 199
200 200 This is how the bulk of data during a push is transferred.
201 201
202 202 Returns the integer number of heads added to the peer.
203 203 """
204 204
205 205
206 206 class ipeerlegacycommands(interfaceutil.Interface):
207 207 """Interface for implementing support for legacy wire protocol commands.
208 208
209 209 Wire protocol commands transition to legacy status when they are no longer
210 210 used by modern clients. To facilitate identifying which commands are
211 211 legacy, the interfaces are split.
212 212 """
213 213
214 214 def between(pairs):
215 215 """Obtain nodes between pairs of nodes.
216 216
217 217 ``pairs`` is an iterable of node pairs.
218 218
219 219 Returns an iterable of iterables of nodes corresponding to each
220 220 requested pair.
221 221 """
222 222
223 223 def branches(nodes):
224 224 """Obtain ancestor changesets of specific nodes back to a branch point.
225 225
226 226 For each requested node, the peer finds the first ancestor node that is
227 227 a DAG root or is a merge.
228 228
229 229 Returns an iterable of iterables with the resolved values for each node.
230 230 """
231 231
232 232 def changegroup(nodes, source):
233 233 """Obtain a changegroup with data for descendants of specified nodes."""
234 234
235 235 def changegroupsubset(bases, heads, source):
236 236 pass
237 237
238 238
239 239 class ipeercommandexecutor(interfaceutil.Interface):
240 240 """Represents a mechanism to execute remote commands.
241 241
242 242 This is the primary interface for requesting that wire protocol commands
243 243 be executed. Instances of this interface are active in a context manager
244 244 and have a well-defined lifetime. When the context manager exits, all
245 245 outstanding requests are waited on.
246 246 """
247 247
248 248 def callcommand(name, args):
249 249 """Request that a named command be executed.
250 250
251 251 Receives the command name and a dictionary of command arguments.
252 252
253 253 Returns a ``concurrent.futures.Future`` that will resolve to the
254 254 result of that command request. That exact value is left up to
255 255 the implementation and possibly varies by command.
256 256
257 257 Not all commands can coexist with other commands in an executor
258 258 instance: it depends on the underlying wire protocol transport being
259 259 used and the command itself.
260 260
261 261 Implementations MAY call ``sendcommands()`` automatically if the
262 262 requested command cannot coexist with other commands in this executor.
263 263
264 264 Implementations MAY call ``sendcommands()`` automatically when the
265 265 future's ``result()`` is called. So, consumers using multiple
266 266 commands with an executor MUST ensure that ``result()`` is not called
267 267 until all command requests have been issued.
268 268 """
269 269
270 270 def sendcommands():
271 271 """Trigger submission of queued command requests.
272 272
273 273 Not all transports submit commands as soon as they are requested to
274 274 run. When called, this method forces queued command requests to be
275 275 issued. It will no-op if all commands have already been sent.
276 276
277 277 When called, no more new commands may be issued with this executor.
278 278 """
279 279
280 280 def close():
281 281 """Signal that this command request is finished.
282 282
283 283 When called, no more new commands may be issued. All outstanding
284 284 commands that have previously been issued are waited on before
285 285 returning. This not only includes waiting for the futures to resolve,
286 286 but also waiting for all response data to arrive. In other words,
287 287 calling this waits for all on-wire state for issued command requests
288 288 to finish.
289 289
290 290 When used as a context manager, this method is called when exiting the
291 291 context manager.
292 292
293 293 This method may call ``sendcommands()`` if there are buffered commands.
294 294 """
295 295
296 296
297 297 class ipeerrequests(interfaceutil.Interface):
298 298 """Interface for executing commands on a peer."""
299 299
300 300 limitedarguments = interfaceutil.Attribute(
301 301 """True if the peer cannot receive large argument value for commands."""
302 302 )
303 303
304 304 def commandexecutor():
305 305 """A context manager that resolves to an ipeercommandexecutor.
306 306
307 307 The object this resolves to can be used to issue command requests
308 308 to the peer.
309 309
310 310 Callers should call its ``callcommand`` method to issue command
311 311 requests.
312 312
313 313 A new executor should be obtained for each distinct set of commands
314 314 (possibly just a single command) that the consumer wants to execute
315 315 as part of a single operation or round trip. This is because some
316 316 peers are half-duplex and/or don't support persistent connections.
317 317 e.g. in the case of HTTP peers, commands sent to an executor represent
318 318 a single HTTP request. While some peers may support multiple command
319 319 sends over the wire per executor, consumers need to code to the least
320 320 capable peer. So it should be assumed that command executors buffer
321 321 called commands until they are told to send them and that each
322 322 command executor could result in a new connection or wire-level request
323 323 being issued.
324 324 """
325 325
326 326
327 327 class ipeerbase(ipeerconnection, ipeercapabilities, ipeerrequests):
328 328 """Unified interface for peer repositories.
329 329
330 330 All peer instances must conform to this interface.
331 331 """
332 332
333 333
334 334 class ipeerv2(ipeerconnection, ipeercapabilities, ipeerrequests):
335 335 """Unified peer interface for wire protocol version 2 peers."""
336 336
337 337 apidescriptor = interfaceutil.Attribute(
338 338 """Data structure holding description of server API."""
339 339 )
340 340
341 341
342 342 @interfaceutil.implementer(ipeerbase)
343 343 class peer(object):
344 344 """Base class for peer repositories."""
345 345
346 346 limitedarguments = False
347 347
348 348 def capable(self, name):
349 349 caps = self.capabilities()
350 350 if name in caps:
351 351 return True
352 352
353 353 name = b'%s=' % name
354 354 for cap in caps:
355 355 if cap.startswith(name):
356 356 return cap[len(name) :]
357 357
358 358 return False
359 359
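``capable()`` distinguishes boolean capabilities from valued ones of the form ``name=value``. A standalone sketch of the same lookup over a plain capability set (the set contents below are made up for illustration):

    caps = {b'branchmap', b'bundle2=HG20%0Achangegroup%3D01%2C02'}

    def capable(name):
        if name in caps:
            return True
        prefix = b'%s=' % name
        for cap in caps:
            if cap.startswith(prefix):
                return cap[len(prefix):]
        return False

    capable(b'branchmap')   # -> True
    capable(b'bundle2')     # -> b'HG20%0Achangegroup%3D01%2C02'
    capable(b'lfs')         # -> False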
360 360 def requirecap(self, name, purpose):
361 361 if self.capable(name):
362 362 return
363 363
364 364 raise error.CapabilityError(
365 365 _(
366 366 b'cannot %s; remote repository does not support the '
367 367 b'\'%s\' capability'
368 368 )
369 369 % (purpose, name)
370 370 )
371 371
372 372
373 373 class iverifyproblem(interfaceutil.Interface):
374 374 """Represents a problem with the integrity of the repository.
375 375
376 376 Instances of this interface are emitted to describe an integrity issue
377 377 with a repository (e.g. corrupt storage, missing data, etc).
378 378
379 379 Instances are essentially messages associated with severity.
380 380 """
381 381
382 382 warning = interfaceutil.Attribute(
383 383 """Message indicating a non-fatal problem."""
384 384 )
385 385
386 386 error = interfaceutil.Attribute("""Message indicating a fatal problem.""")
387 387
388 388 node = interfaceutil.Attribute(
389 389 """Revision encountering the problem.
390 390
391 391 ``None`` means the problem doesn't apply to a single revision.
392 392 """
393 393 )
394 394
395 395
396 396 class irevisiondelta(interfaceutil.Interface):
397 397 """Represents a delta between one revision and another.
398 398
399 399 Instances convey enough information to allow a revision to be exchanged
400 400 with another repository.
401 401
402 402 Instances represent the fulltext revision data or a delta against
403 403 another revision. Therefore the ``revision`` and ``delta`` attributes
404 404 are mutually exclusive.
405 405
406 406 Typically used for changegroup generation.
407 407 """
408 408
409 409 node = interfaceutil.Attribute("""20 byte node of this revision.""")
410 410
411 411 p1node = interfaceutil.Attribute(
412 412 """20 byte node of 1st parent of this revision."""
413 413 )
414 414
415 415 p2node = interfaceutil.Attribute(
416 416 """20 byte node of 2nd parent of this revision."""
417 417 )
418 418
419 419 linknode = interfaceutil.Attribute(
420 420 """20 byte node of the changelog revision this node is linked to."""
421 421 )
422 422
423 423 flags = interfaceutil.Attribute(
424 424 """2 bytes of integer flags that apply to this revision.
425 425
426 426 This is a bitwise composition of the ``REVISION_FLAG_*`` constants.
427 427 """
428 428 )
429 429
430 430 basenode = interfaceutil.Attribute(
431 431 """20 byte node of the revision this data is a delta against.
432 432
433 433 ``nullid`` indicates that the revision is a full revision and not
434 434 a delta.
435 435 """
436 436 )
437 437
438 438 baserevisionsize = interfaceutil.Attribute(
439 439 """Size of base revision this delta is against.
440 440
441 441 May be ``None`` if ``basenode`` is ``nullid``.
442 442 """
443 443 )
444 444
445 445 revision = interfaceutil.Attribute(
446 446 """Raw fulltext of revision data for this node."""
447 447 )
448 448
449 449 delta = interfaceutil.Attribute(
450 450 """Delta between ``basenode`` and ``node``.
451 451
452 452 Stored in the bdiff delta format.
453 453 """
454 454 )
455 455
456 456
457 457 class ifilerevisionssequence(interfaceutil.Interface):
458 458 """Contains index data for all revisions of a file.
459 459
460 460 Types implementing this behave like lists of tuples. The index
461 461 in the list corresponds to the revision number. The values contain
462 462 index metadata.
463 463
464 464 The *null* revision (revision number -1) is always the last item
465 465 in the index.
466 466 """
467 467
468 468 def __len__():
469 469 """The total number of revisions."""
470 470
471 471 def __getitem__(rev):
472 472 """Returns the object having a specific revision number.
473 473
474 474 Returns an 8-tuple with the following fields:
475 475
476 476 offset+flags
477 477 Contains the offset and flags for the revision. 64-bit unsigned
478 478 integer where first 6 bytes are the offset and the next 2 bytes
479 479 are flags. The offset can be 0 if it is not used by the store.
480 480 compressed size
481 481 Size of the revision data in the store. It can be 0 if it isn't
482 482 needed by the store.
483 483 uncompressed size
484 484 Fulltext size. It can be 0 if it isn't needed by the store.
485 485 base revision
486 486 Revision number of revision the delta for storage is encoded
487 487 against. -1 indicates not encoded against a base revision.
488 488 link revision
489 489 Revision number of changelog revision this entry is related to.
490 490 p1 revision
491 491 Revision number of 1st parent. -1 if no 1st parent.
492 492 p2 revision
493 493 Revision number of 2nd parent. -1 if no 2nd parent.
494 494 node
495 495 Binary node value for this revision number.
496 496
497 497 Negative values should index off the end of the sequence. ``-1``
498 498 should return the null revision. ``-2`` should return the most
499 499 recent revision.
500 500 """
501 501
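The 8-tuple described above unpacks positionally; a sketch, assuming ``index`` implements this interface and ``rev`` is a valid revision number (placeholders):

    (offset_flags, comp_size, uncomp_size,
     base_rev, link_rev, p1_rev, p2_rev, node) = index[rev]
    offset = offset_flags >> 16      # upper 6 bytes: byte offset in the store
    flags = offset_flags & 0xFFFF    # lower 2 bytes: revision flags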
502 502 def __contains__(rev):
503 503 """Whether a revision number exists."""
504 504
505 505 def insert(self, i, entry):
506 506 """Add an item to the index at specific revision."""
507 507
508 508
509 509 class ifileindex(interfaceutil.Interface):
510 510 """Storage interface for index data of a single file.
511 511
512 512 File storage data is divided into index metadata and data storage.
513 513 This interface defines the index portion of the interface.
514 514
515 515 The index logically consists of:
516 516
517 517 * A mapping between revision numbers and nodes.
518 518 * DAG data (storing and querying the relationship between nodes).
519 519 * Metadata to facilitate storage.
520 520 """
521 521
522 522 def __len__():
523 523 """Obtain the number of revisions stored for this file."""
524 524
525 525 def __iter__():
526 526 """Iterate over revision numbers for this file."""
527 527
528 528 def hasnode(node):
529 529 """Returns a bool indicating if a node is known to this store.
530 530
531 531 Implementations must only return True for full, binary node values:
532 532 hex nodes, revision numbers, and partial node matches must be
533 533 rejected.
534 534
535 535 The null node is never present.
536 536 """
537 537
538 538 def revs(start=0, stop=None):
539 539 """Iterate over revision numbers for this file, with control."""
540 540
541 541 def parents(node):
542 542 """Returns a 2-tuple of parent nodes for a revision.
543 543
544 544 Values will be ``nullid`` if the parent is empty.
545 545 """
546 546
547 547 def parentrevs(rev):
548 548 """Like parents() but operates on revision numbers."""
549 549
550 550 def rev(node):
551 551 """Obtain the revision number given a node.
552 552
553 553 Raises ``error.LookupError`` if the node is not known.
554 554 """
555 555
556 556 def node(rev):
557 557 """Obtain the node value given a revision number.
558 558
559 559 Raises ``IndexError`` if the node is not known.
560 560 """
561 561
562 562 def lookup(node):
563 563 """Attempt to resolve a value to a node.
564 564
565 565 Value can be a binary node, hex node, revision number, or a string
566 566 that can be converted to an integer.
567 567
568 568 Raises ``error.LookupError`` if a node could not be resolved.
569 569 """
570 570
571 571 def linkrev(rev):
572 572 """Obtain the changeset revision number a revision is linked to."""
573 573
574 574 def iscensored(rev):
575 575 """Return whether a revision's content has been censored."""
576 576
577 577 def commonancestorsheads(node1, node2):
578 578 """Obtain an iterable of nodes containing heads of common ancestors.
579 579
580 580 See ``ancestor.commonancestorsheads()``.
581 581 """
582 582
583 583 def descendants(revs):
584 584 """Obtain descendant revision numbers for a set of revision numbers.
585 585
586 586 If ``nullrev`` is in the set, this is equivalent to ``revs()``.
587 587 """
588 588
589 589 def heads(start=None, stop=None):
590 590 """Obtain a list of nodes that are DAG heads, with control.
591 591
592 592 The set of revisions examined can be limited by specifying
593 593 ``start`` and ``stop``. ``start`` is a node. ``stop`` is an
594 594 iterable of nodes. DAG traversal starts at earlier revision
595 595 ``start`` and iterates forward until any node in ``stop`` is
596 596 encountered.
597 597 """
598 598
599 599 def children(node):
600 600 """Obtain nodes that are children of a node.
601 601
602 602 Returns a list of nodes.
603 603 """
604 604
605 605
606 606 class ifiledata(interfaceutil.Interface):
607 607 """Storage interface for data storage of a specific file.
608 608
609 609 This complements ``ifileindex`` and provides an interface for accessing
610 610 data for a tracked file.
611 611 """
612 612
613 613 def size(rev):
614 614 """Obtain the fulltext size of file data.
615 615
616 616 Any metadata is excluded from size measurements.
617 617 """
618 618
619 619 def revision(node, raw=False):
620 620 """Obtain fulltext data for a node.
621 621
622 622 By default, any storage transformations are applied before the data
623 623 is returned. If ``raw`` is True, non-raw storage transformations
624 624 are not applied.
625 625
626 626 The fulltext data may contain a header containing metadata. Most
627 627 consumers should use ``read()`` to obtain the actual file data.
628 628 """
629 629
630 630 def rawdata(node):
631 631 """Obtain raw data for a node."""
632 632
633 633 def read(node):
634 634 """Resolve file fulltext data.
635 635
636 636 This is similar to ``revision()`` except any metadata in the data
637 637 headers is stripped.
638 638 """
639 639
640 640 def renamed(node):
641 641 """Obtain copy metadata for a node.
642 642
643 643 Returns ``False`` if no copy metadata is stored or a 2-tuple of
644 644 (path, node) from which this revision was copied.
645 645 """
646 646
647 647 def cmp(node, fulltext):
648 648 """Compare fulltext to another revision.
649 649
650 650 Returns True if the fulltext is different from what is stored.
651 651
652 652 This takes copy metadata into account.
653 653
654 654 TODO better document the copy metadata and censoring logic.
655 655 """
656 656
657 657 def emitrevisions(
658 658 nodes,
659 659 nodesorder=None,
660 660 revisiondata=False,
661 661 assumehaveparentrevisions=False,
662 662 deltamode=CG_DELTAMODE_STD,
663 663 ):
664 664 """Produce ``irevisiondelta`` for revisions.
665 665
666 666 Given an iterable of nodes, emits objects conforming to the
667 667 ``irevisiondelta`` interface that describe revisions in storage.
668 668
669 669 This method is a generator.
670 670
671 671 The input nodes may be unordered. Implementations must ensure that a
672 672 node's parents are emitted before the node itself. Transitively, this
673 673 means that a node may only be emitted once all its ancestors in
674 674 ``nodes`` have also been emitted.
675 675
676 676 By default, emits "index" data (the ``node``, ``p1node``, and
677 677 ``p2node`` attributes). If ``revisiondata`` is set, revision data
678 678 will also be present on the emitted objects.
679 679
680 680 With default argument values, implementations can choose to emit
681 681 either fulltext revision data or a delta. When emitting deltas,
682 682 implementations must consider whether the delta's base revision
683 683 fulltext is available to the receiver.
684 684
685 685 The base revision fulltext is guaranteed to be available if any of
686 686 the following are met:
687 687
688 688 * Its fulltext revision was emitted by this method call.
689 689 * A delta for that revision was emitted by this method call.
690 690 * ``assumehaveparentrevisions`` is True and the base revision is a
691 691 parent of the node.
692 692
693 693 ``nodesorder`` can be used to control the order that revisions are
694 694 emitted. By default, revisions can be reordered as long as they are
695 695 in DAG topological order (see above). If the value is ``nodes``,
696 696 the iteration order from ``nodes`` should be used. If the value is
697 697 ``storage``, then the native order from the backing storage layer
698 698 is used. (Not all storage layers will have strong ordering and behavior
699 699 of this mode is storage-dependent.) ``nodes`` ordering can force
700 700 revisions to be emitted before their ancestors, so consumers should
701 701 use it with care.
702 702
703 703 The ``linknode`` attribute on the returned ``irevisiondelta`` may not
704 704 be set and it is the caller's responsibility to resolve it, if needed.
705 705
706 706 If ``deltamode`` is CG_DELTAMODE_PREV and revision data is requested,
707 707 all revision data should be emitted as deltas against the revision
708 708 emitted just prior. The initial revision should be a delta against its
709 709 1st parent.
710 710 """
711 711
712 712
713 713 class ifilemutation(interfaceutil.Interface):
714 714 """Storage interface for mutation events of a tracked file."""
715 715
716 716 def add(filedata, meta, transaction, linkrev, p1, p2):
717 717 """Add a new revision to the store.
718 718
719 719 Takes file data, dictionary of metadata, a transaction, linkrev,
720 720 and parent nodes.
721 721
722 722 Returns the node that was added.
723 723
724 724 May no-op if a revision matching the supplied data is already stored.
725 725 """
726 726
727 727 def addrevision(
728 728 revisiondata,
729 729 transaction,
730 730 linkrev,
731 731 p1,
732 732 p2,
733 733 node=None,
734 734 flags=0,
735 735 cachedelta=None,
736 736 ):
737 """Add a new revision to the store.
737 """Add a new revision to the store and return its number.
738 738
739 739 This is similar to ``add()`` except it operates at a lower level.
740 740
741 741 The data passed in already contains a metadata header, if any.
742 742
743 743 ``node`` and ``flags`` can be used to define the expected node and
744 744 the flags to use with storage. ``flags`` is a bitwise value composed
745 745 of the various ``REVISION_FLAG_*`` constants.
746 746
747 747 ``add()`` is usually called when adding files from e.g. the working
748 748 directory. ``addrevision()`` is often called by ``add()`` and for
749 749 scenarios where revision data has already been computed, such as when
750 750 applying raw data from a peer repo.
751 751 """
752 752
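Under the contract documented above, implementations return the number of the appended revision rather than its node. A minimal sketch of a conforming backend; ``_index``, ``_nodemap`` and ``_makenode()`` are hypothetical stand-ins for whatever structures a concrete store uses:

    def addrevision(self, revisiondata, transaction, linkrev, p1, p2,
                    node=None, flags=0, cachedelta=None):
        node = node or self._makenode(p1, p2, revisiondata)  # hypothetical helper
        if node in self._nodemap:
            # May no-op when the revision already exists; still report its number.
            return self._nodemap[node]
        rev = len(self._index)
        self._index.append((node, p1, p2, linkrev, flags, revisiondata))
        self._nodemap[node] = rev
        return rev   # the new revision number, not the node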
753 753 def addgroup(
754 754 deltas,
755 755 linkmapper,
756 756 transaction,
757 757 addrevisioncb=None,
758 758 duplicaterevisioncb=None,
759 759 maybemissingparents=False,
760 760 ):
761 761 """Process a series of deltas for storage.
762 762
763 763 ``deltas`` is an iterable of 7-tuples of
764 764 (node, p1, p2, linknode, deltabase, delta, flags) defining revisions
765 765 to add.
766 766
767 767 The ``delta`` field contains ``mpatch`` data to apply to a base
768 768 revision, identified by ``deltabase``. The base node can be
769 769 ``nullid``, in which case the header from the delta can be ignored
770 770 and the delta used as the fulltext.
771 771
772 772 ``alwayscache`` instructs the lower layers to cache the content of the
773 773 newly added revision, even if it needs to be explicitly computed.
774 774 This used to be the default when ``addrevisioncb`` was provided up to
775 775 Mercurial 5.8.
776 776
777 777 ``addrevisioncb`` should be called for each node as it is committed.
778 778 ``duplicaterevisioncb`` should be called for each pre-existing node.
779 779
780 780 ``maybemissingparents`` is a bool indicating whether the incoming
781 781 data may reference parents/ancestor revisions that aren't present.
782 782 This flag is set when receiving data into a "shallow" store that
783 783 doesn't hold all history.
784 784
785 785 Returns a list of nodes that were processed. A node will be in the list
786 786 even if it existed in the store previously.
787 787 """
788 788
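Each element of ``deltas`` is the 7-tuple described above. A sketch of assembling one entry, where ``store`` and every field value are placeholders:

    delta_entry = (
        node,        # node of the revision being added
        p1,          # 1st parent node
        p2,          # 2nd parent node
        linknode,    # changelog node this revision is linked to
        deltabase,   # node the delta applies against (nullid => delta is the fulltext)
        delta,       # mpatch data as bytes
        flags,       # bitwise combination of REVISION_FLAG_* constants
    )
    store.addgroup([delta_entry], linkmapper, transaction)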
789 789 def censorrevision(tr, node, tombstone=b''):
790 790 """Remove the content of a single revision.
791 791
792 792 The specified ``node`` will have its content purged from storage.
793 793 Future attempts to access the revision data for this node will
794 794 result in failure.
795 795
796 796 A ``tombstone`` message can optionally be stored. This message may be
797 797 displayed to users when they attempt to access the missing revision
798 798 data.
799 799
800 800 Storage backends may have stored deltas against the previous content
801 801 in this revision. As part of censoring a revision, these storage
802 802 backends are expected to rewrite any internally stored deltas such
803 803 that they no longer reference the deleted content.
804 804 """
805 805
806 806 def getstrippoint(minlink):
807 807 """Find the minimum revision that must be stripped to strip a linkrev.
808 808
809 809 Returns a 2-tuple containing the minimum revision number and a set
810 810 of all revisions numbers that would be broken by this strip.
811 811
812 812 TODO this is highly revlog centric and should be abstracted into
813 813 a higher-level deletion API. ``repair.strip()`` relies on this.
814 814 """
815 815
816 816 def strip(minlink, transaction):
817 817 """Remove storage of items starting at a linkrev.
818 818
819 819 This uses ``getstrippoint()`` to determine the first node to remove.
820 820 Then it effectively truncates storage for all revisions after that.
821 821
822 822 TODO this is highly revlog centric and should be abstracted into a
823 823 higher-level deletion API.
824 824 """
825 825
826 826
827 827 class ifilestorage(ifileindex, ifiledata, ifilemutation):
828 828 """Complete storage interface for a single tracked file."""
829 829
830 830 def files():
831 831 """Obtain paths that are backing storage for this file.
832 832
833 833 TODO this is used heavily by verify code and there should probably
834 834 be a better API for that.
835 835 """
836 836
837 837 def storageinfo(
838 838 exclusivefiles=False,
839 839 sharedfiles=False,
840 840 revisionscount=False,
841 841 trackedsize=False,
842 842 storedsize=False,
843 843 ):
844 844 """Obtain information about storage for this file's data.
845 845
846 846 Returns a dict describing storage for this tracked path. The keys
847 847 in the dict map to arguments of the same. The arguments are bools
848 848 indicating whether to calculate and obtain that data.
849 849
850 850 exclusivefiles
851 851 Iterable of (vfs, path) describing files that are exclusively
852 852 used to back storage for this tracked path.
853 853
854 854 sharedfiles
855 855 Iterable of (vfs, path) describing files that are used to back
856 856 storage for this tracked path. Those files may also provide storage
857 857 for other stored entities.
858 858
859 859 revisionscount
860 860 Number of revisions available for retrieval.
861 861
862 862 trackedsize
863 863 Total size in bytes of all tracked revisions. This is a sum of the
864 864 length of the fulltext of all revisions.
865 865
866 866 storedsize
867 867 Total size in bytes used to store data for all tracked revisions.
868 868 This is commonly less than ``trackedsize`` due to internal usage
869 869 of deltas rather than fulltext revisions.
870 870
871 871 Not all storage backends may support all queries or have a reasonable
872 872 value to use. In that case, the value should be set to ``None`` and
873 873 callers are expected to handle this special value.
874 874 """
875 875
876 876 def verifyintegrity(state):
877 877 """Verifies the integrity of file storage.
878 878
879 879 ``state`` is a dict holding state of the verifier process. It can be
880 880 used to communicate data between invocations of multiple storage
881 881 primitives.
882 882
883 883 If individual revisions cannot have their revision content resolved,
884 884 the method is expected to set the ``skipread`` key to a set of nodes
885 885 that encountered problems. If set, the method can also add the node(s)
886 886 to ``safe_renamed`` in order to indicate nodes that may perform the
887 887 rename checks with currently accessible data.
888 888
889 889 The method yields objects conforming to the ``iverifyproblem``
890 890 interface.
891 891 """
892 892
893 893
894 894 class idirs(interfaceutil.Interface):
895 895 """Interface representing a collection of directories from paths.
896 896
897 897 This interface is essentially a derived data structure representing
898 898 directories from a collection of paths.
899 899 """
900 900
901 901 def addpath(path):
902 902 """Add a path to the collection.
903 903
904 904 All directories in the path will be added to the collection.
905 905 """
906 906
907 907 def delpath(path):
908 908 """Remove a path from the collection.
909 909
910 910 If the removal was the last path in a particular directory, the
911 911 directory is removed from the collection.
912 912 """
913 913
914 914 def __iter__():
915 915 """Iterate over the directories in this collection of paths."""
916 916
917 917 def __contains__(path):
918 918 """Whether a specific directory is in this collection."""
919 919
920 920
921 921 class imanifestdict(interfaceutil.Interface):
922 922 """Interface representing a manifest data structure.
923 923
924 924 A manifest is effectively a dict mapping paths to entries. Each entry
925 925 consists of a binary node and extra flags affecting that entry.
926 926 """
927 927
928 928 def __getitem__(path):
929 929 """Returns the binary node value for a path in the manifest.
930 930
931 931 Raises ``KeyError`` if the path does not exist in the manifest.
932 932
933 933 Equivalent to ``self.find(path)[0]``.
934 934 """
935 935
936 936 def find(path):
937 937 """Returns the entry for a path in the manifest.
938 938
939 939 Returns a 2-tuple of (node, flags).
940 940
941 941 Raises ``KeyError`` if the path does not exist in the manifest.
942 942 """
943 943
944 944 def __len__():
945 945 """Return the number of entries in the manifest."""
946 946
947 947 def __nonzero__():
948 948 """Returns True if the manifest has entries, False otherwise."""
949 949
950 950 __bool__ = __nonzero__
951 951
952 952 def __setitem__(path, node):
953 953 """Define the node value for a path in the manifest.
954 954
955 955 If the path is already in the manifest, its flags will be copied to
956 956 the new entry.
957 957 """
958 958
959 959 def __contains__(path):
960 960 """Whether a path exists in the manifest."""
961 961
962 962 def __delitem__(path):
963 963 """Remove a path from the manifest.
964 964
965 965 Raises ``KeyError`` if the path is not in the manifest.
966 966 """
967 967
968 968 def __iter__():
969 969 """Iterate over paths in the manifest."""
970 970
971 971 def iterkeys():
972 972 """Iterate over paths in the manifest."""
973 973
974 974 def keys():
975 975 """Obtain a list of paths in the manifest."""
976 976
977 977 def filesnotin(other, match=None):
978 978 """Obtain the set of paths in this manifest but not in another.
979 979
980 980 ``match`` is an optional matcher function to be applied to both
981 981 manifests.
982 982
983 983 Returns a set of paths.
984 984 """
985 985
986 986 def dirs():
987 987 """Returns an object implementing the ``idirs`` interface."""
988 988
989 989 def hasdir(dir):
990 990 """Returns a bool indicating if a directory is in this manifest."""
991 991
992 992 def walk(match):
993 993 """Generator of paths in manifest satisfying a matcher.
994 994
995 995 If the matcher has explicit files listed and they don't exist in
996 996 the manifest, ``match.bad()`` is called for each missing file.
997 997 """
998 998
999 999 def diff(other, match=None, clean=False):
1000 1000 """Find differences between this manifest and another.
1001 1001
1002 1002 This manifest is compared to ``other``.
1003 1003
1004 1004 If ``match`` is provided, the two manifests are filtered against this
1005 1005 matcher and only entries satisfying the matcher are compared.
1006 1006
1007 1007 If ``clean`` is True, unchanged files are included in the returned
1008 1008 object.
1009 1009
1010 1010 Returns a dict with paths as keys and values of 2-tuples of 2-tuples of
1011 1011 the form ``((node1, flag1), (node2, flag2))`` where ``(node1, flag1)``
1012 1012 represents the node and flags for this manifest and ``(node2, flag2)``
1013 1013 are the same for the other manifest.
1014 1014 """
1015 1015
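The return shape of ``diff()`` can be consumed as follows; a minimal sketch assuming ``m1`` and ``m2`` implement ``imanifestdict`` (placeholders):

    changed, only_in_other, only_in_this = {}, [], []
    for path, ((node1, flag1), (node2, flag2)) in m1.diff(m2).items():
        if node1 is None:
            only_in_other.append(path)     # path missing from this manifest
        elif node2 is None:
            only_in_this.append(path)      # path missing from the other manifest
        else:
            changed[path] = (flag1, flag2)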
1016 1016 def setflag(path, flag):
1017 1017 """Set the flag value for a given path.
1018 1018
1019 1019 Raises ``KeyError`` if the path is not already in the manifest.
1020 1020 """
1021 1021
1022 1022 def get(path, default=None):
1023 1023 """Obtain the node value for a path or a default value if missing."""
1024 1024
1025 1025 def flags(path):
1026 1026 """Return the flags value for a path (default: empty bytestring)."""
1027 1027
1028 1028 def copy():
1029 1029 """Return a copy of this manifest."""
1030 1030
1031 1031 def items():
1032 1032 """Returns an iterable of (path, node) for items in this manifest."""
1033 1033
1034 1034 def iteritems():
1035 1035 """Identical to items()."""
1036 1036
1037 1037 def iterentries():
1038 1038 """Returns an iterable of (path, node, flags) for this manifest.
1039 1039
1040 1040 Similar to ``iteritems()`` except items are a 3-tuple and include
1041 1041 flags.
1042 1042 """
1043 1043
1044 1044 def text():
1045 1045 """Obtain the raw data representation for this manifest.
1046 1046
1047 1047 Result is used to create a manifest revision.
1048 1048 """
1049 1049
1050 1050 def fastdelta(base, changes):
1051 1051 """Obtain a delta between this manifest and another given changes.
1052 1052
1053 1053 ``base`` is the raw data representation for another manifest.
1054 1054
1055 1055 ``changes`` is an iterable of ``(path, to_delete)``.
1056 1056
1057 1057 Returns a 2-tuple containing ``bytearray(self.text())`` and the
1058 1058 delta between ``base`` and this manifest.
1059 1059
1060 1060 If this manifest implementation can't support ``fastdelta()``,
1061 1061 raise ``mercurial.manifest.FastdeltaUnavailable``.
1062 1062 """
1063 1063
1064 1064
1065 1065 class imanifestrevisionbase(interfaceutil.Interface):
1066 1066 """Base interface representing a single revision of a manifest.
1067 1067
1068 1068 Should not be used as a primary interface: should always be inherited
1069 1069 as part of a larger interface.
1070 1070 """
1071 1071
1072 1072 def copy():
1073 1073 """Obtain a copy of this manifest instance.
1074 1074
1075 1075 Returns an object conforming to the ``imanifestrevisionwritable``
1076 1076 interface. The instance will be associated with the same
1077 1077 ``imanifestlog`` collection as this instance.
1078 1078 """
1079 1079
1080 1080 def read():
1081 1081 """Obtain the parsed manifest data structure.
1082 1082
1083 1083 The returned object conforms to the ``imanifestdict`` interface.
1084 1084 """
1085 1085
1086 1086
1087 1087 class imanifestrevisionstored(imanifestrevisionbase):
1088 1088 """Interface representing a manifest revision committed to storage."""
1089 1089
1090 1090 def node():
1091 1091 """The binary node for this manifest."""
1092 1092
1093 1093 parents = interfaceutil.Attribute(
1094 1094 """List of binary nodes that are parents for this manifest revision."""
1095 1095 )
1096 1096
1097 1097 def readdelta(shallow=False):
1098 1098 """Obtain the manifest data structure representing changes from parent.
1099 1099
1100 1100 This manifest is compared to its 1st parent. A new manifest representing
1101 1101 those differences is constructed.
1102 1102
1103 1103 The returned object conforms to the ``imanifestdict`` interface.
1104 1104 """
1105 1105
1106 1106 def readfast(shallow=False):
1107 1107 """Calls either ``read()`` or ``readdelta()``.
1108 1108
1109 1109 The faster of the two options is called.
1110 1110 """
1111 1111
1112 1112 def find(key):
1113 1113 """Calls ``self.read().find(key)``.
1114 1114
1115 1115 Returns a 2-tuple of ``(node, flags)`` or raises ``KeyError``.
1116 1116 """
1117 1117
1118 1118
1119 1119 class imanifestrevisionwritable(imanifestrevisionbase):
1120 1120 """Interface representing a manifest revision that can be committed."""
1121 1121
1122 1122 def write(transaction, linkrev, p1node, p2node, added, removed, match=None):
1123 1123 """Add this revision to storage.
1124 1124
1125 1125 Takes a transaction object, the changeset revision number it will
1126 1126 be associated with, its parent nodes, and lists of added and
1127 1127 removed paths.
1128 1128
1129 1129 If match is provided, storage can choose not to inspect or write out
1130 1130 items that do not match. Storage is still required to be able to provide
1131 1131 the full manifest in the future for any directories written (these
1132 1132 manifests should not be "narrowed on disk").
1133 1133
1134 1134 Returns the binary node of the created revision.
1135 1135 """
1136 1136
1137 1137
1138 1138 class imanifeststorage(interfaceutil.Interface):
1139 1139 """Storage interface for manifest data."""
1140 1140
1141 1141 tree = interfaceutil.Attribute(
1142 1142 """The path to the directory this manifest tracks.
1143 1143
1144 1144 The empty bytestring represents the root manifest.
1145 1145 """
1146 1146 )
1147 1147
1148 1148 index = interfaceutil.Attribute(
1149 1149 """An ``ifilerevisionssequence`` instance."""
1150 1150 )
1151 1151
1152 1152 indexfile = interfaceutil.Attribute(
1153 1153 """Path of revlog index file.
1154 1154
1155 1155 TODO this is revlog specific and should not be exposed.
1156 1156 """
1157 1157 )
1158 1158
1159 1159 opener = interfaceutil.Attribute(
1160 1160 """VFS opener to use to access underlying files used for storage.
1161 1161
1162 1162 TODO this is revlog specific and should not be exposed.
1163 1163 """
1164 1164 )
1165 1165
1166 1166 version = interfaceutil.Attribute(
1167 1167 """Revlog version number.
1168 1168
1169 1169 TODO this is revlog specific and should not be exposed.
1170 1170 """
1171 1171 )
1172 1172
1173 1173 _generaldelta = interfaceutil.Attribute(
1174 1174 """Whether generaldelta storage is being used.
1175 1175
1176 1176 TODO this is revlog specific and should not be exposed.
1177 1177 """
1178 1178 )
1179 1179
1180 1180 fulltextcache = interfaceutil.Attribute(
1181 1181 """Dict with cache of fulltexts.
1182 1182
1183 1183 TODO this doesn't feel appropriate for the storage interface.
1184 1184 """
1185 1185 )
1186 1186
1187 1187 def __len__():
1188 1188 """Obtain the number of revisions stored for this manifest."""
1189 1189
1190 1190 def __iter__():
1191 1191 """Iterate over revision numbers for this manifest."""
1192 1192
1193 1193 def rev(node):
1194 1194 """Obtain the revision number given a binary node.
1195 1195
1196 1196 Raises ``error.LookupError`` if the node is not known.
1197 1197 """
1198 1198
1199 1199 def node(rev):
1200 1200 """Obtain the node value given a revision number.
1201 1201
1202 1202 Raises ``error.LookupError`` if the revision is not known.
1203 1203 """
1204 1204
1205 1205 def lookup(value):
1206 1206 """Attempt to resolve a value to a node.
1207 1207
1208 1208 Value can be a binary node, hex node, revision number, or a bytes
1209 1209 that can be converted to an integer.
1210 1210
1211 1211 Raises ``error.LookupError`` if a node could not be resolved.
1212 1212 """
1213 1213
1214 1214 def parents(node):
1215 1215 """Returns a 2-tuple of parent nodes for a node.
1216 1216
1217 1217 Values will be ``nullid`` if the parent is empty.
1218 1218 """
1219 1219
1220 1220 def parentrevs(rev):
1221 1221 """Like parents() but operates on revision numbers."""
1222 1222
1223 1223 def linkrev(rev):
1224 1224 """Obtain the changeset revision number a revision is linked to."""
1225 1225
1226 1226 def revision(node, _df=None, raw=False):
1227 1227 """Obtain fulltext data for a node."""
1228 1228
1229 1229 def rawdata(node, _df=None):
1230 1230 """Obtain raw data for a node."""
1231 1231
1232 1232 def revdiff(rev1, rev2):
1233 1233 """Obtain a delta between two revision numbers.
1234 1234
1235 1235 The returned data is the result of ``bdiff.bdiff()`` on the raw
1236 1236 revision data.
1237 1237 """
1238 1238
1239 1239 def cmp(node, fulltext):
1240 1240 """Compare fulltext to another revision.
1241 1241
1242 1242 Returns True if the fulltext is different from what is stored.
1243 1243 """
1244 1244
1245 1245 def emitrevisions(
1246 1246 nodes,
1247 1247 nodesorder=None,
1248 1248 revisiondata=False,
1249 1249 assumehaveparentrevisions=False,
1250 1250 ):
1251 1251 """Produce ``irevisiondelta`` describing revisions.
1252 1252
1253 1253 See the documentation for ``ifiledata`` for more.
1254 1254 """
1255 1255
1256 1256 def addgroup(
1257 1257 deltas,
1258 1258 linkmapper,
1259 1259 transaction,
1260 1260 addrevisioncb=None,
1261 1261 duplicaterevisioncb=None,
1262 1262 ):
1263 1263 """Process a series of deltas for storage.
1264 1264
1265 1265 See the documentation in ``ifilemutation`` for more.
1266 1266 """
1267 1267
1268 1268 def rawsize(rev):
1269 1269 """Obtain the size of tracked data.
1270 1270
1271 1271 Is equivalent to ``len(m.rawdata(node))``.
1272 1272
1273 1273 TODO this method is only used by upgrade code and may be removed.
1274 1274 """
1275 1275
1276 1276 def getstrippoint(minlink):
1277 1277 """Find minimum revision that must be stripped to strip a linkrev.
1278 1278
1279 1279 See the documentation in ``ifilemutation`` for more.
1280 1280 """
1281 1281
1282 1282 def strip(minlink, transaction):
1283 1283 """Remove storage of items starting at a linkrev.
1284 1284
1285 1285 See the documentation in ``ifilemutation`` for more.
1286 1286 """
1287 1287
1288 1288 def checksize():
1289 1289 """Obtain the expected sizes of backing files.
1290 1290
1291 1291 TODO this is used by verify and it should not be part of the interface.
1292 1292 """
1293 1293
1294 1294 def files():
1295 1295 """Obtain paths that are backing storage for this manifest.
1296 1296
1297 1297 TODO this is used by verify and there should probably be a better API
1298 1298 for this functionality.
1299 1299 """
1300 1300
1301 1301 def deltaparent(rev):
1302 1302 """Obtain the revision that a revision is delta'd against.
1303 1303
1304 1304 TODO delta encoding is an implementation detail of storage and should
1305 1305 not be exposed to the storage interface.
1306 1306 """
1307 1307
1308 1308 def clone(tr, dest, **kwargs):
1309 1309 """Clone this instance to another."""
1310 1310
1311 1311 def clearcaches(clear_persisted_data=False):
1312 1312 """Clear any caches associated with this instance."""
1313 1313
1314 1314 def dirlog(d):
1315 1315 """Obtain a manifest storage instance for a tree."""
1316 1316
1317 1317 def add(
1318 1318 m, transaction, link, p1, p2, added, removed, readtree=None, match=None
1319 1319 ):
1320 1320 """Add a revision to storage.
1321 1321
1322 1322 ``m`` is an object conforming to ``imanifestdict``.
1323 1323
1324 1324 ``link`` is the linkrev revision number.
1325 1325
1326 1326 ``p1`` and ``p2`` are the parent revision numbers.
1327 1327
1328 1328 ``added`` and ``removed`` are iterables of added and removed paths,
1329 1329 respectively.
1330 1330
1331 1331 ``readtree`` is a function that can be used to read the child tree(s)
1332 1332 when recursively writing the full tree structure with
1333 1333 treemanifests.
1334 1334
1335 1335 ``match`` is a matcher that can be used to hint to storage that not all
1336 1336 paths must be inspected; this is an optimization and can be safely
1337 1337 ignored. Note that the storage must still be able to reproduce a full
1338 1338 manifest including files that did not match.
1339 1339 """
1340 1340
1341 1341 def storageinfo(
1342 1342 exclusivefiles=False,
1343 1343 sharedfiles=False,
1344 1344 revisionscount=False,
1345 1345 trackedsize=False,
1346 1346 storedsize=False,
1347 1347 ):
1348 1348 """Obtain information about storage for this manifest's data.
1349 1349
1350 1350 See ``ifilestorage.storageinfo()`` for a description of this method.
1351 1351 This one behaves the same way, except for manifest data.
1352 1352 """
1353 1353
1354 1354
1355 1355 class imanifestlog(interfaceutil.Interface):
1356 1356 """Interface representing a collection of manifest snapshots.
1357 1357
1358 1358 Represents the root manifest in a repository.
1359 1359
1360 1360 Also serves as a means to access nested tree manifests and to cache
1361 1361 tree manifests.
1362 1362 """
1363 1363
1364 1364 def __getitem__(node):
1365 1365 """Obtain a manifest instance for a given binary node.
1366 1366
1367 1367 Equivalent to calling ``self.get('', node)``.
1368 1368
1369 1369 The returned object conforms to the ``imanifestrevisionstored``
1370 1370 interface.
1371 1371 """
1372 1372
1373 1373 def get(tree, node, verify=True):
1374 1374 """Retrieve the manifest instance for a given directory and binary node.
1375 1375
1376 1376 ``node`` always refers to the node of the root manifest (which will be
1377 1377 the only manifest if flat manifests are being used).
1378 1378
1379 1379 If ``tree`` is the empty string, the root manifest is returned.
1380 1380 Otherwise the manifest for the specified directory will be returned
1381 1381 (requires tree manifests).
1382 1382
1383 1383 If ``verify`` is True, ``LookupError`` is raised if the node is not
1384 1384 known.
1385 1385
1386 1386 The returned object conforms to the ``imanifestrevisionstored``
1387 1387 interface.
1388 1388 """
1389 1389
1390 1390 def getstorage(tree):
1391 1391 """Retrieve an interface to storage for a particular tree.
1392 1392
1393 1393 If ``tree`` is the empty bytestring, storage for the root manifest will
1394 1394 be returned. Otherwise storage for a tree manifest is returned.
1395 1395
1396 1396 TODO formalize interface for returned object.
1397 1397 """
1398 1398
1399 1399 def clearcaches():
1400 1400 """Clear caches associated with this collection."""
1401 1401
1402 1402 def rev(node):
1403 1403 """Obtain the revision number for a binary node.
1404 1404
1405 1405 Raises ``error.LookupError`` if the node is not known.
1406 1406 """
1407 1407
1408 1408 def update_caches(transaction):
1409 1409 """update whatever cache are relevant for the used storage."""
1410 1410
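# Hedged usage sketch (assumes a `repo` object conforming to the local
# repository interfaces defined below; all names are placeholders):
#
#   mfl = repo.manifestlog
#   mnode = repo[b'tip'].manifestnode()
#   mctx = mfl[mnode]              # root manifest for that changeset
#   sub = mfl.get(b'dir/', mnode)  # tree manifest for b'dir/' (tree repos only)
#   storage = mfl.getstorage(b'')  # storage interface for the root manifest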
1411 1411
1412 1412 class ilocalrepositoryfilestorage(interfaceutil.Interface):
1413 1413 """Local repository sub-interface providing access to tracked file storage.
1414 1414
1415 1415 This interface defines how a repository accesses storage for a single
1416 1416 tracked file path.
1417 1417 """
1418 1418
1419 1419 def file(f):
1420 1420 """Obtain a filelog for a tracked path.
1421 1421
1422 1422 The returned type conforms to the ``ifilestorage`` interface.
1423 1423 """
1424 1424
1425 1425
1426 1426 class ilocalrepositorymain(interfaceutil.Interface):
1427 1427 """Main interface for local repositories.
1428 1428
1429 1429 This currently captures the reality of things - not how things should be.
1430 1430 """
1431 1431
1432 1432 supportedformats = interfaceutil.Attribute(
1433 1433 """Set of requirements that apply to stream clone.
1434 1434
1435 1435 This is actually a class attribute and is shared among all instances.
1436 1436 """
1437 1437 )
1438 1438
1439 1439 supported = interfaceutil.Attribute(
1440 1440 """Set of requirements that this repo is capable of opening."""
1441 1441 )
1442 1442
1443 1443 requirements = interfaceutil.Attribute(
1444 1444 """Set of requirements this repo uses."""
1445 1445 )
1446 1446
1447 1447 features = interfaceutil.Attribute(
1448 1448 """Set of "features" this repository supports.
1449 1449
1450 1450 A "feature" is a loosely-defined term. It can refer to a feature
1451 1451 in the classical sense or can describe an implementation detail
1452 1452 of the repository. For example, a ``readonly`` feature may denote
1453 1453 the repository as read-only. Or a ``revlogfilestore`` feature may
1454 1454 denote that the repository is using revlogs for file storage.
1455 1455
1456 1456 The intent of features is to provide a machine-queryable mechanism
1457 1457 for repo consumers to test for various repository characteristics.
1458 1458
1459 1459 Features are similar to ``requirements``. The main difference is that
1460 1460 requirements are stored on-disk and represent requirements to open the
1461 1461 repository. Features describe run-time capabilities of the repository
1462 1462 at a more granular level (and may be derived from requirements).
1463 1463 """
1464 1464 )
1465 1465
1466 1466 filtername = interfaceutil.Attribute(
1467 1467 """Name of the repoview that is active on this repo."""
1468 1468 )
1469 1469
1470 1470 wvfs = interfaceutil.Attribute(
1471 1471 """VFS used to access the working directory."""
1472 1472 )
1473 1473
1474 1474 vfs = interfaceutil.Attribute(
1475 1475 """VFS rooted at the .hg directory.
1476 1476
1477 1477 Used to access repository data not in the store.
1478 1478 """
1479 1479 )
1480 1480
1481 1481 svfs = interfaceutil.Attribute(
1482 1482 """VFS rooted at the store.
1483 1483
1484 1484 Used to access repository data in the store. Typically .hg/store.
1485 1485 But can point elsewhere if the store is shared.
1486 1486 """
1487 1487 )
1488 1488
1489 1489 root = interfaceutil.Attribute(
1490 1490 """Path to the root of the working directory."""
1491 1491 )
1492 1492
1493 1493 path = interfaceutil.Attribute("""Path to the .hg directory.""")
1494 1494
1495 1495 origroot = interfaceutil.Attribute(
1496 1496 """The filesystem path that was used to construct the repo."""
1497 1497 )
1498 1498
1499 1499 auditor = interfaceutil.Attribute(
1500 1500 """A pathauditor for the working directory.
1501 1501
1502 1502 This checks if a path refers to a nested repository.
1503 1503
1504 1504 Operates on the filesystem.
1505 1505 """
1506 1506 )
1507 1507
1508 1508 nofsauditor = interfaceutil.Attribute(
1509 1509 """A pathauditor for the working directory.
1510 1510
1511 1511 This is like ``auditor`` except it doesn't do filesystem checks.
1512 1512 """
1513 1513 )
1514 1514
1515 1515 baseui = interfaceutil.Attribute(
1516 1516 """Original ui instance passed into constructor."""
1517 1517 )
1518 1518
1519 1519 ui = interfaceutil.Attribute("""Main ui instance for this repository.""")
1520 1520
1521 1521 sharedpath = interfaceutil.Attribute(
1522 1522 """Path to the .hg directory of the repo this repo was shared from."""
1523 1523 )
1524 1524
1525 1525 store = interfaceutil.Attribute("""A store instance.""")
1526 1526
1527 1527 spath = interfaceutil.Attribute("""Path to the store.""")
1528 1528
1529 1529 sjoin = interfaceutil.Attribute("""Alias to self.store.join.""")
1530 1530
1531 1531 cachevfs = interfaceutil.Attribute(
1532 1532 """A VFS used to access the cache directory.
1533 1533
1534 1534 Typically .hg/cache.
1535 1535 """
1536 1536 )
1537 1537
1538 1538 wcachevfs = interfaceutil.Attribute(
1539 1539 """A VFS used to access the cache directory dedicated to working copy
1540 1540
1541 1541 Typically .hg/wcache.
1542 1542 """
1543 1543 )
1544 1544
1545 1545 filteredrevcache = interfaceutil.Attribute(
1546 1546 """Holds sets of revisions to be filtered."""
1547 1547 )
1548 1548
1549 1549 names = interfaceutil.Attribute("""A ``namespaces`` instance.""")
1550 1550
1551 1551 filecopiesmode = interfaceutil.Attribute(
1552 1552 """The way files copies should be dealt with in this repo."""
1553 1553 )
1554 1554
1555 1555 def close():
1556 1556 """Close the handle on this repository."""
1557 1557
1558 1558 def peer():
1559 1559 """Obtain an object conforming to the ``peer`` interface."""
1560 1560
1561 1561 def unfiltered():
1562 1562 """Obtain an unfiltered/raw view of this repo."""
1563 1563
1564 1564 def filtered(name, visibilityexceptions=None):
1565 1565 """Obtain a named view of this repository."""
1566 1566
1567 1567 obsstore = interfaceutil.Attribute("""A store of obsolescence data.""")
1568 1568
1569 1569 changelog = interfaceutil.Attribute("""A handle on the changelog revlog.""")
1570 1570
1571 1571 manifestlog = interfaceutil.Attribute(
1572 1572 """An instance conforming to the ``imanifestlog`` interface.
1573 1573
1574 1574 Provides access to manifests for the repository.
1575 1575 """
1576 1576 )
1577 1577
1578 1578 dirstate = interfaceutil.Attribute("""Working directory state.""")
1579 1579
1580 1580 narrowpats = interfaceutil.Attribute(
1581 1581 """Matcher patterns for this repository's narrowspec."""
1582 1582 )
1583 1583
1584 1584 def narrowmatch(match=None, includeexact=False):
1585 1585 """Obtain a matcher for the narrowspec."""
1586 1586
1587 1587 def setnarrowpats(newincludes, newexcludes):
1588 1588 """Define the narrowspec for this repository."""
1589 1589
1590 1590 def __getitem__(changeid):
1591 1591 """Try to resolve a changectx."""
1592 1592
1593 1593 def __contains__(changeid):
1594 1594 """Whether a changeset exists."""
1595 1595
1596 1596 def __nonzero__():
1597 1597 """Always returns True."""
1598 1598 return True
1599 1599
1600 1600 __bool__ = __nonzero__
1601 1601
1602 1602 def __len__():
1603 1603 """Returns the number of changesets in the repo."""
1604 1604
1605 1605 def __iter__():
1606 1606 """Iterate over revisions in the changelog."""
1607 1607
1608 1608 def revs(expr, *args):
1609 1609 """Evaluate a revset.
1610 1610
1611 1611 Emits revisions.
1612 1612 """
1613 1613
1614 1614 def set(expr, *args):
1615 1615 """Evaluate a revset.
1616 1616
1617 1617 Emits changectx instances.
1618 1618 """
1619 1619
1620 1620 def anyrevs(specs, user=False, localalias=None):
1621 1621 """Find revisions matching one of the given revsets."""
1622 1622
1623 1623 def url():
1624 1624 """Returns a string representing the location of this repo."""
1625 1625
1626 1626 def hook(name, throw=False, **args):
1627 1627 """Call a hook."""
1628 1628
1629 1629 def tags():
1630 1630 """Return a mapping of tag to node."""
1631 1631
1632 1632 def tagtype(tagname):
1633 1633 """Return the type of a given tag."""
1634 1634
1635 1635 def tagslist():
1636 1636 """Return a list of tags ordered by revision."""
1637 1637
1638 1638 def nodetags(node):
1639 1639 """Return the tags associated with a node."""
1640 1640
1641 1641 def nodebookmarks(node):
1642 1642 """Return the list of bookmarks pointing to the specified node."""
1643 1643
1644 1644 def branchmap():
1645 1645 """Return a mapping of branch to heads in that branch."""
1646 1646
1647 1647 def revbranchcache():
1648 1648 pass
1649 1649
1650 1650 def register_changeset(rev, changelogrevision):
1651 1651 """Extension point for caches for new nodes.
1652 1652
1653 1653 Multiple consumers are expected to need parts of the changelogrevision,
1654 1654 so it is provided as an optimization to avoid duplicate lookups. A simple
1655 1655 cache would be fragile when other revisions are accessed, too."""
1656 1656 pass
1657 1657
1658 1658 def branchtip(branchtip, ignoremissing=False):
1659 1659 """Return the tip node for a given branch."""
1660 1660
1661 1661 def lookup(key):
1662 1662 """Resolve the node for a revision."""
1663 1663
1664 1664 def lookupbranch(key):
1665 1665 """Look up the branch name of the given revision or branch name."""
1666 1666
1667 1667 def known(nodes):
1668 1668 """Determine whether a series of nodes is known.
1669 1669
1670 1670 Returns a list of bools.
1671 1671 """
1672 1672
1673 1673 def local():
1674 1674 """Whether the repository is local."""
1675 1675 return True
1676 1676
1677 1677 def publishing():
1678 1678 """Whether the repository is a publishing repository."""
1679 1679
1680 1680 def cancopy():
1681 1681 pass
1682 1682
1683 1683 def shared():
1684 1684 """The type of shared repository or None."""
1685 1685
1686 1686 def wjoin(f, *insidef):
1687 1687 """Calls self.vfs.reljoin(self.root, f, *insidef)"""
1688 1688
1689 1689 def setparents(p1, p2):
1690 1690 """Set the parent nodes of the working directory."""
1691 1691
1692 1692 def filectx(path, changeid=None, fileid=None):
1693 1693 """Obtain a filectx for the given file revision."""
1694 1694
1695 1695 def getcwd():
1696 1696 """Obtain the current working directory from the dirstate."""
1697 1697
1698 1698 def pathto(f, cwd=None):
1699 1699 """Obtain the relative path to a file."""
1700 1700
1701 1701 def adddatafilter(name, fltr):
1702 1702 pass
1703 1703
1704 1704 def wread(filename):
1705 1705 """Read a file from wvfs, using data filters."""
1706 1706
1707 1707 def wwrite(filename, data, flags, backgroundclose=False, **kwargs):
1708 1708 """Write data to a file in the wvfs, using data filters."""
1709 1709
1710 1710 def wwritedata(filename, data):
1711 1711 """Resolve data for writing to the wvfs, using data filters."""
1712 1712
1713 1713 def currenttransaction():
1714 1714 """Obtain the current transaction instance or None."""
1715 1715
1716 1716 def transaction(desc, report=None):
1717 1717 """Open a new transaction to write to the repository."""
1718 1718
1719 1719 def undofiles():
1720 1720 """Returns a list of (vfs, path) for files to undo transactions."""
1721 1721
1722 1722 def recover():
1723 1723 """Roll back an interrupted transaction."""
1724 1724
1725 1725 def rollback(dryrun=False, force=False):
1726 1726 """Undo the last transaction.
1727 1727
1728 1728 DANGEROUS.
1729 1729 """
1730 1730
1731 1731 def updatecaches(tr=None, full=False):
1732 1732 """Warm repo caches."""
1733 1733
1734 1734 def invalidatecaches():
1735 1735 """Invalidate cached data due to the repository mutating."""
1736 1736
1737 1737 def invalidatevolatilesets():
1738 1738 pass
1739 1739
1740 1740 def invalidatedirstate():
1741 1741 """Invalidate the dirstate."""
1742 1742
1743 1743 def invalidate(clearfilecache=False):
1744 1744 pass
1745 1745
1746 1746 def invalidateall():
1747 1747 pass
1748 1748
1749 1749 def lock(wait=True):
1750 1750 """Lock the repository store and return a lock instance."""
1751 1751
1752 1752 def wlock(wait=True):
1753 1753 """Lock the non-store parts of the repository."""
1754 1754
1755 1755 def currentwlock():
1756 1756 """Return the wlock if it's held or None."""
1757 1757
1758 1758 def checkcommitpatterns(wctx, match, status, fail):
1759 1759 pass
1760 1760
1761 1761 def commit(
1762 1762 text=b'',
1763 1763 user=None,
1764 1764 date=None,
1765 1765 match=None,
1766 1766 force=False,
1767 1767 editor=False,
1768 1768 extra=None,
1769 1769 ):
1770 1770 """Add a new revision to the repository."""
1771 1771
1772 1772 def commitctx(ctx, error=False, origctx=None):
1773 1773 """Commit a commitctx instance to the repository."""
1774 1774
1775 1775 def destroying():
1776 1776 """Inform the repository that nodes are about to be destroyed."""
1777 1777
1778 1778 def destroyed():
1779 1779 """Inform the repository that nodes have been destroyed."""
1780 1780
1781 1781 def status(
1782 1782 node1=b'.',
1783 1783 node2=None,
1784 1784 match=None,
1785 1785 ignored=False,
1786 1786 clean=False,
1787 1787 unknown=False,
1788 1788 listsubrepos=False,
1789 1789 ):
1790 1790 """Convenience method to call repo[x].status()."""
1791 1791
1792 1792 def addpostdsstatus(ps):
1793 1793 pass
1794 1794
1795 1795 def postdsstatus():
1796 1796 pass
1797 1797
1798 1798 def clearpostdsstatus():
1799 1799 pass
1800 1800
1801 1801 def heads(start=None):
1802 1802 """Obtain list of nodes that are DAG heads."""
1803 1803
1804 1804 def branchheads(branch=None, start=None, closed=False):
1805 1805 pass
1806 1806
1807 1807 def branches(nodes):
1808 1808 pass
1809 1809
1810 1810 def between(pairs):
1811 1811 pass
1812 1812
1813 1813 def checkpush(pushop):
1814 1814 pass
1815 1815
1816 1816 prepushoutgoinghooks = interfaceutil.Attribute("""util.hooks instance.""")
1817 1817
1818 1818 def pushkey(namespace, key, old, new):
1819 1819 pass
1820 1820
1821 1821 def listkeys(namespace):
1822 1822 pass
1823 1823
1824 1824 def debugwireargs(one, two, three=None, four=None, five=None):
1825 1825 pass
1826 1826
1827 1827 def savecommitmessage(text):
1828 1828 pass
1829 1829
1830 1830
1831 1831 class completelocalrepository(
1832 1832 ilocalrepositorymain, ilocalrepositoryfilestorage
1833 1833 ):
1834 1834 """Complete interface for a local repository."""
1835 1835
1836 1836
1837 1837 class iwireprotocolcommandcacher(interfaceutil.Interface):
1838 1838 """Represents a caching backend for wire protocol commands.
1839 1839
1840 1840 Wire protocol version 2 supports transparent caching of many commands.
1841 1841 To leverage this caching, servers can activate objects that cache
1842 1842 command responses. Objects handle both cache writing and reading.
1843 1843 This interface defines how that response caching mechanism works.
1844 1844
1845 1845 Wire protocol version 2 commands emit a series of objects that are
1846 1846 serialized and sent to the client. The caching layer exists between
1847 1847 the invocation of the command function and the sending of its output
1848 1848 objects to an output layer.
1849 1849
1850 1850 Instances of this interface represent a binding to a cache that
1851 1851 can serve a response (in place of calling a command function) and/or
1852 1852 write responses to a cache for subsequent use.
1853 1853
1854 1854 When a command request arrives, the following happens with regards
1855 1855 to this interface:
1856 1856
1857 1857 1. The server determines whether the command request is cacheable.
1858 1858 2. If it is, an instance of this interface is spawned.
1859 1859 3. The cacher is activated in a context manager (``__enter__`` is called).
1860 1860 4. A cache *key* for that request is derived. This will call the
1861 1861 instance's ``adjustcachekeystate()`` method so the derivation
1862 1862 can be influenced.
1863 1863 5. The cacher is informed of the derived cache key via a call to
1864 1864 ``setcachekey()``.
1865 1865 6. The cacher's ``lookup()`` method is called to test for presence of
1866 1866 the derived key in the cache.
1867 1867 7. If ``lookup()`` returns a hit, that cached result is used in place
1868 1868 of invoking the command function. ``__exit__`` is called and the instance
1869 1869 is discarded.
1870 1870 8. The command function is invoked.
1871 1871 9. ``onobject()`` is called for each object emitted by the command
1872 1872 function.
1873 1873 10. After the final object is seen, ``onfinished()`` is called.
1874 1874 11. ``__exit__`` is called to signal the end of use of the instance.
1875 1875
1876 1876 Cache *key* derivation can be influenced by the instance.
1877 1877
1878 1878 Cache keys are initially derived by a deterministic representation of
1879 1879 the command request. This includes the command name, arguments, protocol
1880 1880 version, etc. This initial key derivation is performed by CBOR-encoding a
1881 1881 data structure and feeding that output into a hasher.
1882 1882
1883 1883 Instances of this interface can influence this initial key derivation
1884 1884 via ``adjustcachekeystate()``.
1885 1885
1886 1886 The instance is informed of the derived cache key via a call to
1887 1887 ``setcachekey()``. The instance must store the key locally so it can
1888 1888 be consulted on subsequent operations that may require it.
1889 1889
1890 1890 When constructed, the instance has access to a callable that can be used
1891 1891 for encoding response objects. This callable receives as its single
1892 1892 argument an object emitted by a command function. It returns an iterable
1893 1893 of bytes chunks representing the encoded object. Unless the cacher is
1894 1894 caching native Python objects in memory or has a way of reconstructing
1895 1895 the original Python objects, implementations typically call this function
1896 1896 to produce bytes from the output objects and then store those bytes in
1897 1897 the cache. When it comes time to re-emit those bytes, they are wrapped
1898 1898 in a ``wireprototypes.encodedresponse`` instance to tell the output
1899 1899 layer that they are pre-encoded.
1900 1900
1901 1901 When receiving the objects emitted by the command function, instances
1902 1902 can choose what to do with those objects. The simplest thing to do is
1903 1903 re-emit the original objects. They will be forwarded to the output
1904 1904 layer and will be processed as if the cacher did not exist.
1905 1905
1906 1906 Implementations could also choose to not emit objects - instead locally
1907 1907 buffering objects or their encoded representation. They could then emit
1908 1908 a single "coalesced" object when ``onfinished()`` is called. In
1909 1909 this way, the implementation would function as a filtering layer of
1910 1910 sorts.
1911 1911
1912 1912 When caching objects, typically the encoded form of the object will
1913 1913 be stored. Keep in mind that if the original object is forwarded to
1914 1914 the output layer, it will need to be encoded there as well. For large
1915 1915 output, this redundant encoding could add overhead. Implementations
1916 1916 could wrap the encoded object data in ``wireprototypes.encodedresponse``
1917 1917 instances to avoid this overhead.
1918 1918 """
1919 1919
1920 1920 def __enter__():
1921 1921 """Marks the instance as active.
1922 1922
1923 1923 Should return self.
1924 1924 """
1925 1925
1926 1926 def __exit__(exctype, excvalue, exctb):
1927 1927 """Called when cacher is no longer used.
1928 1928
1929 1929 This can be used by implementations to perform cleanup actions (e.g.
1930 1930 disconnecting network sockets, aborting a partially cached response).
1931 1931 """
1932 1932
1933 1933 def adjustcachekeystate(state):
1934 1934 """Influences cache key derivation by adjusting state to derive key.
1935 1935
1936 1936 A dict defining the state used to derive the cache key is passed.
1937 1937
1938 1938 Implementations can modify this dict to record additional state that
1939 1939 is wanted to influence key derivation.
1940 1940
1941 1941 Implementations are *highly* encouraged to not modify or delete
1942 1942 existing keys.
1943 1943 """
1944 1944
1945 1945 def setcachekey(key):
1946 1946 """Record the derived cache key for this request.
1947 1947
1948 1948 Instances may mutate the key for internal usage, as desired. e.g.
1949 1949 instances may wish to prepend the repo name, introduce path
1950 1950 components for filesystem or URL addressing, etc. Behavior is up to
1951 1951 the cache.
1952 1952
1953 1953 Returns a bool indicating if the request is cacheable by this
1954 1954 instance.
1955 1955 """
1956 1956
1957 1957 def lookup():
1958 1958 """Attempt to resolve an entry in the cache.
1959 1959
1960 1960 The instance is instructed to look for the cache key that it was
1961 1961 informed about via the call to ``setcachekey()``.
1962 1962
1963 1963 If there's no cache hit or the cacher doesn't wish to use the cached
1964 1964 entry, ``None`` should be returned.
1965 1965
1966 1966 Else, a dict defining the cached result should be returned. The
1967 1967 dict may have the following keys:
1968 1968
1969 1969 objs
1970 1970 An iterable of objects that should be sent to the client. That
1971 1971 iterable of objects is expected to be what the command function
1972 1972 would return if invoked or an equivalent representation thereof.
1973 1973 """
1974 1974
1975 1975 def onobject(obj):
1976 1976 """Called when a new object is emitted from the command function.
1977 1977
1978 1978 Receives as its argument the object that was emitted from the
1979 1979 command function.
1980 1980
1981 1981 This method returns an iterator of objects to forward to the output
1982 1982 layer. The easiest implementation is a generator that just
1983 1983 ``yield obj``.
1984 1984 """
1985 1985
1986 1986 def onfinished():
1987 1987 """Called after all objects have been emitted from the command function.
1988 1988
1989 1989 Implementations should return an iterator of objects to forward to
1990 1990 the output layer.
1991 1991
1992 1992 This method can be a generator.
1993 1993 """
@@ -1,2351 +1,2354 b''
1 1 # manifest.py - manifest revision class for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import heapq
11 11 import itertools
12 12 import struct
13 13 import weakref
14 14
15 15 from .i18n import _
16 16 from .node import (
17 17 bin,
18 18 hex,
19 19 nullid,
20 20 nullrev,
21 21 )
22 22 from .pycompat import getattr
23 23 from . import (
24 24 encoding,
25 25 error,
26 26 match as matchmod,
27 27 mdiff,
28 28 pathutil,
29 29 policy,
30 30 pycompat,
31 31 revlog,
32 32 util,
33 33 )
34 34 from .interfaces import (
35 35 repository,
36 36 util as interfaceutil,
37 37 )
38 38
39 39 parsers = policy.importmod('parsers')
40 40 propertycache = util.propertycache
41 41
42 42 # Allow tests to more easily test the alternate path in manifestdict.fastdelta()
43 43 FASTDELTA_TEXTDIFF_THRESHOLD = 1000
44 44
45 45
46 46 def _parse(data):
47 47 # This method does a little bit of excessive-looking
48 48 # precondition checking. This is so that the behavior of this
49 49 # class exactly matches its C counterpart to try and help
50 50 # prevent surprise breakage for anyone that develops against
51 51 # the pure version.
52 52 if data and data[-1:] != b'\n':
53 53 raise ValueError(b'Manifest did not end in a newline.')
54 54 prev = None
55 55 for l in data.splitlines():
56 56 if prev is not None and prev > l:
57 57 raise ValueError(b'Manifest lines not in sorted order.')
58 58 prev = l
59 59 f, n = l.split(b'\0')
60 60 nl = len(n)
61 61 flags = n[-1:]
62 62 if flags in _manifestflags:
63 63 n = n[:-1]
64 64 nl -= 1
65 65 else:
66 66 flags = b''
67 67 if nl not in (40, 64):
68 68 raise ValueError(b'Invalid manifest line')
69 69
70 70 yield f, bin(n), flags
71 71
72 72
73 73 def _text(it):
74 74 files = []
75 75 lines = []
76 76 for f, n, fl in it:
77 77 files.append(f)
78 78 # if this is changed to support newlines in filenames,
79 79 # be sure to check the templates/ dir again (especially *-raw.tmpl)
80 80 lines.append(b"%s\0%s%s\n" % (f, hex(n), fl))
81 81
82 82 _checkforbidden(files)
83 83 return b''.join(lines)
84 84
85 85
86 86 class lazymanifestiter(object):
87 87 def __init__(self, lm):
88 88 self.pos = 0
89 89 self.lm = lm
90 90
91 91 def __iter__(self):
92 92 return self
93 93
94 94 def next(self):
95 95 try:
96 96 data, pos = self.lm._get(self.pos)
97 97 except IndexError:
98 98 raise StopIteration
99 99 if pos == -1:
100 100 self.pos += 1
101 101 return data[0]
102 102 self.pos += 1
103 103 zeropos = data.find(b'\x00', pos)
104 104 return data[pos:zeropos]
105 105
106 106 __next__ = next
107 107
108 108
109 109 class lazymanifestiterentries(object):
110 110 def __init__(self, lm):
111 111 self.lm = lm
112 112 self.pos = 0
113 113
114 114 def __iter__(self):
115 115 return self
116 116
117 117 def next(self):
118 118 try:
119 119 data, pos = self.lm._get(self.pos)
120 120 except IndexError:
121 121 raise StopIteration
122 122 if pos == -1:
123 123 self.pos += 1
124 124 return data
125 125 zeropos = data.find(b'\x00', pos)
126 126 nlpos = data.find(b'\n', pos)
127 127 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
128 128 raise error.StorageError(b'Invalid manifest line')
129 129 flags = data[nlpos - 1 : nlpos]
130 130 if flags in _manifestflags:
131 131 hlen = nlpos - zeropos - 2
132 132 else:
133 133 hlen = nlpos - zeropos - 1
134 134 flags = b''
135 135 if hlen not in (40, 64):
136 136 raise error.StorageError(b'Invalid manifest line')
137 137 hashval = unhexlify(
138 138 data, self.lm.extrainfo[self.pos], zeropos + 1, hlen
139 139 )
140 140 self.pos += 1
141 141 return (data[pos:zeropos], hashval, flags)
142 142
143 143 __next__ = next
144 144
145 145
146 146 def unhexlify(data, extra, pos, length):
147 147 s = bin(data[pos : pos + length])
148 148 if extra:
149 149 s += chr(extra & 0xFF)
150 150 return s
151 151
152 152
153 153 def _cmp(a, b):
154 154 return (a > b) - (a < b)
155 155
156 156
157 157 _manifestflags = {b'', b'l', b't', b'x'}
158 158
159 159
160 160 class _lazymanifest(object):
161 161 """A pure python manifest backed by a byte string. It is supplimented with
162 162 internal lists as it is modified, until it is compacted back to a pure byte
163 163 string.
164 164
165 165 ``data`` is the initial manifest data.
166 166
167 167 ``positions`` is a list of offsets, one per manifest entry. Positive
168 168 values are offsets into ``data``, negative values are offsets into the
169 169 ``extradata`` list. When an entry is removed, its entry is dropped from
170 170 ``positions``. The values are encoded such that when walking the list and
171 171 indexing into ``data`` or ``extradata`` as appropriate, the entries are
172 172 sorted by filename.
173 173
174 174 ``extradata`` is a list of (key, hash, flags) for entries that were added or
175 175 modified since the manifest was created or compacted.
176 176 """
177 177
178 178 def __init__(
179 179 self,
180 180 data,
181 181 positions=None,
182 182 extrainfo=None,
183 183 extradata=None,
184 184 hasremovals=False,
185 185 ):
186 186 if positions is None:
187 187 self.positions = self.findlines(data)
188 188 self.extrainfo = [0] * len(self.positions)
189 189 self.data = data
190 190 self.extradata = []
191 191 self.hasremovals = False
192 192 else:
193 193 self.positions = positions[:]
194 194 self.extrainfo = extrainfo[:]
195 195 self.extradata = extradata[:]
196 196 self.data = data
197 197 self.hasremovals = hasremovals
198 198
199 199 def findlines(self, data):
200 200 if not data:
201 201 return []
202 202 pos = data.find(b"\n")
203 203 if pos == -1 or data[-1:] != b'\n':
204 204 raise ValueError(b"Manifest did not end in a newline.")
205 205 positions = [0]
206 206 prev = data[: data.find(b'\x00')]
207 207 while pos < len(data) - 1 and pos != -1:
208 208 positions.append(pos + 1)
209 209 nexts = data[pos + 1 : data.find(b'\x00', pos + 1)]
210 210 if nexts < prev:
211 211 raise ValueError(b"Manifest lines not in sorted order.")
212 212 prev = nexts
213 213 pos = data.find(b"\n", pos + 1)
214 214 return positions
215 215
216 216 def _get(self, index):
217 217 # get the position encoded in pos:
218 218 # positive number is an index in 'data'
219 219 # negative number is in extrapieces
220 220 pos = self.positions[index]
221 221 if pos >= 0:
222 222 return self.data, pos
223 223 return self.extradata[-pos - 1], -1
224 224
225 225 def _getkey(self, pos):
226 226 if pos >= 0:
227 227 return self.data[pos : self.data.find(b'\x00', pos + 1)]
228 228 return self.extradata[-pos - 1][0]
229 229
230 230 def bsearch(self, key):
231 231 first = 0
232 232 last = len(self.positions) - 1
233 233
234 234 while first <= last:
235 235 midpoint = (first + last) // 2
236 236 nextpos = self.positions[midpoint]
237 237 candidate = self._getkey(nextpos)
238 238 r = _cmp(key, candidate)
239 239 if r == 0:
240 240 return midpoint
241 241 else:
242 242 if r < 0:
243 243 last = midpoint - 1
244 244 else:
245 245 first = midpoint + 1
246 246 return -1
247 247
248 248 def bsearch2(self, key):
249 249 # same as the above, but will always return the position
250 250 # done for performance reasons
251 251 first = 0
252 252 last = len(self.positions) - 1
253 253
254 254 while first <= last:
255 255 midpoint = (first + last) // 2
256 256 nextpos = self.positions[midpoint]
257 257 candidate = self._getkey(nextpos)
258 258 r = _cmp(key, candidate)
259 259 if r == 0:
260 260 return (midpoint, True)
261 261 else:
262 262 if r < 0:
263 263 last = midpoint - 1
264 264 else:
265 265 first = midpoint + 1
266 266 return (first, False)
267 267
268 268 def __contains__(self, key):
269 269 return self.bsearch(key) != -1
270 270
271 271 def __getitem__(self, key):
272 272 if not isinstance(key, bytes):
273 273 raise TypeError(b"getitem: manifest keys must be a bytes.")
274 274 needle = self.bsearch(key)
275 275 if needle == -1:
276 276 raise KeyError
277 277 data, pos = self._get(needle)
278 278 if pos == -1:
279 279 return (data[1], data[2])
280 280 zeropos = data.find(b'\x00', pos)
281 281 nlpos = data.find(b'\n', zeropos)
282 282 assert 0 <= needle <= len(self.positions)
283 283 assert len(self.extrainfo) == len(self.positions)
284 284 if zeropos == -1 or nlpos == -1 or nlpos < zeropos:
285 285 raise error.StorageError(b'Invalid manifest line')
286 286 hlen = nlpos - zeropos - 1
287 287 flags = data[nlpos - 1 : nlpos]
288 288 if flags in _manifestflags:
289 289 hlen -= 1
290 290 else:
291 291 flags = b''
292 292 if hlen not in (40, 64):
293 293 raise error.StorageError(b'Invalid manifest line')
294 294 hashval = unhexlify(data, self.extrainfo[needle], zeropos + 1, hlen)
295 295 return (hashval, flags)
296 296
297 297 def __delitem__(self, key):
298 298 needle, found = self.bsearch2(key)
299 299 if not found:
300 300 raise KeyError
301 301 cur = self.positions[needle]
302 302 self.positions = self.positions[:needle] + self.positions[needle + 1 :]
303 303 self.extrainfo = self.extrainfo[:needle] + self.extrainfo[needle + 1 :]
304 304 if cur >= 0:
305 305 # This does NOT unsort the list as far as the search functions are
306 306 # concerned, as they only examine lines mapped by self.positions.
307 307 self.data = self.data[:cur] + b'\x00' + self.data[cur + 1 :]
308 308 self.hasremovals = True
309 309
310 310 def __setitem__(self, key, value):
311 311 if not isinstance(key, bytes):
312 312 raise TypeError(b"setitem: manifest keys must be a byte string.")
313 313 if not isinstance(value, tuple) or len(value) != 2:
314 314 raise TypeError(
315 315 b"Manifest values must be a tuple of (node, flags)."
316 316 )
317 317 hashval = value[0]
318 318 if not isinstance(hashval, bytes) or len(hashval) not in (20, 32):
319 319 raise TypeError(b"node must be a 20-byte or 32-byte byte string")
320 320 flags = value[1]
321 321 if not isinstance(flags, bytes) or len(flags) > 1:
322 322 raise TypeError(b"flags must a 0 or 1 byte string, got %r", flags)
323 323 needle, found = self.bsearch2(key)
324 324 if found:
325 325 # put the item
326 326 pos = self.positions[needle]
327 327 if pos < 0:
328 328 self.extradata[-pos - 1] = (key, hashval, value[1])
329 329 else:
330 330 # just don't bother
331 331 self.extradata.append((key, hashval, value[1]))
332 332 self.positions[needle] = -len(self.extradata)
333 333 else:
334 334 # not found, put it in with extra positions
335 335 self.extradata.append((key, hashval, value[1]))
336 336 self.positions = (
337 337 self.positions[:needle]
338 338 + [-len(self.extradata)]
339 339 + self.positions[needle:]
340 340 )
341 341 self.extrainfo = (
342 342 self.extrainfo[:needle] + [0] + self.extrainfo[needle:]
343 343 )
344 344
345 345 def copy(self):
346 346 # XXX call _compact like in C?
347 347 return _lazymanifest(
348 348 self.data,
349 349 self.positions,
350 350 self.extrainfo,
351 351 self.extradata,
352 352 self.hasremovals,
353 353 )
354 354
355 355 def _compact(self):
356 356 # hopefully not called TOO often
357 357 if len(self.extradata) == 0 and not self.hasremovals:
358 358 return
359 359 l = []
360 360 i = 0
361 361 offset = 0
362 362 self.extrainfo = [0] * len(self.positions)
363 363 while i < len(self.positions):
364 364 if self.positions[i] >= 0:
365 365 cur = self.positions[i]
366 366 last_cut = cur
367 367
368 368 # Collect all contiguous entries in the buffer at the current
369 369 # offset, breaking out only for added/modified items held in
370 370 # extradata, or a deleted line prior to the next position.
371 371 while True:
372 372 self.positions[i] = offset
373 373 i += 1
374 374 if i == len(self.positions) or self.positions[i] < 0:
375 375 break
376 376
377 377 # A removed file has no positions[] entry, but does have an
378 378 # overwritten first byte. Break out and find the end of the
379 379 # current good entry/entries if there is a removed file
380 380 # before the next position.
381 381 if (
382 382 self.hasremovals
383 383 and self.data.find(b'\n\x00', cur, self.positions[i])
384 384 != -1
385 385 ):
386 386 break
387 387
388 388 offset += self.positions[i] - cur
389 389 cur = self.positions[i]
390 390 end_cut = self.data.find(b'\n', cur)
391 391 if end_cut != -1:
392 392 end_cut += 1
393 393 offset += end_cut - cur
394 394 l.append(self.data[last_cut:end_cut])
395 395 else:
396 396 while i < len(self.positions) and self.positions[i] < 0:
397 397 cur = self.positions[i]
398 398 t = self.extradata[-cur - 1]
399 399 l.append(self._pack(t))
400 400 self.positions[i] = offset
401 401 # Hashes are either 20 bytes (old sha1s) or 32
402 402 # bytes (new non-sha1).
403 403 hlen = 20
404 404 if len(t[1]) > 25:
405 405 hlen = 32
406 406 if len(t[1]) > hlen:
407 407 self.extrainfo[i] = ord(t[1][hlen + 1])
408 408 offset += len(l[-1])
409 409 i += 1
410 410 self.data = b''.join(l)
411 411 self.hasremovals = False
412 412 self.extradata = []
413 413
414 414 def _pack(self, d):
415 415 n = d[1]
416 416 assert len(n) in (20, 32)
417 417 return d[0] + b'\x00' + hex(n) + d[2] + b'\n'
418 418
419 419 def text(self):
420 420 self._compact()
421 421 return self.data
422 422
423 423 def diff(self, m2, clean=False):
424 424 '''Finds changes between the current manifest and m2.'''
425 425 # XXX think whether efficiency matters here
426 426 diff = {}
427 427
428 428 for fn, e1, flags in self.iterentries():
429 429 if fn not in m2:
430 430 diff[fn] = (e1, flags), (None, b'')
431 431 else:
432 432 e2 = m2[fn]
433 433 if (e1, flags) != e2:
434 434 diff[fn] = (e1, flags), e2
435 435 elif clean:
436 436 diff[fn] = None
437 437
438 438 for fn, e2, flags in m2.iterentries():
439 439 if fn not in self:
440 440 diff[fn] = (None, b''), (e2, flags)
441 441
442 442 return diff
443 443
444 444 def iterentries(self):
445 445 return lazymanifestiterentries(self)
446 446
447 447 def iterkeys(self):
448 448 return lazymanifestiter(self)
449 449
450 450 def __iter__(self):
451 451 return lazymanifestiter(self)
452 452
453 453 def __len__(self):
454 454 return len(self.positions)
455 455
456 456 def filtercopy(self, filterfn):
457 457 # XXX should be optimized
458 458 c = _lazymanifest(b'')
459 459 for f, n, fl in self.iterentries():
460 460 if filterfn(f):
461 461 c[f] = n, fl
462 462 return c
463 463
464 464
465 465 try:
466 466 _lazymanifest = parsers.lazymanifest
467 467 except AttributeError:
468 468 pass
469 469
470 470
471 471 @interfaceutil.implementer(repository.imanifestdict)
472 472 class manifestdict(object):
473 473 def __init__(self, data=b''):
474 474 self._lm = _lazymanifest(data)
475 475
476 476 def __getitem__(self, key):
477 477 return self._lm[key][0]
478 478
479 479 def find(self, key):
480 480 return self._lm[key]
481 481
482 482 def __len__(self):
483 483 return len(self._lm)
484 484
485 485 def __nonzero__(self):
486 486 # nonzero is covered by the __len__ function, but implementing it here
487 487 # makes it easier for extensions to override.
488 488 return len(self._lm) != 0
489 489
490 490 __bool__ = __nonzero__
491 491
492 492 def __setitem__(self, key, node):
493 493 self._lm[key] = node, self.flags(key)
494 494
495 495 def __contains__(self, key):
496 496 if key is None:
497 497 return False
498 498 return key in self._lm
499 499
500 500 def __delitem__(self, key):
501 501 del self._lm[key]
502 502
503 503 def __iter__(self):
504 504 return self._lm.__iter__()
505 505
506 506 def iterkeys(self):
507 507 return self._lm.iterkeys()
508 508
509 509 def keys(self):
510 510 return list(self.iterkeys())
511 511
512 512 def filesnotin(self, m2, match=None):
513 513 '''Set of files in this manifest that are not in the other'''
514 514 if match is not None:
515 515 match = matchmod.badmatch(match, lambda path, msg: None)
516 516 sm2 = set(m2.walk(match))
517 517 return {f for f in self.walk(match) if f not in sm2}
518 518 return {f for f in self if f not in m2}
519 519
520 520 @propertycache
521 521 def _dirs(self):
522 522 return pathutil.dirs(self)
523 523
524 524 def dirs(self):
525 525 return self._dirs
526 526
527 527 def hasdir(self, dir):
528 528 return dir in self._dirs
529 529
530 530 def _filesfastpath(self, match):
531 531 """Checks whether we can correctly and quickly iterate over matcher
532 532 files instead of over manifest files."""
533 533 files = match.files()
534 534 return len(files) < 100 and (
535 535 match.isexact()
536 536 or (match.prefix() and all(fn in self for fn in files))
537 537 )
538 538
539 539 def walk(self, match):
540 540 """Generates matching file names.
541 541
542 542 Equivalent to manifest.matches(match).iterkeys(), but without creating
543 543 an entirely new manifest.
544 544
545 545 It also reports nonexistent files by marking them bad with match.bad().
546 546 """
547 547 if match.always():
548 548 for f in iter(self):
549 549 yield f
550 550 return
551 551
552 552 fset = set(match.files())
553 553
554 554 # avoid the entire walk if we're only looking for specific files
555 555 if self._filesfastpath(match):
556 556 for fn in sorted(fset):
557 557 if fn in self:
558 558 yield fn
559 559 return
560 560
561 561 for fn in self:
562 562 if fn in fset:
563 563 # specified pattern is the exact name
564 564 fset.remove(fn)
565 565 if match(fn):
566 566 yield fn
567 567
568 568 # for dirstate.walk, files=[''] means "walk the whole tree".
569 569 # follow that here, too
570 570 fset.discard(b'')
571 571
572 572 for fn in sorted(fset):
573 573 if not self.hasdir(fn):
574 574 match.bad(fn, None)
575 575
576 576 def _matches(self, match):
577 577 '''generate a new manifest filtered by the match argument'''
578 578 if match.always():
579 579 return self.copy()
580 580
581 581 if self._filesfastpath(match):
582 582 m = manifestdict()
583 583 lm = self._lm
584 584 for fn in match.files():
585 585 if fn in lm:
586 586 m._lm[fn] = lm[fn]
587 587 return m
588 588
589 589 m = manifestdict()
590 590 m._lm = self._lm.filtercopy(match)
591 591 return m
592 592
593 593 def diff(self, m2, match=None, clean=False):
594 594 """Finds changes between the current manifest and m2.
595 595
596 596 Args:
597 597 m2: the manifest to which this manifest should be compared.
598 598 clean: if true, include files unchanged between these manifests
599 599 with a None value in the returned dictionary.
600 600
601 601 The result is returned as a dict with filename as key and
602 602 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
603 603 nodeid in the current/other manifest and fl1/fl2 is the flag
604 604 in the current/other manifest. Where the file does not exist,
605 605 the nodeid will be None and the flags will be the empty
606 606 string.
607 607 """
608 608 if match:
609 609 m1 = self._matches(match)
610 610 m2 = m2._matches(match)
611 611 return m1.diff(m2, clean=clean)
612 612 return self._lm.diff(m2._lm, clean)
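    # Illustrative sketch (hypothetical nodes n1/n2): if only b'a' differs
    # between the two manifests, m1.diff(m2) returns something like
    #   {b'a': ((n1, b''), (n2, b''))}
    # and with clean=True unchanged files additionally map to None.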
613 613
614 614 def setflag(self, key, flag):
615 615 if flag not in _manifestflags:
616 616 raise TypeError(b"Invalid manifest flag set.")
617 617 self._lm[key] = self[key], flag
618 618
619 619 def get(self, key, default=None):
620 620 try:
621 621 return self._lm[key][0]
622 622 except KeyError:
623 623 return default
624 624
625 625 def flags(self, key):
626 626 try:
627 627 return self._lm[key][1]
628 628 except KeyError:
629 629 return b''
630 630
631 631 def copy(self):
632 632 c = manifestdict()
633 633 c._lm = self._lm.copy()
634 634 return c
635 635
636 636 def items(self):
637 637 return (x[:2] for x in self._lm.iterentries())
638 638
639 639 def iteritems(self):
640 640 return (x[:2] for x in self._lm.iterentries())
641 641
642 642 def iterentries(self):
643 643 return self._lm.iterentries()
644 644
645 645 def text(self):
646 646 # most likely uses native version
647 647 return self._lm.text()
648 648
649 649 def fastdelta(self, base, changes):
650 650 """Given a base manifest text as a bytearray and a list of changes
651 651 relative to that text, compute a delta that can be used by revlog.
652 652 """
653 653 delta = []
654 654 dstart = None
655 655 dend = None
656 656 dline = [b""]
657 657 start = 0
658 658 # zero copy representation of base as a buffer
659 659 addbuf = util.buffer(base)
660 660
661 661 changes = list(changes)
662 662 if len(changes) < FASTDELTA_TEXTDIFF_THRESHOLD:
663 663 # start with a readonly loop that finds the offset of
664 664 # each line and creates the deltas
665 665 for f, todelete in changes:
666 666 # start, end will either delimit the existing item or give the insert point
667 667 start, end = _msearch(addbuf, f, start)
668 668 if not todelete:
669 669 h, fl = self._lm[f]
670 670 l = b"%s\0%s%s\n" % (f, hex(h), fl)
671 671 else:
672 672 if start == end:
673 673 # item we want to delete was not found, error out
674 674 raise AssertionError(
675 675 _(b"failed to remove %s from manifest") % f
676 676 )
677 677 l = b""
678 678 if dstart is not None and dstart <= start and dend >= start:
679 679 if dend < end:
680 680 dend = end
681 681 if l:
682 682 dline.append(l)
683 683 else:
684 684 if dstart is not None:
685 685 delta.append([dstart, dend, b"".join(dline)])
686 686 dstart = start
687 687 dend = end
688 688 dline = [l]
689 689
690 690 if dstart is not None:
691 691 delta.append([dstart, dend, b"".join(dline)])
692 692 # apply the delta to the base, and get a delta for addrevision
693 693 deltatext, arraytext = _addlistdelta(base, delta)
694 694 else:
695 695 # For large changes, it's much cheaper to just build the text and
696 696 # diff it.
697 697 arraytext = bytearray(self.text())
698 698 deltatext = mdiff.textdiff(
699 699 util.buffer(base), util.buffer(arraytext)
700 700 )
701 701
702 702 return arraytext, deltatext
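    # Hedged usage sketch (placeholder names): callers feed the previous
    # manifest text and (path, todelete) pairs already present in this
    # manifest, e.g.
    #   arraytext, deltatext = m.fastdelta(bytearray(basetext),
    #                                      [(b'changed.txt', False)])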
703 703
704 704
705 705 def _msearch(m, s, lo=0, hi=None):
706 706 """return a tuple (start, end) that says where to find s within m.
707 707
708 708 If the string is found m[start:end] are the line containing
709 709 that string. If start == end the string was not found and
710 710 they indicate the proper sorted insertion point.
711 711
712 712 m should be a buffer, a memoryview or a byte string.
713 713 s is a byte string"""
714 714
715 715 def advance(i, c):
716 716 while i < lenm and m[i : i + 1] != c:
717 717 i += 1
718 718 return i
719 719
720 720 if not s:
721 721 return (lo, lo)
722 722 lenm = len(m)
723 723 if not hi:
724 724 hi = lenm
725 725 while lo < hi:
726 726 mid = (lo + hi) // 2
727 727 start = mid
728 728 while start > 0 and m[start - 1 : start] != b'\n':
729 729 start -= 1
730 730 end = advance(start, b'\0')
731 731 if bytes(m[start:end]) < s:
732 732 # we know that after the null there are 40 bytes of sha1
733 733 # this translates to the bisect lo = mid + 1
734 734 lo = advance(end + 40, b'\n') + 1
735 735 else:
736 736 # this translates to the bisect hi = mid
737 737 hi = start
738 738 end = advance(lo, b'\0')
739 739 found = m[lo:end]
740 740 if s == found:
741 741 # we know that after the null there are 40 bytes of sha1
742 742 end = advance(end + 40, b'\n')
743 743 return (lo, end + 1)
744 744 else:
745 745 return (lo, lo)
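# Hedged usage sketch (``m`` is a placeholder manifest buffer): locate
# b"foo" in the sorted manifest text, or the point where it would go.
#
#   start, end = _msearch(m, b"foo")
#   present = start != end and bytes(m[start:end]).startswith(b"foo\x00")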
746 746
747 747
748 748 def _checkforbidden(l):
749 749 """Check filenames for illegal characters."""
750 750 for f in l:
751 751 if b'\n' in f or b'\r' in f:
752 752 raise error.StorageError(
753 753 _(b"'\\n' and '\\r' disallowed in filenames: %r")
754 754 % pycompat.bytestr(f)
755 755 )
756 756
757 757
758 758 # apply the changes collected during the bisect loop to our addlist
759 759 # return a delta suitable for addrevision
760 760 def _addlistdelta(addlist, x):
761 761 # for large addlist arrays, building a new array is cheaper
762 762 # than repeatedly modifying the existing one
763 763 currentposition = 0
764 764 newaddlist = bytearray()
765 765
766 766 for start, end, content in x:
767 767 newaddlist += addlist[currentposition:start]
768 768 if content:
769 769 newaddlist += bytearray(content)
770 770
771 771 currentposition = end
772 772
773 773 newaddlist += addlist[currentposition:]
774 774
775 775 deltatext = b"".join(
776 776 struct.pack(b">lll", start, end, len(content)) + content
777 777 for start, end, content in x
778 778 )
779 779 return deltatext, newaddlist
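# Sketch of the delta chunk layout built above (hedged: this follows the
# usual bdiff/revlog delta convention): each chunk is a 12-byte big-endian
# header (start, end, length of new data) followed by the replacement
# bytes, e.g.
#
#   struct.pack(b">lll", 0, 43, 5) + b"hello"  # replace bytes [0, 43) with b"hello"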
780 780
781 781
782 782 def _splittopdir(f):
783 783 if b'/' in f:
784 784 dir, subpath = f.split(b'/', 1)
785 785 return dir + b'/', subpath
786 786 else:
787 787 return b'', f
788 788
789 789
790 790 _noop = lambda s: None
791 791
792 792
793 793 @interfaceutil.implementer(repository.imanifestdict)
794 794 class treemanifest(object):
795 795 def __init__(self, dir=b'', text=b''):
796 796 self._dir = dir
797 797 self._node = nullid
798 798 self._loadfunc = _noop
799 799 self._copyfunc = _noop
800 800 self._dirty = False
801 801 self._dirs = {}
802 802 self._lazydirs = {}
803 803 # Using _lazymanifest here is a little slower than plain old dicts
804 804 self._files = {}
805 805 self._flags = {}
806 806 if text:
807 807
808 808 def readsubtree(subdir, subm):
809 809 raise AssertionError(
810 810 b'treemanifest constructor only accepts flat manifests'
811 811 )
812 812
813 813 self.parse(text, readsubtree)
814 814 self._dirty = True # Mark flat manifest dirty after parsing
815 815
816 816 def _subpath(self, path):
817 817 return self._dir + path
818 818
819 819 def _loadalllazy(self):
820 820 selfdirs = self._dirs
821 821 subpath = self._subpath
822 822 for d, (node, readsubtree, docopy) in pycompat.iteritems(
823 823 self._lazydirs
824 824 ):
825 825 if docopy:
826 826 selfdirs[d] = readsubtree(subpath(d), node).copy()
827 827 else:
828 828 selfdirs[d] = readsubtree(subpath(d), node)
829 829 self._lazydirs = {}
830 830
831 831 def _loadlazy(self, d):
832 832 v = self._lazydirs.get(d)
833 833 if v:
834 834 node, readsubtree, docopy = v
835 835 if docopy:
836 836 self._dirs[d] = readsubtree(self._subpath(d), node).copy()
837 837 else:
838 838 self._dirs[d] = readsubtree(self._subpath(d), node)
839 839 del self._lazydirs[d]
840 840
841 841 def _loadchildrensetlazy(self, visit):
842 842 if not visit:
843 843 return None
844 844 if visit == b'all' or visit == b'this':
845 845 self._loadalllazy()
846 846 return None
847 847
848 848 loadlazy = self._loadlazy
849 849 for k in visit:
850 850 loadlazy(k + b'/')
851 851 return visit
852 852
853 853 def _loaddifflazy(self, t1, t2):
854 854 """load items in t1 and t2 if they're needed for diffing.
855 855
856 856 The criteria currently are:
857 857 - if it's not present in _lazydirs in either t1 or t2, load it in the
858 858 other (it may already be loaded or it may not exist, doesn't matter)
859 859 - if it's present in _lazydirs in both, compare the nodeid; if it
860 860 differs, load it in both
861 861 """
862 862 toloadlazy = []
863 863 for d, v1 in pycompat.iteritems(t1._lazydirs):
864 864 v2 = t2._lazydirs.get(d)
865 865 if not v2 or v2[0] != v1[0]:
866 866 toloadlazy.append(d)
867 867 for d, v1 in pycompat.iteritems(t2._lazydirs):
868 868 if d not in t1._lazydirs:
869 869 toloadlazy.append(d)
870 870
871 871 for d in toloadlazy:
872 872 t1._loadlazy(d)
873 873 t2._loadlazy(d)
874 874
875 875 def __len__(self):
876 876 self._load()
877 877 size = len(self._files)
878 878 self._loadalllazy()
879 879 for m in self._dirs.values():
880 880 size += m.__len__()
881 881 return size
882 882
883 883 def __nonzero__(self):
884 884 # Faster than "__len__() != 0" since it avoids loading sub-manifests
885 885 return not self._isempty()
886 886
887 887 __bool__ = __nonzero__
888 888
889 889 def _isempty(self):
890 890 self._load() # for consistency; already loaded by all callers
891 891 # See if we can skip loading everything.
892 892 if self._files or (
893 893 self._dirs and any(not m._isempty() for m in self._dirs.values())
894 894 ):
895 895 return False
896 896 self._loadalllazy()
897 897 return not self._dirs or all(m._isempty() for m in self._dirs.values())
898 898
899 899 @encoding.strmethod
900 900 def __repr__(self):
901 901 return (
902 902 b'<treemanifest dir=%s, node=%s, loaded=%r, dirty=%r at 0x%x>'
903 903 % (
904 904 self._dir,
905 905 hex(self._node),
906 906 bool(self._loadfunc is _noop),
907 907 self._dirty,
908 908 id(self),
909 909 )
910 910 )
911 911
912 912 def dir(self):
913 913 """The directory that this tree manifest represents, including a
914 914 trailing '/'. Empty string for the repo root directory."""
915 915 return self._dir
916 916
917 917 def node(self):
918 918 """This node of this instance. nullid for unsaved instances. Should
919 919 be updated when the instance is read or written from a revlog.
920 920 """
921 921 assert not self._dirty
922 922 return self._node
923 923
924 924 def setnode(self, node):
925 925 self._node = node
926 926 self._dirty = False
927 927
928 928 def iterentries(self):
929 929 self._load()
930 930 self._loadalllazy()
931 931 for p, n in sorted(
932 932 itertools.chain(self._dirs.items(), self._files.items())
933 933 ):
934 934 if p in self._files:
935 935 yield self._subpath(p), n, self._flags.get(p, b'')
936 936 else:
937 937 for x in n.iterentries():
938 938 yield x
939 939
940 940 def items(self):
941 941 self._load()
942 942 self._loadalllazy()
943 943 for p, n in sorted(
944 944 itertools.chain(self._dirs.items(), self._files.items())
945 945 ):
946 946 if p in self._files:
947 947 yield self._subpath(p), n
948 948 else:
949 949 for f, sn in pycompat.iteritems(n):
950 950 yield f, sn
951 951
952 952 iteritems = items
953 953
954 954 def iterkeys(self):
955 955 self._load()
956 956 self._loadalllazy()
957 957 for p in sorted(itertools.chain(self._dirs, self._files)):
958 958 if p in self._files:
959 959 yield self._subpath(p)
960 960 else:
961 961 for f in self._dirs[p]:
962 962 yield f
963 963
964 964 def keys(self):
965 965 return list(self.iterkeys())
966 966
967 967 def __iter__(self):
968 968 return self.iterkeys()
969 969
970 970 def __contains__(self, f):
971 971 if f is None:
972 972 return False
973 973 self._load()
974 974 dir, subpath = _splittopdir(f)
975 975 if dir:
976 976 self._loadlazy(dir)
977 977
978 978 if dir not in self._dirs:
979 979 return False
980 980
981 981 return self._dirs[dir].__contains__(subpath)
982 982 else:
983 983 return f in self._files
984 984
985 985 def get(self, f, default=None):
986 986 self._load()
987 987 dir, subpath = _splittopdir(f)
988 988 if dir:
989 989 self._loadlazy(dir)
990 990
991 991 if dir not in self._dirs:
992 992 return default
993 993 return self._dirs[dir].get(subpath, default)
994 994 else:
995 995 return self._files.get(f, default)
996 996
997 997 def __getitem__(self, f):
998 998 self._load()
999 999 dir, subpath = _splittopdir(f)
1000 1000 if dir:
1001 1001 self._loadlazy(dir)
1002 1002
1003 1003 return self._dirs[dir].__getitem__(subpath)
1004 1004 else:
1005 1005 return self._files[f]
1006 1006
1007 1007 def flags(self, f):
1008 1008 self._load()
1009 1009 dir, subpath = _splittopdir(f)
1010 1010 if dir:
1011 1011 self._loadlazy(dir)
1012 1012
1013 1013 if dir not in self._dirs:
1014 1014 return b''
1015 1015 return self._dirs[dir].flags(subpath)
1016 1016 else:
1017 1017 if f in self._lazydirs or f in self._dirs:
1018 1018 return b''
1019 1019 return self._flags.get(f, b'')
1020 1020
1021 1021 def find(self, f):
1022 1022 self._load()
1023 1023 dir, subpath = _splittopdir(f)
1024 1024 if dir:
1025 1025 self._loadlazy(dir)
1026 1026
1027 1027 return self._dirs[dir].find(subpath)
1028 1028 else:
1029 1029 return self._files[f], self._flags.get(f, b'')
1030 1030
1031 1031 def __delitem__(self, f):
1032 1032 self._load()
1033 1033 dir, subpath = _splittopdir(f)
1034 1034 if dir:
1035 1035 self._loadlazy(dir)
1036 1036
1037 1037 self._dirs[dir].__delitem__(subpath)
1038 1038 # If the directory is now empty, remove it
1039 1039 if self._dirs[dir]._isempty():
1040 1040 del self._dirs[dir]
1041 1041 else:
1042 1042 del self._files[f]
1043 1043 if f in self._flags:
1044 1044 del self._flags[f]
1045 1045 self._dirty = True
1046 1046
1047 1047 def __setitem__(self, f, n):
1048 1048 assert n is not None
1049 1049 self._load()
1050 1050 dir, subpath = _splittopdir(f)
1051 1051 if dir:
1052 1052 self._loadlazy(dir)
1053 1053 if dir not in self._dirs:
1054 1054 self._dirs[dir] = treemanifest(self._subpath(dir))
1055 1055 self._dirs[dir].__setitem__(subpath, n)
1056 1056 else:
1057 1057 # manifest nodes are either 20 bytes or 32 bytes,
1058 1058 # depending on the hash in use. Assert this as historically
1059 1059 # sometimes extra bytes were added.
1060 1060 assert len(n) in (20, 32)
1061 1061 self._files[f] = n
1062 1062 self._dirty = True
1063 1063
1064 1064 def _load(self):
1065 1065 if self._loadfunc is not _noop:
1066 1066 lf, self._loadfunc = self._loadfunc, _noop
1067 1067 lf(self)
1068 1068 elif self._copyfunc is not _noop:
1069 1069 cf, self._copyfunc = self._copyfunc, _noop
1070 1070 cf(self)
1071 1071
1072 1072 def setflag(self, f, flags):
1073 1073 """Set the flags (symlink, executable) for path f."""
1074 1074 if flags not in _manifestflags:
1075 1075 raise TypeError(b"Invalid manifest flag set.")
1076 1076 self._load()
1077 1077 dir, subpath = _splittopdir(f)
1078 1078 if dir:
1079 1079 self._loadlazy(dir)
1080 1080 if dir not in self._dirs:
1081 1081 self._dirs[dir] = treemanifest(self._subpath(dir))
1082 1082 self._dirs[dir].setflag(subpath, flags)
1083 1083 else:
1084 1084 self._flags[f] = flags
1085 1085 self._dirty = True
1086 1086
1087 1087 def copy(self):
1088 1088 copy = treemanifest(self._dir)
1089 1089 copy._node = self._node
1090 1090 copy._dirty = self._dirty
1091 1091 if self._copyfunc is _noop:
1092 1092
1093 1093 def _copyfunc(s):
1094 1094 self._load()
1095 1095 s._lazydirs = {
1096 1096 d: (n, r, True)
1097 1097 for d, (n, r, c) in pycompat.iteritems(self._lazydirs)
1098 1098 }
1099 1099 sdirs = s._dirs
1100 1100 for d, v in pycompat.iteritems(self._dirs):
1101 1101 sdirs[d] = v.copy()
1102 1102 s._files = dict.copy(self._files)
1103 1103 s._flags = dict.copy(self._flags)
1104 1104
1105 1105 if self._loadfunc is _noop:
1106 1106 _copyfunc(copy)
1107 1107 else:
1108 1108 copy._copyfunc = _copyfunc
1109 1109 else:
1110 1110 copy._copyfunc = self._copyfunc
1111 1111 return copy
1112 1112
1113 1113 def filesnotin(self, m2, match=None):
1114 1114 '''Set of files in this manifest that are not in the other'''
1115 1115 if match and not match.always():
1116 1116 m1 = self._matches(match)
1117 1117 m2 = m2._matches(match)
1118 1118 return m1.filesnotin(m2)
1119 1119
1120 1120 files = set()
1121 1121
1122 1122 def _filesnotin(t1, t2):
1123 1123 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1124 1124 return
1125 1125 t1._load()
1126 1126 t2._load()
1127 1127 self._loaddifflazy(t1, t2)
1128 1128 for d, m1 in pycompat.iteritems(t1._dirs):
1129 1129 if d in t2._dirs:
1130 1130 m2 = t2._dirs[d]
1131 1131 _filesnotin(m1, m2)
1132 1132 else:
1133 1133 files.update(m1.iterkeys())
1134 1134
1135 1135 for fn in t1._files:
1136 1136 if fn not in t2._files:
1137 1137 files.add(t1._subpath(fn))
1138 1138
1139 1139 _filesnotin(self, m2)
1140 1140 return files
1141 1141
1142 1142 @propertycache
1143 1143 def _alldirs(self):
1144 1144 return pathutil.dirs(self)
1145 1145
1146 1146 def dirs(self):
1147 1147 return self._alldirs
1148 1148
1149 1149 def hasdir(self, dir):
1150 1150 self._load()
1151 1151 topdir, subdir = _splittopdir(dir)
1152 1152 if topdir:
1153 1153 self._loadlazy(topdir)
1154 1154 if topdir in self._dirs:
1155 1155 return self._dirs[topdir].hasdir(subdir)
1156 1156 return False
1157 1157 dirslash = dir + b'/'
1158 1158 return dirslash in self._dirs or dirslash in self._lazydirs
1159 1159
1160 1160 def walk(self, match):
1161 1161 """Generates matching file names.
1162 1162
1163 1163 It also reports nonexistent files by marking them bad with match.bad().
1164 1164 """
1165 1165 if match.always():
1166 1166 for f in iter(self):
1167 1167 yield f
1168 1168 return
1169 1169
1170 1170 fset = set(match.files())
1171 1171
1172 1172 for fn in self._walk(match):
1173 1173 if fn in fset:
1174 1174 # specified pattern is the exact name
1175 1175 fset.remove(fn)
1176 1176 yield fn
1177 1177
1178 1178 # for dirstate.walk, files=[''] means "walk the whole tree".
1179 1179 # follow that here, too
1180 1180 fset.discard(b'')
1181 1181
1182 1182 for fn in sorted(fset):
1183 1183 if not self.hasdir(fn):
1184 1184 match.bad(fn, None)
1185 1185
1186 1186 def _walk(self, match):
1187 1187 '''Recursively generates matching file names for walk().'''
1188 1188 visit = match.visitchildrenset(self._dir[:-1])
1189 1189 if not visit:
1190 1190 return
1191 1191
1192 1192 # yield this dir's files and walk its submanifests
1193 1193 self._load()
1194 1194 visit = self._loadchildrensetlazy(visit)
1195 1195 for p in sorted(list(self._dirs) + list(self._files)):
1196 1196 if p in self._files:
1197 1197 fullp = self._subpath(p)
1198 1198 if match(fullp):
1199 1199 yield fullp
1200 1200 else:
1201 1201 if not visit or p[:-1] in visit:
1202 1202 for f in self._dirs[p]._walk(match):
1203 1203 yield f
1204 1204
1205 1205 def _matches(self, match):
1206 1206 """recursively generate a new manifest filtered by the match argument."""
1207 1207 if match.always():
1208 1208 return self.copy()
1209 1209 return self._matches_inner(match)
1210 1210
1211 1211 def _matches_inner(self, match):
1212 1212 if match.always():
1213 1213 return self.copy()
1214 1214
1215 1215 visit = match.visitchildrenset(self._dir[:-1])
1216 1216 if visit == b'all':
1217 1217 return self.copy()
1218 1218 ret = treemanifest(self._dir)
1219 1219 if not visit:
1220 1220 return ret
1221 1221
1222 1222 self._load()
1223 1223 for fn in self._files:
1224 1224 # While visitchildrenset *usually* lists only subdirs, this is
1225 1225 # actually up to the matcher and may have some files in the set().
1226 1226 # If visit == 'this', we should obviously look at the files in this
1227 1227 # directory; if visit is a set, and fn is in it, we should inspect
1228 1228 # fn (but no need to inspect things not in the set).
1229 1229 if visit != b'this' and fn not in visit:
1230 1230 continue
1231 1231 fullp = self._subpath(fn)
1232 1232 # visitchildrenset isn't perfect, we still need to call the regular
1233 1233 # matcher code to further filter results.
1234 1234 if not match(fullp):
1235 1235 continue
1236 1236 ret._files[fn] = self._files[fn]
1237 1237 if fn in self._flags:
1238 1238 ret._flags[fn] = self._flags[fn]
1239 1239
1240 1240 visit = self._loadchildrensetlazy(visit)
1241 1241 for dir, subm in pycompat.iteritems(self._dirs):
1242 1242 if visit and dir[:-1] not in visit:
1243 1243 continue
1244 1244 m = subm._matches_inner(match)
1245 1245 if not m._isempty():
1246 1246 ret._dirs[dir] = m
1247 1247
1248 1248 if not ret._isempty():
1249 1249 ret._dirty = True
1250 1250 return ret
1251 1251
1252 1252 def fastdelta(self, base, changes):
1253 1253 raise FastdeltaUnavailable()
1254 1254
1255 1255 def diff(self, m2, match=None, clean=False):
1256 1256 """Finds changes between the current manifest and m2.
1257 1257
1258 1258 Args:
1259 1259 m2: the manifest to which this manifest should be compared.
1260 1260 clean: if true, include files unchanged between these manifests
1261 1261 with a None value in the returned dictionary.
1262 1262
1263 1263 The result is returned as a dict with filename as key and
1264 1264 values of the form ((n1,fl1),(n2,fl2)), where n1/n2 is the
1265 1265 nodeid in the current/other manifest and fl1/fl2 is the flag
1266 1266 in the current/other manifest. Where the file does not exist,
1267 1267 the nodeid will be None and the flags will be the empty
1268 1268 string.
1269 1269 """
1270 1270 if match and not match.always():
1271 1271 m1 = self._matches(match)
1272 1272 m2 = m2._matches(match)
1273 1273 return m1.diff(m2, clean=clean)
1274 1274 result = {}
1275 1275 emptytree = treemanifest()
1276 1276
1277 1277 def _iterativediff(t1, t2, stack):
1278 1278 """compares two tree manifests and append new tree-manifests which
1279 1279 needs to be compared to stack"""
1280 1280 if t1._node == t2._node and not t1._dirty and not t2._dirty:
1281 1281 return
1282 1282 t1._load()
1283 1283 t2._load()
1284 1284 self._loaddifflazy(t1, t2)
1285 1285
1286 1286 for d, m1 in pycompat.iteritems(t1._dirs):
1287 1287 m2 = t2._dirs.get(d, emptytree)
1288 1288 stack.append((m1, m2))
1289 1289
1290 1290 for d, m2 in pycompat.iteritems(t2._dirs):
1291 1291 if d not in t1._dirs:
1292 1292 stack.append((emptytree, m2))
1293 1293
1294 1294 for fn, n1 in pycompat.iteritems(t1._files):
1295 1295 fl1 = t1._flags.get(fn, b'')
1296 1296 n2 = t2._files.get(fn, None)
1297 1297 fl2 = t2._flags.get(fn, b'')
1298 1298 if n1 != n2 or fl1 != fl2:
1299 1299 result[t1._subpath(fn)] = ((n1, fl1), (n2, fl2))
1300 1300 elif clean:
1301 1301 result[t1._subpath(fn)] = None
1302 1302
1303 1303 for fn, n2 in pycompat.iteritems(t2._files):
1304 1304 if fn not in t1._files:
1305 1305 fl2 = t2._flags.get(fn, b'')
1306 1306 result[t2._subpath(fn)] = ((None, b''), (n2, fl2))
1307 1307
1308 1308 stackls = []
1309 1309 _iterativediff(self, m2, stackls)
1310 1310 while stackls:
1311 1311 t1, t2 = stackls.pop()
1312 1312 # stackls is populated in the function call
1313 1313 _iterativediff(t1, t2, stackls)
1314 1314 return result
1315 1315
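# Editorial sketch (not part of the original source): for two manifests where
# only b'a.txt' changed and b'b.txt' is identical, diff(m2, clean=True) would
# return a dict shaped like
#     {
#         b'a.txt': ((n1, b''), (n2, b'x')),  # nodeid and/or flags differ
#         b'b.txt': None,                     # unchanged, reported because clean=True
#     }
# where n1/n2 stand in for the hypothetical 20-byte nodeids in self and m2.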
1316 1316 def unmodifiedsince(self, m2):
1317 1317 return not self._dirty and not m2._dirty and self._node == m2._node
1318 1318
1319 1319 def parse(self, text, readsubtree):
1320 1320 selflazy = self._lazydirs
1321 1321 for f, n, fl in _parse(text):
1322 1322 if fl == b't':
1323 1323 f = f + b'/'
1324 1324 # False below means "doesn't need to be copied" and can use the
1325 1325 # cached value from readsubtree directly.
1326 1326 selflazy[f] = (n, readsubtree, False)
1327 1327 elif b'/' in f:
1328 1328 # This is a flat manifest, so use __setitem__ and setflag rather
1329 1329 # than assigning directly to _files and _flags, so we can
1330 1330 # assign a path in a subdirectory, and to mark dirty (compared
1331 1331 # to nullid).
1332 1332 self[f] = n
1333 1333 if fl:
1334 1334 self.setflag(f, fl)
1335 1335 else:
1336 1336 # Assigning to _files and _flags avoids marking as dirty,
1337 1337 # and should be a little faster.
1338 1338 self._files[f] = n
1339 1339 if fl:
1340 1340 self._flags[f] = fl
1341 1341
1342 1342 def text(self):
1343 1343 """Get the full data of this manifest as a bytestring."""
1344 1344 self._load()
1345 1345 return _text(self.iterentries())
1346 1346
1347 1347 def dirtext(self):
1348 1348 """Get the full data of this directory as a bytestring. Make sure that
1349 1349 any submanifests have been written first, so their nodeids are correct.
1350 1350 """
1351 1351 self._load()
1352 1352 flags = self.flags
1353 1353 lazydirs = [
1354 1354 (d[:-1], v[0], b't') for d, v in pycompat.iteritems(self._lazydirs)
1355 1355 ]
1356 1356 dirs = [(d[:-1], self._dirs[d]._node, b't') for d in self._dirs]
1357 1357 files = [(f, self._files[f], flags(f)) for f in self._files]
1358 1358 return _text(sorted(dirs + files + lazydirs))
1359 1359
1360 1360 def read(self, gettext, readsubtree):
1361 1361 def _load_for_read(s):
1362 1362 s.parse(gettext(), readsubtree)
1363 1363 s._dirty = False
1364 1364
1365 1365 self._loadfunc = _load_for_read
1366 1366
1367 1367 def writesubtrees(self, m1, m2, writesubtree, match):
1368 1368 self._load() # for consistency; should never have any effect here
1369 1369 m1._load()
1370 1370 m2._load()
1371 1371 emptytree = treemanifest()
1372 1372
1373 1373 def getnode(m, d):
1374 1374 ld = m._lazydirs.get(d)
1375 1375 if ld:
1376 1376 return ld[0]
1377 1377 return m._dirs.get(d, emptytree)._node
1378 1378
1379 1379 # let's skip investigating things that `match` says we do not need.
1380 1380 visit = match.visitchildrenset(self._dir[:-1])
1381 1381 visit = self._loadchildrensetlazy(visit)
1382 1382 if visit == b'this' or visit == b'all':
1383 1383 visit = None
1384 1384 for d, subm in pycompat.iteritems(self._dirs):
1385 1385 if visit and d[:-1] not in visit:
1386 1386 continue
1387 1387 subp1 = getnode(m1, d)
1388 1388 subp2 = getnode(m2, d)
1389 1389 if subp1 == nullid:
1390 1390 subp1, subp2 = subp2, subp1
1391 1391 writesubtree(subm, subp1, subp2, match)
1392 1392
1393 1393 def walksubtrees(self, matcher=None):
1394 1394 """Returns an iterator of the subtrees of this manifest, including this
1395 1395 manifest itself.
1396 1396
1397 1397 If `matcher` is provided, it only returns subtrees that match.
1398 1398 """
1399 1399 if matcher and not matcher.visitdir(self._dir[:-1]):
1400 1400 return
1401 1401 if not matcher or matcher(self._dir[:-1]):
1402 1402 yield self
1403 1403
1404 1404 self._load()
1405 1405 # OPT: use visitchildrenset to avoid loading everything.
1406 1406 self._loadalllazy()
1407 1407 for d, subm in pycompat.iteritems(self._dirs):
1408 1408 for subtree in subm.walksubtrees(matcher=matcher):
1409 1409 yield subtree
1410 1410
1411 1411
1412 1412 class manifestfulltextcache(util.lrucachedict):
1413 1413 """File-backed LRU cache for the manifest cache
1414 1414
1415 1415 File consists of entries, up to EOF:
1416 1416
1417 1417 - 20 bytes node, 4 bytes length, <length> manifest data
1418 1418
1419 1419 These are written in reverse cache order (oldest to newest).
1420 1420
1421 1421 """
1422 1422
1423 1423 _file = b'manifestfulltextcache'
1424 1424
1425 1425 def __init__(self, max):
1426 1426 super(manifestfulltextcache, self).__init__(max)
1427 1427 self._dirty = False
1428 1428 self._read = False
1429 1429 self._opener = None
1430 1430
1431 1431 def read(self):
1432 1432 if self._read or self._opener is None:
1433 1433 return
1434 1434
1435 1435 try:
1436 1436 with self._opener(self._file) as fp:
1437 1437 set = super(manifestfulltextcache, self).__setitem__
1438 1438 # ignore trailing data, this is a cache, corruption is skipped
1439 1439 while True:
1440 1440 # TODO do we need to do work here for sha1 portability?
1441 1441 node = fp.read(20)
1442 1442 if len(node) < 20:
1443 1443 break
1444 1444 try:
1445 1445 size = struct.unpack(b'>L', fp.read(4))[0]
1446 1446 except struct.error:
1447 1447 break
1448 1448 value = bytearray(fp.read(size))
1449 1449 if len(value) != size:
1450 1450 break
1451 1451 set(node, value)
1452 1452 except IOError:
1453 1453 # the file is allowed to be missing
1454 1454 pass
1455 1455
1456 1456 self._read = True
1457 1457 self._dirty = False
1458 1458
1459 1459 def write(self):
1460 1460 if not self._dirty or self._opener is None:
1461 1461 return
1462 1462 # rotate backwards to the first used node
1463 1463 try:
1464 1464 with self._opener(
1465 1465 self._file, b'w', atomictemp=True, checkambig=True
1466 1466 ) as fp:
1467 1467 node = self._head.prev
1468 1468 while True:
1469 1469 if node.key in self._cache:
1470 1470 fp.write(node.key)
1471 1471 fp.write(struct.pack(b'>L', len(node.value)))
1472 1472 fp.write(node.value)
1473 1473 if node is self._head:
1474 1474 break
1475 1475 node = node.prev
1476 1476 except IOError:
1477 1477 # We could not write the cache (eg: permission error)
1478 1478 # the content can be missing.
1479 1479 #
1480 1480 # We could try harder and see if we could recreate a wcache
1481 1481 # directory where we could write to.
1482 1482 #
1483 1483 # XXX the error passes silently; having some way to issue an error
1484 1484 # log `ui.log` would be nice.
1485 1485 pass
1486 1486
1487 1487 def __len__(self):
1488 1488 if not self._read:
1489 1489 self.read()
1490 1490 return super(manifestfulltextcache, self).__len__()
1491 1491
1492 1492 def __contains__(self, k):
1493 1493 if not self._read:
1494 1494 self.read()
1495 1495 return super(manifestfulltextcache, self).__contains__(k)
1496 1496
1497 1497 def __iter__(self):
1498 1498 if not self._read:
1499 1499 self.read()
1500 1500 return super(manifestfulltextcache, self).__iter__()
1501 1501
1502 1502 def __getitem__(self, k):
1503 1503 if not self._read:
1504 1504 self.read()
1505 1505 # the cache lru order can change on read
1506 1506 setdirty = self._cache.get(k) is not self._head
1507 1507 value = super(manifestfulltextcache, self).__getitem__(k)
1508 1508 if setdirty:
1509 1509 self._dirty = True
1510 1510 return value
1511 1511
1512 1512 def __setitem__(self, k, v):
1513 1513 if not self._read:
1514 1514 self.read()
1515 1515 super(manifestfulltextcache, self).__setitem__(k, v)
1516 1516 self._dirty = True
1517 1517
1518 1518 def __delitem__(self, k):
1519 1519 if not self._read:
1520 1520 self.read()
1521 1521 super(manifestfulltextcache, self).__delitem__(k)
1522 1522 self._dirty = True
1523 1523
1524 1524 def get(self, k, default=None):
1525 1525 if not self._read:
1526 1526 self.read()
1527 1527 return super(manifestfulltextcache, self).get(k, default=default)
1528 1528
1529 1529 def clear(self, clear_persisted_data=False):
1530 1530 super(manifestfulltextcache, self).clear()
1531 1531 if clear_persisted_data:
1532 1532 self._dirty = True
1533 1533 self.write()
1534 1534 self._read = False
1535 1535
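# Editorial sketch (not part of the original source): a minimal, standalone
# reader for the manifestfulltextcache on-disk format documented above --
# repeated entries of a 20-byte node, a 4-byte big-endian length and <length>
# bytes of manifest data, with any truncated trailing entry ignored. The
# helper name below is hypothetical; `struct` is already imported at the top
# of this module (see read() above).
def _iter_fulltextcache_entries(fp):
    """Yield (node, data) pairs from an open manifestfulltextcache file."""
    while True:
        node = fp.read(20)
        if len(node) < 20:
            return
        header = fp.read(4)
        if len(header) < 4:
            return
        (size,) = struct.unpack(b'>L', header)
        data = fp.read(size)
        if len(data) != size:
            return
        yield node, data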
1536 1536
1537 1537 # an upper bound of what we expect from compression
1538 1538 # (the real-life value seems to be "3")
1539 1539 MAXCOMPRESSION = 3
1540 1540
1541 1541
1542 1542 class FastdeltaUnavailable(Exception):
1543 1543 """Exception raised when fastdelta isn't usable on a manifest."""
1544 1544
1545 1545
1546 1546 @interfaceutil.implementer(repository.imanifeststorage)
1547 1547 class manifestrevlog(object):
1548 1548 """A revlog that stores manifest texts. This is responsible for caching the
1549 1549 full-text manifest contents.
1550 1550 """
1551 1551
1552 1552 def __init__(
1553 1553 self,
1554 1554 opener,
1555 1555 tree=b'',
1556 1556 dirlogcache=None,
1557 1557 indexfile=None,
1558 1558 treemanifest=False,
1559 1559 ):
1560 1560 """Constructs a new manifest revlog
1561 1561
1562 1562 `indexfile` - used by extensions to have two manifests at once, like
1563 1563 when transitioning between flat manifests and tree manifests.
1564 1564
1565 1565 `treemanifest` - used to indicate this is a tree manifest revlog. Opener
1566 1566 options can also be used to make this a tree manifest revlog. The opener
1567 1567 option takes precedence, so if it is set to True, we ignore whatever
1568 1568 value is passed in to the constructor.
1569 1569 """
1570 1570 # During normal operations, we expect to deal with not more than four
1571 1571 # revs at a time (such as during commit --amend). When rebasing large
1572 1572 # stacks of commits, the number can go up, hence the config knob below.
1573 1573 cachesize = 4
1574 1574 optiontreemanifest = False
1575 1575 opts = getattr(opener, 'options', None)
1576 1576 if opts is not None:
1577 1577 cachesize = opts.get(b'manifestcachesize', cachesize)
1578 1578 optiontreemanifest = opts.get(b'treemanifest', False)
1579 1579
1580 1580 self._treeondisk = optiontreemanifest or treemanifest
1581 1581
1582 1582 self._fulltextcache = manifestfulltextcache(cachesize)
1583 1583
1584 1584 if tree:
1585 1585 assert self._treeondisk, b'opts is %r' % opts
1586 1586
1587 1587 if indexfile is None:
1588 1588 indexfile = b'00manifest.i'
1589 1589 if tree:
1590 1590 indexfile = b"meta/" + tree + indexfile
1591 1591
1592 1592 self.tree = tree
1593 1593
1594 1594 # The dirlogcache is kept on the root manifest log
1595 1595 if tree:
1596 1596 self._dirlogcache = dirlogcache
1597 1597 else:
1598 1598 self._dirlogcache = {b'': self}
1599 1599
1600 1600 self._revlog = revlog.revlog(
1601 1601 opener,
1602 1602 indexfile,
1603 1603 # only root indexfile is cached
1604 1604 checkambig=not bool(tree),
1605 1605 mmaplargeindex=True,
1606 1606 upperboundcomp=MAXCOMPRESSION,
1607 1607 persistentnodemap=opener.options.get(b'persistent-nodemap', False),
1608 1608 )
1609 1609
1610 1610 self.index = self._revlog.index
1611 1611 self.version = self._revlog.version
1612 1612 self._generaldelta = self._revlog._generaldelta
1613 1613
1614 1614 def _setupmanifestcachehooks(self, repo):
1615 1615 """Persist the manifestfulltextcache on lock release"""
1616 1616 if not util.safehasattr(repo, b'_wlockref'):
1617 1617 return
1618 1618
1619 1619 self._fulltextcache._opener = repo.wcachevfs
1620 1620 if repo._currentlock(repo._wlockref) is None:
1621 1621 return
1622 1622
1623 1623 reporef = weakref.ref(repo)
1624 1624 manifestrevlogref = weakref.ref(self)
1625 1625
1626 1626 def persistmanifestcache(success):
1627 1627 # Repo is in an unknown state, do not persist.
1628 1628 if not success:
1629 1629 return
1630 1630
1631 1631 repo = reporef()
1632 1632 self = manifestrevlogref()
1633 1633 if repo is None or self is None:
1634 1634 return
1635 1635 if repo.manifestlog.getstorage(b'') is not self:
1636 1636 # there's a different manifest in play now, abort
1637 1637 return
1638 1638 self._fulltextcache.write()
1639 1639
1640 1640 repo._afterlock(persistmanifestcache)
1641 1641
1642 1642 @property
1643 1643 def fulltextcache(self):
1644 1644 return self._fulltextcache
1645 1645
1646 1646 def clearcaches(self, clear_persisted_data=False):
1647 1647 self._revlog.clearcaches()
1648 1648 self._fulltextcache.clear(clear_persisted_data=clear_persisted_data)
1649 1649 self._dirlogcache = {self.tree: self}
1650 1650
1651 1651 def dirlog(self, d):
1652 1652 if d:
1653 1653 assert self._treeondisk
1654 1654 if d not in self._dirlogcache:
1655 1655 mfrevlog = manifestrevlog(
1656 1656 self.opener, d, self._dirlogcache, treemanifest=self._treeondisk
1657 1657 )
1658 1658 self._dirlogcache[d] = mfrevlog
1659 1659 return self._dirlogcache[d]
1660 1660
1661 1661 def add(
1662 1662 self,
1663 1663 m,
1664 1664 transaction,
1665 1665 link,
1666 1666 p1,
1667 1667 p2,
1668 1668 added,
1669 1669 removed,
1670 1670 readtree=None,
1671 1671 match=None,
1672 1672 ):
1673 1673 """add some manifest entry in to the manifest log
1674 1674
1675 1675 input:
1676 1676
1677 1677 m: the manifest dict we want to store
1678 1678 transaction: the open transaction
1679 1679 p1: manifest-node of p1
1680 1680 p2: manifest-node of p2
1681 1681 added: files added/changed compared to the parent
1682 1682 removed: files removed compared to the parent
1683 1683
1684 1684 tree manifest input:
1685 1685
1686 1686 readtree: a function to read a subtree
1687 1687 match: a filematcher for the subpart of the tree manifest
1688 1688 """
1689 1689 try:
1690 1690 if p1 not in self.fulltextcache:
1691 1691 raise FastdeltaUnavailable()
1692 1692 # If our first parent is in the manifest cache, we can
1693 1693 # compute a delta here using properties we know about the
1694 1694 # manifest up-front, which may save time later for the
1695 1695 # revlog layer.
1696 1696
1697 1697 _checkforbidden(added)
1698 1698 # combine the changed lists into one sorted iterator
1699 1699 work = heapq.merge(
1700 1700 [(x, False) for x in sorted(added)],
1701 1701 [(x, True) for x in sorted(removed)],
1702 1702 )
1703 1703
1704 1704 arraytext, deltatext = m.fastdelta(self.fulltextcache[p1], work)
1705 1705 cachedelta = self._revlog.rev(p1), deltatext
1706 1706 text = util.buffer(arraytext)
1707 n = self._revlog.addrevision(
1707 rev = self._revlog.addrevision(
1708 1708 text, transaction, link, p1, p2, cachedelta
1709 1709 )
1710 n = self._revlog.node(rev)
1710 1711 except FastdeltaUnavailable:
1711 1712 # The first parent manifest isn't already loaded or the
1712 1713 # manifest implementation doesn't support fastdelta, so
1713 1714 # we'll just encode a fulltext of the manifest and pass
1714 1715 # that through to the revlog layer, and let it handle the
1715 1716 # delta process.
1716 1717 if self._treeondisk:
1717 1718 assert readtree, b"readtree must be set for treemanifest writes"
1718 1719 assert match, b"match must be specified for treemanifest writes"
1719 1720 m1 = readtree(self.tree, p1)
1720 1721 m2 = readtree(self.tree, p2)
1721 1722 n = self._addtree(
1722 1723 m, transaction, link, m1, m2, readtree, match=match
1723 1724 )
1724 1725 arraytext = None
1725 1726 else:
1726 1727 text = m.text()
1727 n = self._revlog.addrevision(text, transaction, link, p1, p2)
1728 rev = self._revlog.addrevision(text, transaction, link, p1, p2)
1729 n = self._revlog.node(rev)
1728 1730 arraytext = bytearray(text)
1729 1731
1730 1732 if arraytext is not None:
1731 1733 self.fulltextcache[n] = arraytext
1732 1734
1733 1735 return n
1734 1736
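# Editorial note on the change in this revision (illustrative): addrevision()
# now returns the new revision number instead of the node, so callers that
# still need the node convert explicitly, e.g.
#     rev = self._revlog.addrevision(text, transaction, link, p1, p2)
#     n = self._revlog.node(rev)
# as done in add() above and in _addtree() below.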
1735 1737 def _addtree(self, m, transaction, link, m1, m2, readtree, match):
1736 1738 # If the manifest is unchanged compared to one parent,
1737 1739 # don't write a new revision
1738 1740 if self.tree != b'' and (
1739 1741 m.unmodifiedsince(m1) or m.unmodifiedsince(m2)
1740 1742 ):
1741 1743 return m.node()
1742 1744
1743 1745 def writesubtree(subm, subp1, subp2, match):
1744 1746 sublog = self.dirlog(subm.dir())
1745 1747 sublog.add(
1746 1748 subm,
1747 1749 transaction,
1748 1750 link,
1749 1751 subp1,
1750 1752 subp2,
1751 1753 None,
1752 1754 None,
1753 1755 readtree=readtree,
1754 1756 match=match,
1755 1757 )
1756 1758
1757 1759 m.writesubtrees(m1, m2, writesubtree, match)
1758 1760 text = m.dirtext()
1759 1761 n = None
1760 1762 if self.tree != b'':
1761 1763 # Double-check whether contents are unchanged to one parent
1762 1764 if text == m1.dirtext():
1763 1765 n = m1.node()
1764 1766 elif text == m2.dirtext():
1765 1767 n = m2.node()
1766 1768
1767 1769 if not n:
1768 n = self._revlog.addrevision(
1770 rev = self._revlog.addrevision(
1769 1771 text, transaction, link, m1.node(), m2.node()
1770 1772 )
1773 n = self._revlog.node(rev)
1771 1774
1772 1775 # Save nodeid so parent manifest can calculate its nodeid
1773 1776 m.setnode(n)
1774 1777 return n
1775 1778
1776 1779 def __len__(self):
1777 1780 return len(self._revlog)
1778 1781
1779 1782 def __iter__(self):
1780 1783 return self._revlog.__iter__()
1781 1784
1782 1785 def rev(self, node):
1783 1786 return self._revlog.rev(node)
1784 1787
1785 1788 def node(self, rev):
1786 1789 return self._revlog.node(rev)
1787 1790
1788 1791 def lookup(self, value):
1789 1792 return self._revlog.lookup(value)
1790 1793
1791 1794 def parentrevs(self, rev):
1792 1795 return self._revlog.parentrevs(rev)
1793 1796
1794 1797 def parents(self, node):
1795 1798 return self._revlog.parents(node)
1796 1799
1797 1800 def linkrev(self, rev):
1798 1801 return self._revlog.linkrev(rev)
1799 1802
1800 1803 def checksize(self):
1801 1804 return self._revlog.checksize()
1802 1805
1803 1806 def revision(self, node, _df=None, raw=False):
1804 1807 return self._revlog.revision(node, _df=_df, raw=raw)
1805 1808
1806 1809 def rawdata(self, node, _df=None):
1807 1810 return self._revlog.rawdata(node, _df=_df)
1808 1811
1809 1812 def revdiff(self, rev1, rev2):
1810 1813 return self._revlog.revdiff(rev1, rev2)
1811 1814
1812 1815 def cmp(self, node, text):
1813 1816 return self._revlog.cmp(node, text)
1814 1817
1815 1818 def deltaparent(self, rev):
1816 1819 return self._revlog.deltaparent(rev)
1817 1820
1818 1821 def emitrevisions(
1819 1822 self,
1820 1823 nodes,
1821 1824 nodesorder=None,
1822 1825 revisiondata=False,
1823 1826 assumehaveparentrevisions=False,
1824 1827 deltamode=repository.CG_DELTAMODE_STD,
1825 1828 ):
1826 1829 return self._revlog.emitrevisions(
1827 1830 nodes,
1828 1831 nodesorder=nodesorder,
1829 1832 revisiondata=revisiondata,
1830 1833 assumehaveparentrevisions=assumehaveparentrevisions,
1831 1834 deltamode=deltamode,
1832 1835 )
1833 1836
1834 1837 def addgroup(
1835 1838 self,
1836 1839 deltas,
1837 1840 linkmapper,
1838 1841 transaction,
1839 1842 alwayscache=False,
1840 1843 addrevisioncb=None,
1841 1844 duplicaterevisioncb=None,
1842 1845 ):
1843 1846 return self._revlog.addgroup(
1844 1847 deltas,
1845 1848 linkmapper,
1846 1849 transaction,
1847 1850 alwayscache=alwayscache,
1848 1851 addrevisioncb=addrevisioncb,
1849 1852 duplicaterevisioncb=duplicaterevisioncb,
1850 1853 )
1851 1854
1852 1855 def rawsize(self, rev):
1853 1856 return self._revlog.rawsize(rev)
1854 1857
1855 1858 def getstrippoint(self, minlink):
1856 1859 return self._revlog.getstrippoint(minlink)
1857 1860
1858 1861 def strip(self, minlink, transaction):
1859 1862 return self._revlog.strip(minlink, transaction)
1860 1863
1861 1864 def files(self):
1862 1865 return self._revlog.files()
1863 1866
1864 1867 def clone(self, tr, destrevlog, **kwargs):
1865 1868 if not isinstance(destrevlog, manifestrevlog):
1866 1869 raise error.ProgrammingError(b'expected manifestrevlog to clone()')
1867 1870
1868 1871 return self._revlog.clone(tr, destrevlog._revlog, **kwargs)
1869 1872
1870 1873 def storageinfo(
1871 1874 self,
1872 1875 exclusivefiles=False,
1873 1876 sharedfiles=False,
1874 1877 revisionscount=False,
1875 1878 trackedsize=False,
1876 1879 storedsize=False,
1877 1880 ):
1878 1881 return self._revlog.storageinfo(
1879 1882 exclusivefiles=exclusivefiles,
1880 1883 sharedfiles=sharedfiles,
1881 1884 revisionscount=revisionscount,
1882 1885 trackedsize=trackedsize,
1883 1886 storedsize=storedsize,
1884 1887 )
1885 1888
1886 1889 @property
1887 1890 def indexfile(self):
1888 1891 return self._revlog.indexfile
1889 1892
1890 1893 @indexfile.setter
1891 1894 def indexfile(self, value):
1892 1895 self._revlog.indexfile = value
1893 1896
1894 1897 @property
1895 1898 def opener(self):
1896 1899 return self._revlog.opener
1897 1900
1898 1901 @opener.setter
1899 1902 def opener(self, value):
1900 1903 self._revlog.opener = value
1901 1904
1902 1905
1903 1906 @interfaceutil.implementer(repository.imanifestlog)
1904 1907 class manifestlog(object):
1905 1908 """A collection class representing the collection of manifest snapshots
1906 1909 referenced by commits in the repository.
1907 1910
1908 1911 In this situation, 'manifest' refers to the abstract concept of a snapshot
1909 1912 of the list of files in the given commit. Consumers of the output of this
1910 1913 class do not care about the implementation details of the actual manifests
1911 1914 they receive (i.e. tree or flat or lazily loaded, etc)."""
1912 1915
1913 1916 def __init__(self, opener, repo, rootstore, narrowmatch):
1914 1917 usetreemanifest = False
1915 1918 cachesize = 4
1916 1919
1917 1920 opts = getattr(opener, 'options', None)
1918 1921 if opts is not None:
1919 1922 usetreemanifest = opts.get(b'treemanifest', usetreemanifest)
1920 1923 cachesize = opts.get(b'manifestcachesize', cachesize)
1921 1924
1922 1925 self._treemanifests = usetreemanifest
1923 1926
1924 1927 self._rootstore = rootstore
1925 1928 self._rootstore._setupmanifestcachehooks(repo)
1926 1929 self._narrowmatch = narrowmatch
1927 1930
1928 1931 # A cache of the manifestctx or treemanifestctx for each directory
1929 1932 self._dirmancache = {}
1930 1933 self._dirmancache[b''] = util.lrucachedict(cachesize)
1931 1934
1932 1935 self._cachesize = cachesize
1933 1936
1934 1937 def __getitem__(self, node):
1935 1938 """Retrieves the manifest instance for the given node. Throws a
1936 1939 LookupError if not found.
1937 1940 """
1938 1941 return self.get(b'', node)
1939 1942
1940 1943 def get(self, tree, node, verify=True):
1941 1944 """Retrieves the manifest instance for the given node. Throws a
1942 1945 LookupError if not found.
1943 1946
1944 1947 `verify` - if True an exception will be thrown if the node is not in
1945 1948 the revlog
1946 1949 """
1947 1950 if node in self._dirmancache.get(tree, ()):
1948 1951 return self._dirmancache[tree][node]
1949 1952
1950 1953 if not self._narrowmatch.always():
1951 1954 if not self._narrowmatch.visitdir(tree[:-1]):
1952 1955 return excludeddirmanifestctx(tree, node)
1953 1956 if tree:
1954 1957 if self._rootstore._treeondisk:
1955 1958 if verify:
1956 1959 # Side-effect is LookupError is raised if node doesn't
1957 1960 # exist.
1958 1961 self.getstorage(tree).rev(node)
1959 1962
1960 1963 m = treemanifestctx(self, tree, node)
1961 1964 else:
1962 1965 raise error.Abort(
1963 1966 _(
1964 1967 b"cannot ask for manifest directory '%s' in a flat "
1965 1968 b"manifest"
1966 1969 )
1967 1970 % tree
1968 1971 )
1969 1972 else:
1970 1973 if verify:
1971 1974 # Side-effect is LookupError is raised if node doesn't exist.
1972 1975 self._rootstore.rev(node)
1973 1976
1974 1977 if self._treemanifests:
1975 1978 m = treemanifestctx(self, b'', node)
1976 1979 else:
1977 1980 m = manifestctx(self, node)
1978 1981
1979 1982 if node != nullid:
1980 1983 mancache = self._dirmancache.get(tree)
1981 1984 if not mancache:
1982 1985 mancache = util.lrucachedict(self._cachesize)
1983 1986 self._dirmancache[tree] = mancache
1984 1987 mancache[node] = m
1985 1988 return m
1986 1989
1987 1990 def getstorage(self, tree):
1988 1991 return self._rootstore.dirlog(tree)
1989 1992
1990 1993 def clearcaches(self, clear_persisted_data=False):
1991 1994 self._dirmancache.clear()
1992 1995 self._rootstore.clearcaches(clear_persisted_data=clear_persisted_data)
1993 1996
1994 1997 def rev(self, node):
1995 1998 return self._rootstore.rev(node)
1996 1999
1997 2000 def update_caches(self, transaction):
1998 2001 return self._rootstore._revlog.update_caches(transaction=transaction)
1999 2002
2000 2003
2001 2004 @interfaceutil.implementer(repository.imanifestrevisionwritable)
2002 2005 class memmanifestctx(object):
2003 2006 def __init__(self, manifestlog):
2004 2007 self._manifestlog = manifestlog
2005 2008 self._manifestdict = manifestdict()
2006 2009
2007 2010 def _storage(self):
2008 2011 return self._manifestlog.getstorage(b'')
2009 2012
2010 2013 def copy(self):
2011 2014 memmf = memmanifestctx(self._manifestlog)
2012 2015 memmf._manifestdict = self.read().copy()
2013 2016 return memmf
2014 2017
2015 2018 def read(self):
2016 2019 return self._manifestdict
2017 2020
2018 2021 def write(self, transaction, link, p1, p2, added, removed, match=None):
2019 2022 return self._storage().add(
2020 2023 self._manifestdict,
2021 2024 transaction,
2022 2025 link,
2023 2026 p1,
2024 2027 p2,
2025 2028 added,
2026 2029 removed,
2027 2030 match=match,
2028 2031 )
2029 2032
2030 2033
2031 2034 @interfaceutil.implementer(repository.imanifestrevisionstored)
2032 2035 class manifestctx(object):
2033 2036 """A class representing a single revision of a manifest, including its
2034 2037 contents, its parent revs, and its linkrev.
2035 2038 """
2036 2039
2037 2040 def __init__(self, manifestlog, node):
2038 2041 self._manifestlog = manifestlog
2039 2042 self._data = None
2040 2043
2041 2044 self._node = node
2042 2045
2043 2046 # TODO: We eventually want p1, p2, and linkrev exposed on this class,
2044 2047 # but let's add it later when something needs it and we can load it
2045 2048 # lazily.
2046 2049 # self.p1, self.p2 = store.parents(node)
2047 2050 # rev = store.rev(node)
2048 2051 # self.linkrev = store.linkrev(rev)
2049 2052
2050 2053 def _storage(self):
2051 2054 return self._manifestlog.getstorage(b'')
2052 2055
2053 2056 def node(self):
2054 2057 return self._node
2055 2058
2056 2059 def copy(self):
2057 2060 memmf = memmanifestctx(self._manifestlog)
2058 2061 memmf._manifestdict = self.read().copy()
2059 2062 return memmf
2060 2063
2061 2064 @propertycache
2062 2065 def parents(self):
2063 2066 return self._storage().parents(self._node)
2064 2067
2065 2068 def read(self):
2066 2069 if self._data is None:
2067 2070 if self._node == nullid:
2068 2071 self._data = manifestdict()
2069 2072 else:
2070 2073 store = self._storage()
2071 2074 if self._node in store.fulltextcache:
2072 2075 text = pycompat.bytestr(store.fulltextcache[self._node])
2073 2076 else:
2074 2077 text = store.revision(self._node)
2075 2078 arraytext = bytearray(text)
2076 2079 store.fulltextcache[self._node] = arraytext
2077 2080 self._data = manifestdict(text)
2078 2081 return self._data
2079 2082
2080 2083 def readfast(self, shallow=False):
2081 2084 """Calls either readdelta or read, based on which would be less work.
2082 2085 readdelta is called if the delta is against the p1, and therefore can be
2083 2086 read quickly.
2084 2087
2085 2088 If `shallow` is True, nothing changes since this is a flat manifest.
2086 2089 """
2087 2090 store = self._storage()
2088 2091 r = store.rev(self._node)
2089 2092 deltaparent = store.deltaparent(r)
2090 2093 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2091 2094 return self.readdelta()
2092 2095 return self.read()
2093 2096
2094 2097 def readdelta(self, shallow=False):
2095 2098 """Returns a manifest containing just the entries that are present
2096 2099 in this manifest, but not in its p1 manifest. This is efficient to read
2097 2100 if the revlog delta is already p1.
2098 2101
2099 2102 Changing the value of `shallow` has no effect on flat manifests.
2100 2103 """
2101 2104 store = self._storage()
2102 2105 r = store.rev(self._node)
2103 2106 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2104 2107 return manifestdict(d)
2105 2108
2106 2109 def find(self, key):
2107 2110 return self.read().find(key)
2108 2111
2109 2112
2110 2113 @interfaceutil.implementer(repository.imanifestrevisionwritable)
2111 2114 class memtreemanifestctx(object):
2112 2115 def __init__(self, manifestlog, dir=b''):
2113 2116 self._manifestlog = manifestlog
2114 2117 self._dir = dir
2115 2118 self._treemanifest = treemanifest()
2116 2119
2117 2120 def _storage(self):
2118 2121 return self._manifestlog.getstorage(b'')
2119 2122
2120 2123 def copy(self):
2121 2124 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2122 2125 memmf._treemanifest = self._treemanifest.copy()
2123 2126 return memmf
2124 2127
2125 2128 def read(self):
2126 2129 return self._treemanifest
2127 2130
2128 2131 def write(self, transaction, link, p1, p2, added, removed, match=None):
2129 2132 def readtree(dir, node):
2130 2133 return self._manifestlog.get(dir, node).read()
2131 2134
2132 2135 return self._storage().add(
2133 2136 self._treemanifest,
2134 2137 transaction,
2135 2138 link,
2136 2139 p1,
2137 2140 p2,
2138 2141 added,
2139 2142 removed,
2140 2143 readtree=readtree,
2141 2144 match=match,
2142 2145 )
2143 2146
2144 2147
2145 2148 @interfaceutil.implementer(repository.imanifestrevisionstored)
2146 2149 class treemanifestctx(object):
2147 2150 def __init__(self, manifestlog, dir, node):
2148 2151 self._manifestlog = manifestlog
2149 2152 self._dir = dir
2150 2153 self._data = None
2151 2154
2152 2155 self._node = node
2153 2156
2154 2157 # TODO: Load p1/p2/linkrev lazily. They need to be lazily loaded so that
2155 2158 # we can instantiate treemanifestctx objects for directories we don't
2156 2159 # have on disk.
2157 2160 # self.p1, self.p2 = store.parents(node)
2158 2161 # rev = store.rev(node)
2159 2162 # self.linkrev = store.linkrev(rev)
2160 2163
2161 2164 def _storage(self):
2162 2165 narrowmatch = self._manifestlog._narrowmatch
2163 2166 if not narrowmatch.always():
2164 2167 if not narrowmatch.visitdir(self._dir[:-1]):
2165 2168 return excludedmanifestrevlog(self._dir)
2166 2169 return self._manifestlog.getstorage(self._dir)
2167 2170
2168 2171 def read(self):
2169 2172 if self._data is None:
2170 2173 store = self._storage()
2171 2174 if self._node == nullid:
2172 2175 self._data = treemanifest()
2173 2176 # TODO accessing non-public API
2174 2177 elif store._treeondisk:
2175 2178 m = treemanifest(dir=self._dir)
2176 2179
2177 2180 def gettext():
2178 2181 return store.revision(self._node)
2179 2182
2180 2183 def readsubtree(dir, subm):
2181 2184 # Set verify to False since we need to be able to create
2182 2185 # subtrees for trees that don't exist on disk.
2183 2186 return self._manifestlog.get(dir, subm, verify=False).read()
2184 2187
2185 2188 m.read(gettext, readsubtree)
2186 2189 m.setnode(self._node)
2187 2190 self._data = m
2188 2191 else:
2189 2192 if self._node in store.fulltextcache:
2190 2193 text = pycompat.bytestr(store.fulltextcache[self._node])
2191 2194 else:
2192 2195 text = store.revision(self._node)
2193 2196 arraytext = bytearray(text)
2194 2197 store.fulltextcache[self._node] = arraytext
2195 2198 self._data = treemanifest(dir=self._dir, text=text)
2196 2199
2197 2200 return self._data
2198 2201
2199 2202 def node(self):
2200 2203 return self._node
2201 2204
2202 2205 def copy(self):
2203 2206 memmf = memtreemanifestctx(self._manifestlog, dir=self._dir)
2204 2207 memmf._treemanifest = self.read().copy()
2205 2208 return memmf
2206 2209
2207 2210 @propertycache
2208 2211 def parents(self):
2209 2212 return self._storage().parents(self._node)
2210 2213
2211 2214 def readdelta(self, shallow=False):
2212 2215 """Returns a manifest containing just the entries that are present
2213 2216 in this manifest, but not in its p1 manifest. This is efficient to read
2214 2217 if the revlog delta is already p1.
2215 2218
2216 2219 If `shallow` is True, this will read the delta for this directory,
2217 2220 without recursively reading subdirectory manifests. Instead, any
2218 2221 subdirectory entry will be reported as it appears in the manifest, i.e.
2219 2222 the subdirectory will be reported among files and distinguished only by
2220 2223 its 't' flag.
2221 2224 """
2222 2225 store = self._storage()
2223 2226 if shallow:
2224 2227 r = store.rev(self._node)
2225 2228 d = mdiff.patchtext(store.revdiff(store.deltaparent(r), r))
2226 2229 return manifestdict(d)
2227 2230 else:
2228 2231 # Need to perform a slow delta
2229 2232 r0 = store.deltaparent(store.rev(self._node))
2230 2233 m0 = self._manifestlog.get(self._dir, store.node(r0)).read()
2231 2234 m1 = self.read()
2232 2235 md = treemanifest(dir=self._dir)
2233 2236 for f, ((n0, fl0), (n1, fl1)) in pycompat.iteritems(m0.diff(m1)):
2234 2237 if n1:
2235 2238 md[f] = n1
2236 2239 if fl1:
2237 2240 md.setflag(f, fl1)
2238 2241 return md
2239 2242
2240 2243 def readfast(self, shallow=False):
2241 2244 """Calls either readdelta or read, based on which would be less work.
2242 2245 readdelta is called if the delta is against the p1, and therefore can be
2243 2246 read quickly.
2244 2247
2245 2248 If `shallow` is True, it only returns the entries from this manifest,
2246 2249 and not any submanifests.
2247 2250 """
2248 2251 store = self._storage()
2249 2252 r = store.rev(self._node)
2250 2253 deltaparent = store.deltaparent(r)
2251 2254 if deltaparent != nullrev and deltaparent in store.parentrevs(r):
2252 2255 return self.readdelta(shallow=shallow)
2253 2256
2254 2257 if shallow:
2255 2258 return manifestdict(store.revision(self._node))
2256 2259 else:
2257 2260 return self.read()
2258 2261
2259 2262 def find(self, key):
2260 2263 return self.read().find(key)
2261 2264
2262 2265
2263 2266 class excludeddir(treemanifest):
2264 2267 """Stand-in for a directory that is excluded from the repository.
2265 2268
2266 2269 With narrowing active on a repository that uses treemanifests,
2267 2270 some of the directory revlogs will be excluded from the resulting
2268 2271 clone. This is a huge storage win for clients, but means we need
2269 2272 some sort of pseudo-manifest to surface to internals so we can
2270 2273 detect a merge conflict outside the narrowspec. That's what this
2271 2274 class is: it stands in for a directory whose node is known, but
2272 2275 whose contents are unknown.
2273 2276 """
2274 2277
2275 2278 def __init__(self, dir, node):
2276 2279 super(excludeddir, self).__init__(dir)
2277 2280 self._node = node
2278 2281 # Add an empty file, which will be included by iterators and such,
2279 2282 # appearing as the directory itself (i.e. something like "dir/")
2280 2283 self._files[b''] = node
2281 2284 self._flags[b''] = b't'
2282 2285
2283 2286 # Manifests outside the narrowspec should never be modified, so avoid
2284 2287 # copying. This makes a noticeable difference when there are very many
2285 2288 # directories outside the narrowspec. Also, it makes sense for the copy to
2286 2289 # be of the same type as the original, which would not happen with the
2287 2290 # super type's copy().
2288 2291 def copy(self):
2289 2292 return self
2290 2293
2291 2294
2292 2295 class excludeddirmanifestctx(treemanifestctx):
2293 2296 """context wrapper for excludeddir - see that docstring for rationale"""
2294 2297
2295 2298 def __init__(self, dir, node):
2296 2299 self._dir = dir
2297 2300 self._node = node
2298 2301
2299 2302 def read(self):
2300 2303 return excludeddir(self._dir, self._node)
2301 2304
2302 2305 def readfast(self, shallow=False):
2303 2306 # special version of readfast since we don't have underlying storage
2304 2307 return self.read()
2305 2308
2306 2309 def write(self, *args):
2307 2310 raise error.ProgrammingError(
2308 2311 b'attempt to write manifest from excluded dir %s' % self._dir
2309 2312 )
2310 2313
2311 2314
2312 2315 class excludedmanifestrevlog(manifestrevlog):
2313 2316 """Stand-in for excluded treemanifest revlogs.
2314 2317
2315 2318 When narrowing is active on a treemanifest repository, we'll have
2316 2319 references to directories we can't see due to the revlog being
2317 2320 skipped. This class exists to conform to the manifestrevlog
2318 2321 interface for those directories and proactively prevent writes to
2319 2322 outside the narrowspec.
2320 2323 """
2321 2324
2322 2325 def __init__(self, dir):
2323 2326 self._dir = dir
2324 2327
2325 2328 def __len__(self):
2326 2329 raise error.ProgrammingError(
2327 2330 b'attempt to get length of excluded dir %s' % self._dir
2328 2331 )
2329 2332
2330 2333 def rev(self, node):
2331 2334 raise error.ProgrammingError(
2332 2335 b'attempt to get rev from excluded dir %s' % self._dir
2333 2336 )
2334 2337
2335 2338 def linkrev(self, node):
2336 2339 raise error.ProgrammingError(
2337 2340 b'attempt to get linkrev from excluded dir %s' % self._dir
2338 2341 )
2339 2342
2340 2343 def node(self, rev):
2341 2344 raise error.ProgrammingError(
2342 2345 b'attempt to get node from excluded dir %s' % self._dir
2343 2346 )
2344 2347
2345 2348 def add(self, *args, **kwargs):
2346 2349 # We should never write entries in dirlogs outside the narrow clone.
2347 2350 # However, the method still gets called from writesubtree() in
2348 2351 # _addtree(), so we need to handle it. We should possibly make that
2349 2352 # avoid calling add() with a clean manifest (_dirty is always False
2350 2353 # in excludeddir instances).
2351 2354 pass
@@ -1,3086 +1,3086 b''
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import io
20 20 import os
21 21 import struct
22 22 import zlib
23 23
24 24 # import stuff from node for others to import from revlog
25 25 from .node import (
26 26 bin,
27 27 hex,
28 28 nullhex,
29 29 nullid,
30 30 nullrev,
31 31 short,
32 32 wdirfilenodeids,
33 33 wdirhex,
34 34 wdirid,
35 35 wdirrev,
36 36 )
37 37 from .i18n import _
38 38 from .pycompat import getattr
39 39 from .revlogutils.constants import (
40 40 FLAG_GENERALDELTA,
41 41 FLAG_INLINE_DATA,
42 42 REVLOGV0,
43 43 REVLOGV1,
44 44 REVLOGV1_FLAGS,
45 45 REVLOGV2,
46 46 REVLOGV2_FLAGS,
47 47 REVLOG_DEFAULT_FLAGS,
48 48 REVLOG_DEFAULT_FORMAT,
49 49 REVLOG_DEFAULT_VERSION,
50 50 )
51 51 from .revlogutils.flagutil import (
52 52 REVIDX_DEFAULT_FLAGS,
53 53 REVIDX_ELLIPSIS,
54 54 REVIDX_EXTSTORED,
55 55 REVIDX_FLAGS_ORDER,
56 56 REVIDX_HASCOPIESINFO,
57 57 REVIDX_ISCENSORED,
58 58 REVIDX_RAWTEXT_CHANGING_FLAGS,
59 59 REVIDX_SIDEDATA,
60 60 )
61 61 from .thirdparty import attr
62 62 from . import (
63 63 ancestor,
64 64 dagop,
65 65 error,
66 66 mdiff,
67 67 policy,
68 68 pycompat,
69 69 templatefilters,
70 70 util,
71 71 )
72 72 from .interfaces import (
73 73 repository,
74 74 util as interfaceutil,
75 75 )
76 76 from .revlogutils import (
77 77 deltas as deltautil,
78 78 flagutil,
79 79 nodemap as nodemaputil,
80 80 sidedata as sidedatautil,
81 81 )
82 82 from .utils import (
83 83 storageutil,
84 84 stringutil,
85 85 )
86 86
87 87 # blanket usage of all the names to silence pyflakes unused-import warnings
88 88 # We need these names available in the module for extensions.
89 89 REVLOGV0
90 90 REVLOGV1
91 91 REVLOGV2
92 92 FLAG_INLINE_DATA
93 93 FLAG_GENERALDELTA
94 94 REVLOG_DEFAULT_FLAGS
95 95 REVLOG_DEFAULT_FORMAT
96 96 REVLOG_DEFAULT_VERSION
97 97 REVLOGV1_FLAGS
98 98 REVLOGV2_FLAGS
99 99 REVIDX_ISCENSORED
100 100 REVIDX_ELLIPSIS
101 101 REVIDX_SIDEDATA
102 102 REVIDX_HASCOPIESINFO
103 103 REVIDX_EXTSTORED
104 104 REVIDX_DEFAULT_FLAGS
105 105 REVIDX_FLAGS_ORDER
106 106 REVIDX_RAWTEXT_CHANGING_FLAGS
107 107
108 108 parsers = policy.importmod('parsers')
109 109 rustancestor = policy.importrust('ancestor')
110 110 rustdagop = policy.importrust('dagop')
111 111 rustrevlog = policy.importrust('revlog')
112 112
113 113 # Aliased for performance.
114 114 _zlibdecompress = zlib.decompress
115 115
116 116 # max size of revlog with inline data
117 117 _maxinline = 131072
118 118 _chunksize = 1048576
119 119
120 120 # Flag processors for REVIDX_ELLIPSIS.
121 121 def ellipsisreadprocessor(rl, text):
122 122 return text, False, {}
123 123
124 124
125 125 def ellipsiswriteprocessor(rl, text, sidedata):
126 126 return text, False
127 127
128 128
129 129 def ellipsisrawprocessor(rl, text):
130 130 return False
131 131
132 132
133 133 ellipsisprocessor = (
134 134 ellipsisreadprocessor,
135 135 ellipsiswriteprocessor,
136 136 ellipsisrawprocessor,
137 137 )
138 138
139 139
140 140 def getoffset(q):
141 141 return int(q >> 16)
142 142
143 143
144 144 def gettype(q):
145 145 return int(q & 0xFFFF)
146 146
147 147
148 148 def offset_type(offset, type):
149 149 if (type & ~flagutil.REVIDX_KNOWN_FLAGS) != 0:
150 150 raise ValueError(b'unknown revlog index flags')
151 151 return int(int(offset) << 16 | type)
152 152
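# Editorial note (not part of the original source): the index packs the data
# offset and the revision flags into a single integer -- for example
#     q = offset_type(1, 0)   # == 1 << 16 == 65536
#     getoffset(q)            # -> 1
#     gettype(q)              # -> 0
# and offset_type() raises ValueError if unknown flag bits are passed.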
153 153
154 154 def _verify_revision(rl, skipflags, state, node):
155 155 """Verify the integrity of the given revlog ``node`` while providing a hook
156 156 point for extensions to influence the operation."""
157 157 if skipflags:
158 158 state[b'skipread'].add(node)
159 159 else:
160 160 # Side-effect: read content and verify hash.
161 161 rl.revision(node)
162 162
163 163
164 164 # True if a fast implementation for persistent-nodemap is available
165 165 #
166 166 # We also consider the "pure" python implementation to be "fast" because
167 167 # people using pure python don't really have performance considerations (and
168 168 # face a wheelbarrow of other slowness sources)
169 169 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or util.safehasattr(
170 170 parsers, 'BaseIndexObject'
171 171 )
172 172
173 173
174 174 @attr.s(slots=True, frozen=True)
175 175 class _revisioninfo(object):
176 176 """Information about a revision that allows building its fulltext
177 177 node: expected hash of the revision
178 178 p1, p2: parent revs of the revision
179 179 btext: built text cache consisting of a one-element list
180 180 cachedelta: (baserev, uncompressed_delta) or None
181 181 flags: flags associated to the revision storage
182 182
183 183 One of btext[0] or cachedelta must be set.
184 184 """
185 185
186 186 node = attr.ib()
187 187 p1 = attr.ib()
188 188 p2 = attr.ib()
189 189 btext = attr.ib()
190 190 textlen = attr.ib()
191 191 cachedelta = attr.ib()
192 192 flags = attr.ib()
193 193
194 194
195 195 @interfaceutil.implementer(repository.irevisiondelta)
196 196 @attr.s(slots=True)
197 197 class revlogrevisiondelta(object):
198 198 node = attr.ib()
199 199 p1node = attr.ib()
200 200 p2node = attr.ib()
201 201 basenode = attr.ib()
202 202 flags = attr.ib()
203 203 baserevisionsize = attr.ib()
204 204 revision = attr.ib()
205 205 delta = attr.ib()
206 206 linknode = attr.ib(default=None)
207 207
208 208
209 209 @interfaceutil.implementer(repository.iverifyproblem)
210 210 @attr.s(frozen=True)
211 211 class revlogproblem(object):
212 212 warning = attr.ib(default=None)
213 213 error = attr.ib(default=None)
214 214 node = attr.ib(default=None)
215 215
216 216
217 217 # index v0:
218 218 # 4 bytes: offset
219 219 # 4 bytes: compressed length
220 220 # 4 bytes: base rev
221 221 # 4 bytes: link rev
222 222 # 20 bytes: parent 1 nodeid
223 223 # 20 bytes: parent 2 nodeid
224 224 # 20 bytes: nodeid
225 225 indexformatv0 = struct.Struct(b">4l20s20s20s")
226 226 indexformatv0_pack = indexformatv0.pack
227 227 indexformatv0_unpack = indexformatv0.unpack
228 228
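# Editorial note (not part of the original source): indexformatv0.size is
# 4 * 4 + 3 * 20 = 76 bytes per entry, and a raw v0 entry can be decoded with
# the unpacker defined above, e.g.
#     offset, comp_len, base_rev, link_rev, p1, p2, node = \
#         indexformatv0_unpack(data[off:off + indexformatv0.size])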
229 229
230 230 class revlogoldindex(list):
231 231 @property
232 232 def nodemap(self):
233 233 msg = b"index.nodemap is deprecated, use index.[has_node|rev|get_rev]"
234 234 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
235 235 return self._nodemap
236 236
237 237 @util.propertycache
238 238 def _nodemap(self):
239 239 nodemap = nodemaputil.NodeMap({nullid: nullrev})
240 240 for r in range(0, len(self)):
241 241 n = self[r][7]
242 242 nodemap[n] = r
243 243 return nodemap
244 244
245 245 def has_node(self, node):
246 246 """return True if the node exist in the index"""
247 247 return node in self._nodemap
248 248
249 249 def rev(self, node):
250 250 """return a revision for a node
251 251
252 252 If the node is unknown, raise a RevlogError"""
253 253 return self._nodemap[node]
254 254
255 255 def get_rev(self, node):
256 256 """return a revision for a node
257 257
258 258 If the node is unknown, return None"""
259 259 return self._nodemap.get(node)
260 260
261 261 def append(self, tup):
262 262 self._nodemap[tup[7]] = len(self)
263 263 super(revlogoldindex, self).append(tup)
264 264
265 265 def __delitem__(self, i):
266 266 if not isinstance(i, slice) or not i.stop == -1 or i.step is not None:
267 267 raise ValueError(b"deleting slices only supports a:-1 with step 1")
268 268 for r in pycompat.xrange(i.start, len(self)):
269 269 del self._nodemap[self[r][7]]
270 270 super(revlogoldindex, self).__delitem__(i)
271 271
272 272 def clearcaches(self):
273 273 self.__dict__.pop('_nodemap', None)
274 274
275 275 def __getitem__(self, i):
276 276 if i == -1:
277 277 return (0, 0, 0, -1, -1, -1, -1, nullid)
278 278 return list.__getitem__(self, i)
279 279
280 280
281 281 class revlogoldio(object):
282 282 def __init__(self):
283 283 self.size = indexformatv0.size
284 284
285 285 def parseindex(self, data, inline):
286 286 s = self.size
287 287 index = []
288 288 nodemap = nodemaputil.NodeMap({nullid: nullrev})
289 289 n = off = 0
290 290 l = len(data)
291 291 while off + s <= l:
292 292 cur = data[off : off + s]
293 293 off += s
294 294 e = indexformatv0_unpack(cur)
295 295 # transform to revlogv1 format
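# resulting tuple layout (matching the "index ng" format described below):
# (offset+flags, comp. length, uncomp. length (-1 = unknown here),
#  base rev, link rev, p1 rev, p2 rev, nodeid)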
296 296 e2 = (
297 297 offset_type(e[0], 0),
298 298 e[1],
299 299 -1,
300 300 e[2],
301 301 e[3],
302 302 nodemap.get(e[4], nullrev),
303 303 nodemap.get(e[5], nullrev),
304 304 e[6],
305 305 )
306 306 index.append(e2)
307 307 nodemap[e[6]] = n
308 308 n += 1
309 309
310 310 index = revlogoldindex(index)
311 311 return index, None
312 312
313 313 def packentry(self, entry, node, version, rev):
314 314 if gettype(entry[0]):
315 315 raise error.RevlogError(
316 316 _(b'index entry flags need revlog version 1')
317 317 )
318 318 e2 = (
319 319 getoffset(entry[0]),
320 320 entry[1],
321 321 entry[3],
322 322 entry[4],
323 323 node(entry[5]),
324 324 node(entry[6]),
325 325 entry[7],
326 326 )
327 327 return indexformatv0_pack(*e2)
328 328
329 329
330 330 # index ng:
331 331 # 6 bytes: offset
332 332 # 2 bytes: flags
333 333 # 4 bytes: compressed length
334 334 # 4 bytes: uncompressed length
335 335 # 4 bytes: base rev
336 336 # 4 bytes: link rev
337 337 # 4 bytes: parent 1 rev
338 338 # 4 bytes: parent 2 rev
339 339 # 32 bytes: nodeid
340 340 indexformatng = struct.Struct(b">Qiiiiii20s12x")
341 341 indexformatng_pack = indexformatng.pack
342 342 versionformat = struct.Struct(b">I")
343 343 versionformat_pack = versionformat.pack
344 344 versionformat_unpack = versionformat.unpack
345 345
346 346 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
347 347 # signed integer)
348 348 _maxentrysize = 0x7FFFFFFF
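# Example (editor's sketch, values made up): the first field of a v1 ("ng")
# entry packs the data offset and the flags into a single 64-bit integer
# (offset << 16 | flags), which matches what offset_type() builds elsewhere
# in this module.
#
#     offset_flags = (4096 << 16) | 0
#     packed = indexformatng_pack(offset_flags, 120, 240, 2, 2, 1, -1, node)
#     assert len(packed) == indexformatng.size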
349 349
350 350
351 351 class revlogio(object):
352 352 def __init__(self):
353 353 self.size = indexformatng.size
354 354
355 355 def parseindex(self, data, inline):
356 356 # call the C implementation to parse the index data
357 357 index, cache = parsers.parse_index2(data, inline)
358 358 return index, cache
359 359
360 360 def packentry(self, entry, node, version, rev):
361 361 p = indexformatng_pack(*entry)
362 362 if rev == 0:
363 363 p = versionformat_pack(version) + p[4:]
364 364 return p
365 365
366 366
367 367 NodemapRevlogIO = None
368 368
369 369 if util.safehasattr(parsers, 'parse_index_devel_nodemap'):
370 370
371 371 class NodemapRevlogIO(revlogio):
372 372 """A debug oriented IO class that return a PersistentNodeMapIndexObject
373 373
374 374 The PersistentNodeMapIndexObject object is meant to test the persistent nodemap feature.
375 375 """
376 376
377 377 def parseindex(self, data, inline):
378 378 index, cache = parsers.parse_index_devel_nodemap(data, inline)
379 379 return index, cache
380 380
381 381
382 382 class rustrevlogio(revlogio):
383 383 def parseindex(self, data, inline):
384 384 index, cache = super(rustrevlogio, self).parseindex(data, inline)
385 385 return rustrevlog.MixedIndex(index), cache
386 386
387 387
388 388 class revlog(object):
389 389 """
390 390 the underlying revision storage object
391 391
392 392 A revlog consists of two parts, an index and the revision data.
393 393
394 394 The index is a file with a fixed record size containing
395 395 information on each revision, including its nodeid (hash), the
396 396 nodeids of its parents, the position and offset of its data within
397 397 the data file, and the revision it's based on. Finally, each entry
398 398 contains a linkrev entry that can serve as a pointer to external
399 399 data.
400 400
401 401 The revision data itself is a linear collection of data chunks.
402 402 Each chunk represents a revision and is usually represented as a
403 403 delta against the previous chunk. To bound lookup time, runs of
404 404 deltas are limited to about 2 times the length of the original
405 405 version data. This makes retrieval of a version proportional to
406 406 its size, or O(1) relative to the number of revisions.
407 407
408 408 Both pieces of the revlog are written to in an append-only
409 409 fashion, which means we never need to rewrite a file to insert or
410 410 remove data, and can use some simple techniques to avoid the need
411 411 for locking while reading.
412 412
413 413 If checkambig, indexfile is opened with checkambig=True at
414 414 writing, to avoid file stat ambiguity.
415 415
416 416 If mmaplargeindex is True, and an mmapindexthreshold is set, the
417 417 index will be mmapped rather than read if it is larger than the
418 418 configured threshold.
419 419
420 420 If censorable is True, the revlog can have censored revisions.
421 421
422 422 If `upperboundcomp` is not None, this is the expected maximal gain from
423 423 compression for the data content.
424 424 """
425 425
426 426 _flagserrorclass = error.RevlogError
427 427
428 428 def __init__(
429 429 self,
430 430 opener,
431 431 indexfile,
432 432 datafile=None,
433 433 checkambig=False,
434 434 mmaplargeindex=False,
435 435 censorable=False,
436 436 upperboundcomp=None,
437 437 persistentnodemap=False,
438 438 ):
439 439 """
440 440 create a revlog object
441 441
442 442 opener is a function that abstracts the file opening operation
443 443 and can be used to implement COW semantics or the like.
444 444
445 445 """
446 446 self.upperboundcomp = upperboundcomp
447 447 self.indexfile = indexfile
448 448 self.datafile = datafile or (indexfile[:-2] + b".d")
449 449 self.nodemap_file = None
450 450 if persistentnodemap:
451 451 self.nodemap_file = nodemaputil.get_nodemap_file(
452 452 opener, self.indexfile
453 453 )
454 454
455 455 self.opener = opener
456 456 # When True, indexfile is opened with checkambig=True at writing, to
457 457 # avoid file stat ambiguity.
458 458 self._checkambig = checkambig
459 459 self._mmaplargeindex = mmaplargeindex
460 460 self._censorable = censorable
461 461 # 3-tuple of (node, rev, text) for a raw revision.
462 462 self._revisioncache = None
463 463 # Maps rev to chain base rev.
464 464 self._chainbasecache = util.lrucachedict(100)
465 465 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
466 466 self._chunkcache = (0, b'')
467 467 # How much data to read and cache into the raw revlog data cache.
468 468 self._chunkcachesize = 65536
469 469 self._maxchainlen = None
470 470 self._deltabothparents = True
471 471 self.index = None
472 472 self._nodemap_docket = None
473 473 # Mapping of partial identifiers to full nodes.
474 474 self._pcache = {}
475 475 # Mapping of revision integer to full node.
476 476 self._compengine = b'zlib'
477 477 self._compengineopts = {}
478 478 self._maxdeltachainspan = -1
479 479 self._withsparseread = False
480 480 self._sparserevlog = False
481 481 self._srdensitythreshold = 0.50
482 482 self._srmingapsize = 262144
483 483
484 484 # Make copy of flag processors so each revlog instance can support
485 485 # custom flags.
486 486 self._flagprocessors = dict(flagutil.flagprocessors)
487 487
488 488 # 2-tuple of file handles being used for active writing.
489 489 self._writinghandles = None
490 490
491 491 self._loadindex()
492 492
493 493 def _loadindex(self):
494 494 mmapindexthreshold = None
495 495 opts = self.opener.options
496 496
497 497 if b'revlogv2' in opts:
498 498 newversionflags = REVLOGV2 | FLAG_INLINE_DATA
499 499 elif b'revlogv1' in opts:
500 500 newversionflags = REVLOGV1 | FLAG_INLINE_DATA
501 501 if b'generaldelta' in opts:
502 502 newversionflags |= FLAG_GENERALDELTA
503 503 elif b'revlogv0' in self.opener.options:
504 504 newversionflags = REVLOGV0
505 505 else:
506 506 newversionflags = REVLOG_DEFAULT_VERSION
507 507
508 508 if b'chunkcachesize' in opts:
509 509 self._chunkcachesize = opts[b'chunkcachesize']
510 510 if b'maxchainlen' in opts:
511 511 self._maxchainlen = opts[b'maxchainlen']
512 512 if b'deltabothparents' in opts:
513 513 self._deltabothparents = opts[b'deltabothparents']
514 514 self._lazydelta = bool(opts.get(b'lazydelta', True))
515 515 self._lazydeltabase = False
516 516 if self._lazydelta:
517 517 self._lazydeltabase = bool(opts.get(b'lazydeltabase', False))
518 518 if b'compengine' in opts:
519 519 self._compengine = opts[b'compengine']
520 520 if b'zlib.level' in opts:
521 521 self._compengineopts[b'zlib.level'] = opts[b'zlib.level']
522 522 if b'zstd.level' in opts:
523 523 self._compengineopts[b'zstd.level'] = opts[b'zstd.level']
524 524 if b'maxdeltachainspan' in opts:
525 525 self._maxdeltachainspan = opts[b'maxdeltachainspan']
526 526 if self._mmaplargeindex and b'mmapindexthreshold' in opts:
527 527 mmapindexthreshold = opts[b'mmapindexthreshold']
528 528 self.hassidedata = bool(opts.get(b'side-data', False))
529 529 if self.hassidedata:
530 530 self._flagprocessors[REVIDX_SIDEDATA] = sidedatautil.processors
531 531 self._sparserevlog = bool(opts.get(b'sparse-revlog', False))
532 532 withsparseread = bool(opts.get(b'with-sparse-read', False))
533 533 # sparse-revlog forces sparse-read
534 534 self._withsparseread = self._sparserevlog or withsparseread
535 535 if b'sparse-read-density-threshold' in opts:
536 536 self._srdensitythreshold = opts[b'sparse-read-density-threshold']
537 537 if b'sparse-read-min-gap-size' in opts:
538 538 self._srmingapsize = opts[b'sparse-read-min-gap-size']
539 539 if opts.get(b'enableellipsis'):
540 540 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
541 541
542 542 # revlog v0 doesn't have flag processors
543 543 for flag, processor in pycompat.iteritems(
544 544 opts.get(b'flagprocessors', {})
545 545 ):
546 546 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
547 547
548 548 if self._chunkcachesize <= 0:
549 549 raise error.RevlogError(
550 550 _(b'revlog chunk cache size %r is not greater than 0')
551 551 % self._chunkcachesize
552 552 )
553 553 elif self._chunkcachesize & (self._chunkcachesize - 1):
554 554 raise error.RevlogError(
555 555 _(b'revlog chunk cache size %r is not a power of 2')
556 556 % self._chunkcachesize
557 557 )
558 558
559 559 indexdata = b''
560 560 self._initempty = True
561 561 try:
562 562 with self._indexfp() as f:
563 563 if (
564 564 mmapindexthreshold is not None
565 565 and self.opener.fstat(f).st_size >= mmapindexthreshold
566 566 ):
567 567 # TODO: should .close() the mmap to release resources
568 568 # without relying on Python GC
569 569 indexdata = util.buffer(util.mmapread(f))
570 570 else:
571 571 indexdata = f.read()
572 572 if len(indexdata) > 0:
573 573 versionflags = versionformat_unpack(indexdata[:4])[0]
574 574 self._initempty = False
575 575 else:
576 576 versionflags = newversionflags
577 577 except IOError as inst:
578 578 if inst.errno != errno.ENOENT:
579 579 raise
580 580
581 581 versionflags = newversionflags
582 582
583 583 self.version = versionflags
584 584
585 585 flags = versionflags & ~0xFFFF
586 586 fmt = versionflags & 0xFFFF
587 587
588 588 if fmt == REVLOGV0:
589 589 if flags:
590 590 raise error.RevlogError(
591 591 _(b'unknown flags (%#04x) in version %d revlog %s')
592 592 % (flags >> 16, fmt, self.indexfile)
593 593 )
594 594
595 595 self._inline = False
596 596 self._generaldelta = False
597 597
598 598 elif fmt == REVLOGV1:
599 599 if flags & ~REVLOGV1_FLAGS:
600 600 raise error.RevlogError(
601 601 _(b'unknown flags (%#04x) in version %d revlog %s')
602 602 % (flags >> 16, fmt, self.indexfile)
603 603 )
604 604
605 605 self._inline = versionflags & FLAG_INLINE_DATA
606 606 self._generaldelta = versionflags & FLAG_GENERALDELTA
607 607
608 608 elif fmt == REVLOGV2:
609 609 if flags & ~REVLOGV2_FLAGS:
610 610 raise error.RevlogError(
611 611 _(b'unknown flags (%#04x) in version %d revlog %s')
612 612 % (flags >> 16, fmt, self.indexfile)
613 613 )
614 614
615 615 self._inline = versionflags & FLAG_INLINE_DATA
616 616 # generaldelta implied by version 2 revlogs.
617 617 self._generaldelta = True
618 618
619 619 else:
620 620 raise error.RevlogError(
621 621 _(b'unknown version (%d) in revlog %s') % (fmt, self.indexfile)
622 622 )
623 623 # sparse-revlog can't be on without general-delta (issue6056)
624 624 if not self._generaldelta:
625 625 self._sparserevlog = False
626 626
627 627 self._storedeltachains = True
628 628
629 629 devel_nodemap = (
630 630 self.nodemap_file
631 631 and opts.get(b'devel-force-nodemap', False)
632 632 and NodemapRevlogIO is not None
633 633 )
634 634
635 635 use_rust_index = False
636 636 if rustrevlog is not None:
637 637 if self.nodemap_file is not None:
638 638 use_rust_index = True
639 639 else:
640 640 use_rust_index = self.opener.options.get(b'rust.index')
641 641
642 642 self._io = revlogio()
643 643 if self.version == REVLOGV0:
644 644 self._io = revlogoldio()
645 645 elif devel_nodemap:
646 646 self._io = NodemapRevlogIO()
647 647 elif use_rust_index:
648 648 self._io = rustrevlogio()
649 649 try:
650 650 d = self._io.parseindex(indexdata, self._inline)
651 651 index, _chunkcache = d
652 652 use_nodemap = (
653 653 not self._inline
654 654 and self.nodemap_file is not None
655 655 and util.safehasattr(index, 'update_nodemap_data')
656 656 )
657 657 if use_nodemap:
658 658 nodemap_data = nodemaputil.persisted_data(self)
659 659 if nodemap_data is not None:
660 660 docket = nodemap_data[0]
661 661 if (
662 662 len(d[0]) > docket.tip_rev
663 663 and d[0][docket.tip_rev][7] == docket.tip_node
664 664 ):
665 665 # no changelog tampering
666 666 self._nodemap_docket = docket
667 667 index.update_nodemap_data(*nodemap_data)
668 668 except (ValueError, IndexError):
669 669 raise error.RevlogError(
670 670 _(b"index %s is corrupted") % self.indexfile
671 671 )
672 672 self.index, self._chunkcache = d
673 673 if not self._chunkcache:
674 674 self._chunkclear()
675 675 # revnum -> (chain-length, sum-delta-length)
676 676 self._chaininfocache = util.lrucachedict(500)
677 677 # revlog header -> revlog compressor
678 678 self._decompressors = {}
679 679
680 680 @util.propertycache
681 681 def _compressor(self):
682 682 engine = util.compengines[self._compengine]
683 683 return engine.revlogcompressor(self._compengineopts)
684 684
685 685 def _indexfp(self, mode=b'r'):
686 686 """file object for the revlog's index file"""
687 687 args = {'mode': mode}
688 688 if mode != b'r':
689 689 args['checkambig'] = self._checkambig
690 690 if mode == b'w':
691 691 args['atomictemp'] = True
692 692 return self.opener(self.indexfile, **args)
693 693
694 694 def _datafp(self, mode=b'r'):
695 695 """file object for the revlog's data file"""
696 696 return self.opener(self.datafile, mode=mode)
697 697
698 698 @contextlib.contextmanager
699 699 def _datareadfp(self, existingfp=None):
700 700 """file object suitable to read data"""
701 701 # Use explicit file handle, if given.
702 702 if existingfp is not None:
703 703 yield existingfp
704 704
705 705 # Use a file handle being actively used for writes, if available.
706 706 # There is some danger to doing this because reads will seek the
707 707 # file. However, _writeentry() performs a SEEK_END before all writes,
708 708 # so we should be safe.
709 709 elif self._writinghandles:
710 710 if self._inline:
711 711 yield self._writinghandles[0]
712 712 else:
713 713 yield self._writinghandles[1]
714 714
715 715 # Otherwise open a new file handle.
716 716 else:
717 717 if self._inline:
718 718 func = self._indexfp
719 719 else:
720 720 func = self._datafp
721 721 with func() as fp:
722 722 yield fp
723 723
724 724 def tiprev(self):
725 725 return len(self.index) - 1
726 726
727 727 def tip(self):
728 728 return self.node(self.tiprev())
729 729
730 730 def __contains__(self, rev):
731 731 return 0 <= rev < len(self)
732 732
733 733 def __len__(self):
734 734 return len(self.index)
735 735
736 736 def __iter__(self):
737 737 return iter(pycompat.xrange(len(self)))
738 738
739 739 def revs(self, start=0, stop=None):
740 740 """iterate over all rev in this revlog (from start to stop)"""
741 741 return storageutil.iterrevs(len(self), start=start, stop=stop)
742 742
743 743 @property
744 744 def nodemap(self):
745 745 msg = (
746 746 b"revlog.nodemap is deprecated, "
747 747 b"use revlog.index.[has_node|rev|get_rev]"
748 748 )
749 749 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
750 750 return self.index.nodemap
751 751
752 752 @property
753 753 def _nodecache(self):
754 754 msg = b"revlog._nodecache is deprecated, use revlog.index.nodemap"
755 755 util.nouideprecwarn(msg, b'5.3', stacklevel=2)
756 756 return self.index.nodemap
757 757
758 758 def hasnode(self, node):
759 759 try:
760 760 self.rev(node)
761 761 return True
762 762 except KeyError:
763 763 return False
764 764
765 765 def candelta(self, baserev, rev):
766 766 """whether two revisions (baserev, rev) can be delta-ed or not"""
767 767 # Disable delta if either rev requires a content-changing flag
768 768 # processor (ex. LFS). This is because such a flag processor can alter
769 769 # the rawtext content that the delta will be based on, and two clients
770 770 # could have the same revlog node with different flags (i.e. different
771 771 # rawtext contents) and the delta could be incompatible.
772 772 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
773 773 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
774 774 ):
775 775 return False
776 776 return True
777 777
778 778 def update_caches(self, transaction):
779 779 if self.nodemap_file is not None:
780 780 if transaction is None:
781 781 nodemaputil.update_persistent_nodemap(self)
782 782 else:
783 783 nodemaputil.setup_persistent_nodemap(transaction, self)
784 784
785 785 def clearcaches(self):
786 786 self._revisioncache = None
787 787 self._chainbasecache.clear()
788 788 self._chunkcache = (0, b'')
789 789 self._pcache = {}
790 790 self._nodemap_docket = None
791 791 self.index.clearcaches()
792 792 # The python code is the one responsible for validating the docket, so
793 793 # we end up having to refresh it here.
794 794 use_nodemap = (
795 795 not self._inline
796 796 and self.nodemap_file is not None
797 797 and util.safehasattr(self.index, 'update_nodemap_data')
798 798 )
799 799 if use_nodemap:
800 800 nodemap_data = nodemaputil.persisted_data(self)
801 801 if nodemap_data is not None:
802 802 self._nodemap_docket = nodemap_data[0]
803 803 self.index.update_nodemap_data(*nodemap_data)
804 804
805 805 def rev(self, node):
806 806 try:
807 807 return self.index.rev(node)
808 808 except TypeError:
809 809 raise
810 810 except error.RevlogError:
811 811 # parsers.c radix tree lookup failed
812 812 if node == wdirid or node in wdirfilenodeids:
813 813 raise error.WdirUnsupported
814 814 raise error.LookupError(node, self.indexfile, _(b'no node'))
815 815
816 816 # Accessors for index entries.
817 817
818 818 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
819 819 # are flags.
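# For example, a packed first entry of 0x001F0002 decodes to offset 0x1F
# (via ``>> 16`` in start()) and flags 0x0002 (via ``& 0xFFFF`` in flags()).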
820 820 def start(self, rev):
821 821 return int(self.index[rev][0] >> 16)
822 822
823 823 def flags(self, rev):
824 824 return self.index[rev][0] & 0xFFFF
825 825
826 826 def length(self, rev):
827 827 return self.index[rev][1]
828 828
829 829 def rawsize(self, rev):
830 830 """return the length of the uncompressed text for a given revision"""
831 831 l = self.index[rev][2]
832 832 if l >= 0:
833 833 return l
834 834
835 835 t = self.rawdata(rev)
836 836 return len(t)
837 837
838 838 def size(self, rev):
839 839 """length of non-raw text (processed by a "read" flag processor)"""
840 840 # fast path: if no "read" flag processor could change the content,
841 841 # size is rawsize. note: ELLIPSIS is known to not change the content.
842 842 flags = self.flags(rev)
843 843 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
844 844 return self.rawsize(rev)
845 845
846 846 return len(self.revision(rev, raw=False))
847 847
848 848 def chainbase(self, rev):
849 849 base = self._chainbasecache.get(rev)
850 850 if base is not None:
851 851 return base
852 852
853 853 index = self.index
854 854 iterrev = rev
855 855 base = index[iterrev][3]
856 856 while base != iterrev:
857 857 iterrev = base
858 858 base = index[iterrev][3]
859 859
860 860 self._chainbasecache[rev] = base
861 861 return base
862 862
863 863 def linkrev(self, rev):
864 864 return self.index[rev][4]
865 865
866 866 def parentrevs(self, rev):
867 867 try:
868 868 entry = self.index[rev]
869 869 except IndexError:
870 870 if rev == wdirrev:
871 871 raise error.WdirUnsupported
872 872 raise
873 873
874 874 return entry[5], entry[6]
875 875
876 876 # fast parentrevs(rev) where rev isn't filtered
877 877 _uncheckedparentrevs = parentrevs
878 878
879 879 def node(self, rev):
880 880 try:
881 881 return self.index[rev][7]
882 882 except IndexError:
883 883 if rev == wdirrev:
884 884 raise error.WdirUnsupported
885 885 raise
886 886
887 887 # Derived from index values.
888 888
889 889 def end(self, rev):
890 890 return self.start(rev) + self.length(rev)
891 891
892 892 def parents(self, node):
893 893 i = self.index
894 894 d = i[self.rev(node)]
895 895 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
896 896
897 897 def chainlen(self, rev):
898 898 return self._chaininfo(rev)[0]
899 899
900 900 def _chaininfo(self, rev):
901 901 chaininfocache = self._chaininfocache
902 902 if rev in chaininfocache:
903 903 return chaininfocache[rev]
904 904 index = self.index
905 905 generaldelta = self._generaldelta
906 906 iterrev = rev
907 907 e = index[iterrev]
908 908 clen = 0
909 909 compresseddeltalen = 0
910 910 while iterrev != e[3]:
911 911 clen += 1
912 912 compresseddeltalen += e[1]
913 913 if generaldelta:
914 914 iterrev = e[3]
915 915 else:
916 916 iterrev -= 1
917 917 if iterrev in chaininfocache:
918 918 t = chaininfocache[iterrev]
919 919 clen += t[0]
920 920 compresseddeltalen += t[1]
921 921 break
922 922 e = index[iterrev]
923 923 else:
924 924 # Add text length of base since decompressing that also takes
925 925 # work. For cache hits the length is already included.
926 926 compresseddeltalen += e[1]
927 927 r = (clen, compresseddeltalen)
928 928 chaininfocache[rev] = r
929 929 return r
930 930
931 931 def _deltachain(self, rev, stoprev=None):
932 932 """Obtain the delta chain for a revision.
933 933
934 934 ``stoprev`` specifies a revision to stop at. If not specified, we
935 935 stop at the base of the chain.
936 936
937 937 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
938 938 revs in ascending order and ``stopped`` is a bool indicating whether
939 939 ``stoprev`` was hit.
940 940 """
941 941 # Try C implementation.
942 942 try:
943 943 return self.index.deltachain(rev, stoprev, self._generaldelta)
944 944 except AttributeError:
945 945 pass
946 946
947 947 chain = []
948 948
949 949 # Alias to prevent attribute lookup in tight loop.
950 950 index = self.index
951 951 generaldelta = self._generaldelta
952 952
953 953 iterrev = rev
954 954 e = index[iterrev]
955 955 while iterrev != e[3] and iterrev != stoprev:
956 956 chain.append(iterrev)
957 957 if generaldelta:
958 958 iterrev = e[3]
959 959 else:
960 960 iterrev -= 1
961 961 e = index[iterrev]
962 962
963 963 if iterrev == stoprev:
964 964 stopped = True
965 965 else:
966 966 chain.append(iterrev)
967 967 stopped = False
968 968
969 969 chain.reverse()
970 970 return chain, stopped
971 971
972 972 def ancestors(self, revs, stoprev=0, inclusive=False):
973 973 """Generate the ancestors of 'revs' in reverse revision order.
974 974 Does not generate revs lower than stoprev.
975 975
976 976 See the documentation for ancestor.lazyancestors for more details."""
977 977
978 978 # first, make sure start revisions aren't filtered
979 979 revs = list(revs)
980 980 checkrev = self.node
981 981 for r in revs:
982 982 checkrev(r)
983 983 # and we're sure ancestors aren't filtered as well
984 984
985 985 if rustancestor is not None:
986 986 lazyancestors = rustancestor.LazyAncestors
987 987 arg = self.index
988 988 else:
989 989 lazyancestors = ancestor.lazyancestors
990 990 arg = self._uncheckedparentrevs
991 991 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
992 992
993 993 def descendants(self, revs):
994 994 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
995 995
996 996 def findcommonmissing(self, common=None, heads=None):
997 997 """Return a tuple of the ancestors of common and the ancestors of heads
998 998 that are not ancestors of common. In revset terminology, we return the
999 999 tuple:
1000 1000
1001 1001 ::common, (::heads) - (::common)
1002 1002
1003 1003 The list is sorted by revision number, meaning it is
1004 1004 topologically sorted.
1005 1005
1006 1006 'heads' and 'common' are both lists of node IDs. If heads is
1007 1007 not supplied, uses all of the revlog's heads. If common is not
1008 1008 supplied, uses nullid."""
1009 1009 if common is None:
1010 1010 common = [nullid]
1011 1011 if heads is None:
1012 1012 heads = self.heads()
1013 1013
1014 1014 common = [self.rev(n) for n in common]
1015 1015 heads = [self.rev(n) for n in heads]
1016 1016
1017 1017 # we want the ancestors, but inclusive
1018 1018 class lazyset(object):
1019 1019 def __init__(self, lazyvalues):
1020 1020 self.addedvalues = set()
1021 1021 self.lazyvalues = lazyvalues
1022 1022
1023 1023 def __contains__(self, value):
1024 1024 return value in self.addedvalues or value in self.lazyvalues
1025 1025
1026 1026 def __iter__(self):
1027 1027 added = self.addedvalues
1028 1028 for r in added:
1029 1029 yield r
1030 1030 for r in self.lazyvalues:
1031 1031 if r not in added:
1032 1032 yield r
1033 1033
1034 1034 def add(self, value):
1035 1035 self.addedvalues.add(value)
1036 1036
1037 1037 def update(self, values):
1038 1038 self.addedvalues.update(values)
1039 1039
1040 1040 has = lazyset(self.ancestors(common))
1041 1041 has.add(nullrev)
1042 1042 has.update(common)
1043 1043
1044 1044 # take all ancestors from heads that aren't in has
1045 1045 missing = set()
1046 1046 visit = collections.deque(r for r in heads if r not in has)
1047 1047 while visit:
1048 1048 r = visit.popleft()
1049 1049 if r in missing:
1050 1050 continue
1051 1051 else:
1052 1052 missing.add(r)
1053 1053 for p in self.parentrevs(r):
1054 1054 if p not in has:
1055 1055 visit.append(p)
1056 1056 missing = list(missing)
1057 1057 missing.sort()
1058 1058 return has, [self.node(miss) for miss in missing]
1059 1059
1060 1060 def incrementalmissingrevs(self, common=None):
1061 1061 """Return an object that can be used to incrementally compute the
1062 1062 revision numbers of the ancestors of arbitrary sets that are not
1063 1063 ancestors of common. This is an ancestor.incrementalmissingancestors
1064 1064 object.
1065 1065
1066 1066 'common' is a list of revision numbers. If common is not supplied, uses
1067 1067 nullrev.
1068 1068 """
1069 1069 if common is None:
1070 1070 common = [nullrev]
1071 1071
1072 1072 if rustancestor is not None:
1073 1073 return rustancestor.MissingAncestors(self.index, common)
1074 1074 return ancestor.incrementalmissingancestors(self.parentrevs, common)
1075 1075
1076 1076 def findmissingrevs(self, common=None, heads=None):
1077 1077 """Return the revision numbers of the ancestors of heads that
1078 1078 are not ancestors of common.
1079 1079
1080 1080 More specifically, return a list of revision numbers corresponding to
1081 1081 nodes N such that every N satisfies the following constraints:
1082 1082
1083 1083 1. N is an ancestor of some node in 'heads'
1084 1084 2. N is not an ancestor of any node in 'common'
1085 1085
1086 1086 The list is sorted by revision number, meaning it is
1087 1087 topologically sorted.
1088 1088
1089 1089 'heads' and 'common' are both lists of revision numbers. If heads is
1090 1090 not supplied, uses all of the revlog's heads. If common is not
1091 1091 supplied, uses nullid."""
1092 1092 if common is None:
1093 1093 common = [nullrev]
1094 1094 if heads is None:
1095 1095 heads = self.headrevs()
1096 1096
1097 1097 inc = self.incrementalmissingrevs(common=common)
1098 1098 return inc.missingancestors(heads)
1099 1099
1100 1100 def findmissing(self, common=None, heads=None):
1101 1101 """Return the ancestors of heads that are not ancestors of common.
1102 1102
1103 1103 More specifically, return a list of nodes N such that every N
1104 1104 satisfies the following constraints:
1105 1105
1106 1106 1. N is an ancestor of some node in 'heads'
1107 1107 2. N is not an ancestor of any node in 'common'
1108 1108
1109 1109 The list is sorted by revision number, meaning it is
1110 1110 topologically sorted.
1111 1111
1112 1112 'heads' and 'common' are both lists of node IDs. If heads is
1113 1113 not supplied, uses all of the revlog's heads. If common is not
1114 1114 supplied, uses nullid."""
1115 1115 if common is None:
1116 1116 common = [nullid]
1117 1117 if heads is None:
1118 1118 heads = self.heads()
1119 1119
1120 1120 common = [self.rev(n) for n in common]
1121 1121 heads = [self.rev(n) for n in heads]
1122 1122
1123 1123 inc = self.incrementalmissingrevs(common=common)
1124 1124 return [self.node(r) for r in inc.missingancestors(heads)]
1125 1125
1126 1126 def nodesbetween(self, roots=None, heads=None):
1127 1127 """Return a topological path from 'roots' to 'heads'.
1128 1128
1129 1129 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
1130 1130 topologically sorted list of all nodes N that satisfy both of
1131 1131 these constraints:
1132 1132
1133 1133 1. N is a descendant of some node in 'roots'
1134 1134 2. N is an ancestor of some node in 'heads'
1135 1135
1136 1136 Every node is considered to be both a descendant and an ancestor
1137 1137 of itself, so every reachable node in 'roots' and 'heads' will be
1138 1138 included in 'nodes'.
1139 1139
1140 1140 'outroots' is the list of reachable nodes in 'roots', i.e., the
1141 1141 subset of 'roots' that is returned in 'nodes'. Likewise,
1142 1142 'outheads' is the subset of 'heads' that is also in 'nodes'.
1143 1143
1144 1144 'roots' and 'heads' are both lists of node IDs. If 'roots' is
1145 1145 unspecified, uses nullid as the only root. If 'heads' is
1146 1146 unspecified, uses list of all of the revlog's heads."""
1147 1147 nonodes = ([], [], [])
1148 1148 if roots is not None:
1149 1149 roots = list(roots)
1150 1150 if not roots:
1151 1151 return nonodes
1152 1152 lowestrev = min([self.rev(n) for n in roots])
1153 1153 else:
1154 1154 roots = [nullid] # Everybody's a descendant of nullid
1155 1155 lowestrev = nullrev
1156 1156 if (lowestrev == nullrev) and (heads is None):
1157 1157 # We want _all_ the nodes!
1158 1158 return ([self.node(r) for r in self], [nullid], list(self.heads()))
1159 1159 if heads is None:
1160 1160 # All nodes are ancestors, so the latest ancestor is the last
1161 1161 # node.
1162 1162 highestrev = len(self) - 1
1163 1163 # Set ancestors to None to signal that every node is an ancestor.
1164 1164 ancestors = None
1165 1165 # Set heads to an empty dictionary for later discovery of heads
1166 1166 heads = {}
1167 1167 else:
1168 1168 heads = list(heads)
1169 1169 if not heads:
1170 1170 return nonodes
1171 1171 ancestors = set()
1172 1172 # Turn heads into a dictionary so we can remove 'fake' heads.
1173 1173 # Also, later we will be using it to filter out the heads we can't
1174 1174 # find from roots.
1175 1175 heads = dict.fromkeys(heads, False)
1176 1176 # Start at the top and keep marking parents until we're done.
1177 1177 nodestotag = set(heads)
1178 1178 # Remember where the top was so we can use it as a limit later.
1179 1179 highestrev = max([self.rev(n) for n in nodestotag])
1180 1180 while nodestotag:
1181 1181 # grab a node to tag
1182 1182 n = nodestotag.pop()
1183 1183 # Never tag nullid
1184 1184 if n == nullid:
1185 1185 continue
1186 1186 # A node's revision number represents its place in a
1187 1187 # topologically sorted list of nodes.
1188 1188 r = self.rev(n)
1189 1189 if r >= lowestrev:
1190 1190 if n not in ancestors:
1191 1191 # If we are possibly a descendant of one of the roots
1192 1192 # and we haven't already been marked as an ancestor
1193 1193 ancestors.add(n) # Mark as ancestor
1194 1194 # Add non-nullid parents to list of nodes to tag.
1195 1195 nodestotag.update(
1196 1196 [p for p in self.parents(n) if p != nullid]
1197 1197 )
1198 1198 elif n in heads: # We've seen it before, is it a fake head?
1199 1199 # So it is: real heads should not be the ancestors of
1200 1200 # any other heads.
1201 1201 heads.pop(n)
1202 1202 if not ancestors:
1203 1203 return nonodes
1204 1204 # Now that we have our set of ancestors, we want to remove any
1205 1205 # roots that are not ancestors.
1206 1206
1207 1207 # If one of the roots was nullid, everything is included anyway.
1208 1208 if lowestrev > nullrev:
1209 1209 # But, since we weren't, let's recompute the lowest rev to not
1210 1210 # include roots that aren't ancestors.
1211 1211
1212 1212 # Filter out roots that aren't ancestors of heads
1213 1213 roots = [root for root in roots if root in ancestors]
1214 1214 # Recompute the lowest revision
1215 1215 if roots:
1216 1216 lowestrev = min([self.rev(root) for root in roots])
1217 1217 else:
1218 1218 # No more roots? Return empty list
1219 1219 return nonodes
1220 1220 else:
1221 1221 # We are descending from nullid, and don't need to care about
1222 1222 # any other roots.
1223 1223 lowestrev = nullrev
1224 1224 roots = [nullid]
1225 1225 # Transform our roots list into a set.
1226 1226 descendants = set(roots)
1227 1227 # Also, keep the original roots so we can filter out roots that aren't
1228 1228 # 'real' roots (i.e. are descended from other roots).
1229 1229 roots = descendants.copy()
1230 1230 # Our topologically sorted list of output nodes.
1231 1231 orderedout = []
1232 1232 # Don't start at nullid since we don't want nullid in our output list,
1233 1233 # and if nullid shows up in descendants, empty parents will look like
1234 1234 # they're descendants.
1235 1235 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1236 1236 n = self.node(r)
1237 1237 isdescendant = False
1238 1238 if lowestrev == nullrev: # Everybody is a descendant of nullid
1239 1239 isdescendant = True
1240 1240 elif n in descendants:
1241 1241 # n is already a descendant
1242 1242 isdescendant = True
1243 1243 # This check only needs to be done here because all the roots
1244 1244 # will start being marked as descendants before the loop.
1245 1245 if n in roots:
1246 1246 # If n was a root, check if it's a 'real' root.
1247 1247 p = tuple(self.parents(n))
1248 1248 # If any of its parents are descendants, it's not a root.
1249 1249 if (p[0] in descendants) or (p[1] in descendants):
1250 1250 roots.remove(n)
1251 1251 else:
1252 1252 p = tuple(self.parents(n))
1253 1253 # A node is a descendant if either of its parents is a
1254 1254 # descendant. (We seeded the descendants set with the roots
1255 1255 # up there, remember?)
1256 1256 if (p[0] in descendants) or (p[1] in descendants):
1257 1257 descendants.add(n)
1258 1258 isdescendant = True
1259 1259 if isdescendant and ((ancestors is None) or (n in ancestors)):
1260 1260 # Only include nodes that are both descendants and ancestors.
1261 1261 orderedout.append(n)
1262 1262 if (ancestors is not None) and (n in heads):
1263 1263 # We're trying to figure out which heads are reachable
1264 1264 # from roots.
1265 1265 # Mark this head as having been reached
1266 1266 heads[n] = True
1267 1267 elif ancestors is None:
1268 1268 # Otherwise, we're trying to discover the heads.
1269 1269 # Assume this is a head because if it isn't, the next step
1270 1270 # will eventually remove it.
1271 1271 heads[n] = True
1272 1272 # But, obviously its parents aren't.
1273 1273 for p in self.parents(n):
1274 1274 heads.pop(p, None)
1275 1275 heads = [head for head, flag in pycompat.iteritems(heads) if flag]
1276 1276 roots = list(roots)
1277 1277 assert orderedout
1278 1278 assert roots
1279 1279 assert heads
1280 1280 return (orderedout, roots, heads)
1281 1281
1282 1282 def headrevs(self, revs=None):
1283 1283 if revs is None:
1284 1284 try:
1285 1285 return self.index.headrevs()
1286 1286 except AttributeError:
1287 1287 return self._headrevs()
1288 1288 if rustdagop is not None:
1289 1289 return rustdagop.headrevs(self.index, revs)
1290 1290 return dagop.headrevs(revs, self._uncheckedparentrevs)
1291 1291
1292 1292 def computephases(self, roots):
1293 1293 return self.index.computephasesmapsets(roots)
1294 1294
1295 1295 def _headrevs(self):
1296 1296 count = len(self)
1297 1297 if not count:
1298 1298 return [nullrev]
1299 1299 # we won't iterate over filtered revs, so nobody is a head at start
1300 1300 ishead = [0] * (count + 1)
1301 1301 index = self.index
1302 1302 for r in self:
1303 1303 ishead[r] = 1 # I may be a head
1304 1304 e = index[r]
1305 1305 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1306 1306 return [r for r, val in enumerate(ishead) if val]
1307 1307
1308 1308 def heads(self, start=None, stop=None):
1309 1309 """return the list of all nodes that have no children
1310 1310
1311 1311 if start is specified, only heads that are descendants of
1312 1312 start will be returned
1313 1313 if stop is specified, it will consider all the revs from stop
1314 1314 as if they had no children
1315 1315 """
1316 1316 if start is None and stop is None:
1317 1317 if not len(self):
1318 1318 return [nullid]
1319 1319 return [self.node(r) for r in self.headrevs()]
1320 1320
1321 1321 if start is None:
1322 1322 start = nullrev
1323 1323 else:
1324 1324 start = self.rev(start)
1325 1325
1326 1326 stoprevs = {self.rev(n) for n in stop or []}
1327 1327
1328 1328 revs = dagop.headrevssubset(
1329 1329 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
1330 1330 )
1331 1331
1332 1332 return [self.node(rev) for rev in revs]
1333 1333
1334 1334 def children(self, node):
1335 1335 """find the children of a given node"""
1336 1336 c = []
1337 1337 p = self.rev(node)
1338 1338 for r in self.revs(start=p + 1):
1339 1339 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1340 1340 if prevs:
1341 1341 for pr in prevs:
1342 1342 if pr == p:
1343 1343 c.append(self.node(r))
1344 1344 elif p == nullrev:
1345 1345 c.append(self.node(r))
1346 1346 return c
1347 1347
1348 1348 def commonancestorsheads(self, a, b):
1349 1349 """calculate all the heads of the common ancestors of nodes a and b"""
1350 1350 a, b = self.rev(a), self.rev(b)
1351 1351 ancs = self._commonancestorsheads(a, b)
1352 1352 return pycompat.maplist(self.node, ancs)
1353 1353
1354 1354 def _commonancestorsheads(self, *revs):
1355 1355 """calculate all the heads of the common ancestors of revs"""
1356 1356 try:
1357 1357 ancs = self.index.commonancestorsheads(*revs)
1358 1358 except (AttributeError, OverflowError): # C implementation failed
1359 1359 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1360 1360 return ancs
1361 1361
1362 1362 def isancestor(self, a, b):
1363 1363 """return True if node a is an ancestor of node b
1364 1364
1365 1365 A revision is considered an ancestor of itself."""
1366 1366 a, b = self.rev(a), self.rev(b)
1367 1367 return self.isancestorrev(a, b)
1368 1368
1369 1369 def isancestorrev(self, a, b):
1370 1370 """return True if revision a is an ancestor of revision b
1371 1371
1372 1372 A revision is considered an ancestor of itself.
1373 1373
1374 1374 The implementation of this is trivial but the use of
1375 1375 reachableroots is not."""
1376 1376 if a == nullrev:
1377 1377 return True
1378 1378 elif a == b:
1379 1379 return True
1380 1380 elif a > b:
1381 1381 return False
1382 1382 return bool(self.reachableroots(a, [b], [a], includepath=False))
1383 1383
1384 1384 def reachableroots(self, minroot, heads, roots, includepath=False):
1385 1385 """return (heads(::(<roots> and <roots>::<heads>)))
1386 1386
1387 1387 If includepath is True, return (<roots>::<heads>)."""
1388 1388 try:
1389 1389 return self.index.reachableroots2(
1390 1390 minroot, heads, roots, includepath
1391 1391 )
1392 1392 except AttributeError:
1393 1393 return dagop._reachablerootspure(
1394 1394 self.parentrevs, minroot, roots, heads, includepath
1395 1395 )
1396 1396
1397 1397 def ancestor(self, a, b):
1398 1398 """calculate the "best" common ancestor of nodes a and b"""
1399 1399
1400 1400 a, b = self.rev(a), self.rev(b)
1401 1401 try:
1402 1402 ancs = self.index.ancestors(a, b)
1403 1403 except (AttributeError, OverflowError):
1404 1404 ancs = ancestor.ancestors(self.parentrevs, a, b)
1405 1405 if ancs:
1406 1406 # choose a consistent winner when there's a tie
1407 1407 return min(map(self.node, ancs))
1408 1408 return nullid
1409 1409
1410 1410 def _match(self, id):
1411 1411 if isinstance(id, int):
1412 1412 # rev
1413 1413 return self.node(id)
1414 1414 if len(id) == 20:
1415 1415 # possibly a binary node
1416 1416 # odds of a binary node being all hex in ASCII are 1 in 10**25
1417 1417 try:
1418 1418 node = id
1419 1419 self.rev(node) # quick search the index
1420 1420 return node
1421 1421 except error.LookupError:
1422 1422 pass # may be partial hex id
1423 1423 try:
1424 1424 # str(rev)
1425 1425 rev = int(id)
1426 1426 if b"%d" % rev != id:
1427 1427 raise ValueError
1428 1428 if rev < 0:
1429 1429 rev = len(self) + rev
1430 1430 if rev < 0 or rev >= len(self):
1431 1431 raise ValueError
1432 1432 return self.node(rev)
1433 1433 except (ValueError, OverflowError):
1434 1434 pass
1435 1435 if len(id) == 40:
1436 1436 try:
1437 1437 # a full hex nodeid?
1438 1438 node = bin(id)
1439 1439 self.rev(node)
1440 1440 return node
1441 1441 except (TypeError, error.LookupError):
1442 1442 pass
1443 1443
1444 1444 def _partialmatch(self, id):
1445 1445 # we don't care about wdirfilenodeids as they should always be full hashes
1446 1446 maybewdir = wdirhex.startswith(id)
1447 1447 try:
1448 1448 partial = self.index.partialmatch(id)
1449 1449 if partial and self.hasnode(partial):
1450 1450 if maybewdir:
1451 1451 # single 'ff...' match in radix tree, ambiguous with wdir
1452 1452 raise error.RevlogError
1453 1453 return partial
1454 1454 if maybewdir:
1455 1455 # no 'ff...' match in radix tree, wdir identified
1456 1456 raise error.WdirUnsupported
1457 1457 return None
1458 1458 except error.RevlogError:
1459 1459 # parsers.c radix tree lookup gave multiple matches
1460 1460 # fast path: for unfiltered changelog, radix tree is accurate
1461 1461 if not getattr(self, 'filteredrevs', None):
1462 1462 raise error.AmbiguousPrefixLookupError(
1463 1463 id, self.indexfile, _(b'ambiguous identifier')
1464 1464 )
1465 1465 # fall through to slow path that filters hidden revisions
1466 1466 except (AttributeError, ValueError):
1467 1467 # we are pure python, or key was too short to search radix tree
1468 1468 pass
1469 1469
1470 1470 if id in self._pcache:
1471 1471 return self._pcache[id]
1472 1472
1473 1473 if len(id) <= 40:
1474 1474 try:
1475 1475 # hex(node)[:...]
1476 1476 l = len(id) // 2 # grab an even number of digits
1477 1477 prefix = bin(id[: l * 2])
1478 1478 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1479 1479 nl = [
1480 1480 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
1481 1481 ]
1482 1482 if nullhex.startswith(id):
1483 1483 nl.append(nullid)
1484 1484 if len(nl) > 0:
1485 1485 if len(nl) == 1 and not maybewdir:
1486 1486 self._pcache[id] = nl[0]
1487 1487 return nl[0]
1488 1488 raise error.AmbiguousPrefixLookupError(
1489 1489 id, self.indexfile, _(b'ambiguous identifier')
1490 1490 )
1491 1491 if maybewdir:
1492 1492 raise error.WdirUnsupported
1493 1493 return None
1494 1494 except TypeError:
1495 1495 pass
1496 1496
1497 1497 def lookup(self, id):
1498 1498 """locate a node based on:
1499 1499 - revision number or str(revision number)
1500 1500 - nodeid or subset of hex nodeid
1501 1501 """
1502 1502 n = self._match(id)
1503 1503 if n is not None:
1504 1504 return n
1505 1505 n = self._partialmatch(id)
1506 1506 if n:
1507 1507 return n
1508 1508
1509 1509 raise error.LookupError(id, self.indexfile, _(b'no match found'))
1510 1510
1511 1511 def shortest(self, node, minlength=1):
1512 1512 """Find the shortest unambiguous prefix that matches node."""
1513 1513
1514 1514 def isvalid(prefix):
1515 1515 try:
1516 1516 matchednode = self._partialmatch(prefix)
1517 1517 except error.AmbiguousPrefixLookupError:
1518 1518 return False
1519 1519 except error.WdirUnsupported:
1520 1520 # single 'ff...' match
1521 1521 return True
1522 1522 if matchednode is None:
1523 1523 raise error.LookupError(node, self.indexfile, _(b'no node'))
1524 1524 return True
1525 1525
1526 1526 def maybewdir(prefix):
1527 1527 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
1528 1528
1529 1529 hexnode = hex(node)
1530 1530
1531 1531 def disambiguate(hexnode, minlength):
1532 1532 """Disambiguate against wdirid."""
1533 1533 for length in range(minlength, len(hexnode) + 1):
1534 1534 prefix = hexnode[:length]
1535 1535 if not maybewdir(prefix):
1536 1536 return prefix
1537 1537
1538 1538 if not getattr(self, 'filteredrevs', None):
1539 1539 try:
1540 1540 length = max(self.index.shortest(node), minlength)
1541 1541 return disambiguate(hexnode, length)
1542 1542 except error.RevlogError:
1543 1543 if node != wdirid:
1544 1544 raise error.LookupError(node, self.indexfile, _(b'no node'))
1545 1545 except AttributeError:
1546 1546 # Fall through to pure code
1547 1547 pass
1548 1548
1549 1549 if node == wdirid:
1550 1550 for length in range(minlength, len(hexnode) + 1):
1551 1551 prefix = hexnode[:length]
1552 1552 if isvalid(prefix):
1553 1553 return prefix
1554 1554
1555 1555 for length in range(minlength, len(hexnode) + 1):
1556 1556 prefix = hexnode[:length]
1557 1557 if isvalid(prefix):
1558 1558 return disambiguate(hexnode, length)
1559 1559
1560 1560 def cmp(self, node, text):
1561 1561 """compare text with a given file revision
1562 1562
1563 1563 returns True if text is different from what is stored.
1564 1564 """
1565 1565 p1, p2 = self.parents(node)
1566 1566 return storageutil.hashrevisionsha1(text, p1, p2) != node
1567 1567
1568 1568 def _cachesegment(self, offset, data):
1569 1569 """Add a segment to the revlog cache.
1570 1570
1571 1571 Accepts an absolute offset and the data that is at that location.
1572 1572 """
1573 1573 o, d = self._chunkcache
1574 1574 # try to add to existing cache
1575 1575 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1576 1576 self._chunkcache = o, d + data
1577 1577 else:
1578 1578 self._chunkcache = offset, data
1579 1579
1580 1580 def _readsegment(self, offset, length, df=None):
1581 1581 """Load a segment of raw data from the revlog.
1582 1582
1583 1583 Accepts an absolute offset, length to read, and an optional existing
1584 1584 file handle to read from.
1585 1585
1586 1586 If an existing file handle is passed, it will be seeked and the
1587 1587 original seek position will NOT be restored.
1588 1588
1589 1589 Returns a str or buffer of raw byte data.
1590 1590
1591 1591 Raises if the requested number of bytes could not be read.
1592 1592 """
1593 1593 # Cache data both forward and backward around the requested
1594 1594 # data, in a fixed size window. This helps speed up operations
1595 1595 # involving reading the revlog backwards.
1596 1596 cachesize = self._chunkcachesize
1597 1597 realoffset = offset & ~(cachesize - 1)
1598 1598 reallength = (
1599 1599 (offset + length + cachesize) & ~(cachesize - 1)
1600 1600 ) - realoffset
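        # Editor's note, worked example: with the default cachesize of 65536,
        # a request for offset=70000, length=100 aligns to realoffset=65536
        # and reallength=65536, i.e. the whole 64KiB window containing the
        # request is read and cached.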
1601 1601 with self._datareadfp(df) as df:
1602 1602 df.seek(realoffset)
1603 1603 d = df.read(reallength)
1604 1604
1605 1605 self._cachesegment(realoffset, d)
1606 1606 if offset != realoffset or reallength != length:
1607 1607 startoffset = offset - realoffset
1608 1608 if len(d) - startoffset < length:
1609 1609 raise error.RevlogError(
1610 1610 _(
1611 1611 b'partial read of revlog %s; expected %d bytes from '
1612 1612 b'offset %d, got %d'
1613 1613 )
1614 1614 % (
1615 1615 self.indexfile if self._inline else self.datafile,
1616 1616 length,
1617 1617 realoffset,
1618 1618 len(d) - startoffset,
1619 1619 )
1620 1620 )
1621 1621
1622 1622 return util.buffer(d, startoffset, length)
1623 1623
1624 1624 if len(d) < length:
1625 1625 raise error.RevlogError(
1626 1626 _(
1627 1627 b'partial read of revlog %s; expected %d bytes from offset '
1628 1628 b'%d, got %d'
1629 1629 )
1630 1630 % (
1631 1631 self.indexfile if self._inline else self.datafile,
1632 1632 length,
1633 1633 offset,
1634 1634 len(d),
1635 1635 )
1636 1636 )
1637 1637
1638 1638 return d
1639 1639
1640 1640 def _getsegment(self, offset, length, df=None):
1641 1641 """Obtain a segment of raw data from the revlog.
1642 1642
1643 1643 Accepts an absolute offset, length of bytes to obtain, and an
1644 1644 optional file handle to the already-opened revlog. If the file
1645 1645 handle is used, its original seek position will not be preserved.
1646 1646
1647 1647 Requests for data may be returned from a cache.
1648 1648
1649 1649 Returns a str or a buffer instance of raw byte data.
1650 1650 """
1651 1651 o, d = self._chunkcache
1652 1652 l = len(d)
1653 1653
1654 1654 # is it in the cache?
1655 1655 cachestart = offset - o
1656 1656 cacheend = cachestart + length
1657 1657 if cachestart >= 0 and cacheend <= l:
1658 1658 if cachestart == 0 and cacheend == l:
1659 1659 return d # avoid a copy
1660 1660 return util.buffer(d, cachestart, cacheend - cachestart)
1661 1661
1662 1662 return self._readsegment(offset, length, df=df)
1663 1663
1664 1664 def _getsegmentforrevs(self, startrev, endrev, df=None):
1665 1665 """Obtain a segment of raw data corresponding to a range of revisions.
1666 1666
1667 1667 Accepts the start and end revisions and an optional already-open
1668 1668 file handle to be used for reading. If the file handle is read, its
1669 1669 seek position will not be preserved.
1670 1670
1671 1671 Requests for data may be satisfied by a cache.
1672 1672
1673 1673 Returns a 2-tuple of (offset, data) for the requested range of
1674 1674 revisions. Offset is the integer offset from the beginning of the
1675 1675 revlog and data is a str or buffer of the raw byte data.
1676 1676
1677 1677 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1678 1678 to determine where each revision's data begins and ends.
1679 1679 """
1680 1680 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1681 1681 # (functions are expensive).
1682 1682 index = self.index
1683 1683 istart = index[startrev]
1684 1684 start = int(istart[0] >> 16)
1685 1685 if startrev == endrev:
1686 1686 end = start + istart[1]
1687 1687 else:
1688 1688 iend = index[endrev]
1689 1689 end = int(iend[0] >> 16) + iend[1]
1690 1690
1691 1691 if self._inline:
1692 1692 start += (startrev + 1) * self._io.size
1693 1693 end += (endrev + 1) * self._io.size
1694 1694 length = end - start
1695 1695
1696 1696 return start, self._getsegment(start, length, df=df)
1697 1697
1698 1698 def _chunk(self, rev, df=None):
1699 1699 """Obtain a single decompressed chunk for a revision.
1700 1700
1701 1701 Accepts an integer revision and an optional already-open file handle
1702 1702 to be used for reading. If used, the seek position of the file will not
1703 1703 be preserved.
1704 1704
1705 1705 Returns a str holding uncompressed data for the requested revision.
1706 1706 """
1707 1707 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1708 1708
1709 1709 def _chunks(self, revs, df=None, targetsize=None):
1710 1710 """Obtain decompressed chunks for the specified revisions.
1711 1711
1712 1712 Accepts an iterable of numeric revisions that are assumed to be in
1713 1713 ascending order. Also accepts an optional already-open file handle
1714 1714 to be used for reading. If used, the seek position of the file will
1715 1715 not be preserved.
1716 1716
1717 1717 This function is similar to calling ``self._chunk()`` multiple times,
1718 1718 but is faster.
1719 1719
1720 1720 Returns a list with decompressed data for each requested revision.
1721 1721 """
1722 1722 if not revs:
1723 1723 return []
1724 1724 start = self.start
1725 1725 length = self.length
1726 1726 inline = self._inline
1727 1727 iosize = self._io.size
1728 1728 buffer = util.buffer
1729 1729
1730 1730 l = []
1731 1731 ladd = l.append
1732 1732
1733 1733 if not self._withsparseread:
1734 1734 slicedchunks = (revs,)
1735 1735 else:
1736 1736 slicedchunks = deltautil.slicechunk(
1737 1737 self, revs, targetsize=targetsize
1738 1738 )
1739 1739
1740 1740 for revschunk in slicedchunks:
1741 1741 firstrev = revschunk[0]
1742 1742 # Skip trailing revisions with empty diff
1743 1743 for lastrev in revschunk[::-1]:
1744 1744 if length(lastrev) != 0:
1745 1745 break
1746 1746
1747 1747 try:
1748 1748 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1749 1749 except OverflowError:
1750 1750 # issue4215 - we can't cache a run of chunks greater than
1751 1751 # 2G on Windows
1752 1752 return [self._chunk(rev, df=df) for rev in revschunk]
1753 1753
1754 1754 decomp = self.decompress
1755 1755 for rev in revschunk:
1756 1756 chunkstart = start(rev)
1757 1757 if inline:
1758 1758 chunkstart += (rev + 1) * iosize
1759 1759 chunklength = length(rev)
1760 1760 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1761 1761
1762 1762 return l
1763 1763
1764 1764 def _chunkclear(self):
1765 1765 """Clear the raw chunk cache."""
1766 1766 self._chunkcache = (0, b'')
1767 1767
1768 1768 def deltaparent(self, rev):
1769 1769 """return deltaparent of the given revision"""
1770 1770 base = self.index[rev][3]
1771 1771 if base == rev:
1772 1772 return nullrev
1773 1773 elif self._generaldelta:
1774 1774 return base
1775 1775 else:
1776 1776 return rev - 1
1777 1777
1778 1778 def issnapshot(self, rev):
1779 1779 """tells whether rev is a snapshot"""
1780 1780 if not self._sparserevlog:
1781 1781 return self.deltaparent(rev) == nullrev
1782 1782 elif util.safehasattr(self.index, 'issnapshot'):
1783 1783 # directly assign the method to cache both the attribute check and the access
1784 1784 self.issnapshot = self.index.issnapshot
1785 1785 return self.issnapshot(rev)
1786 1786 if rev == nullrev:
1787 1787 return True
1788 1788 entry = self.index[rev]
1789 1789 base = entry[3]
1790 1790 if base == rev:
1791 1791 return True
1792 1792 if base == nullrev:
1793 1793 return True
1794 1794 p1 = entry[5]
1795 1795 p2 = entry[6]
1796 1796 if base == p1 or base == p2:
1797 1797 return False
1798 1798 return self.issnapshot(base)
1799 1799
1800 1800 def snapshotdepth(self, rev):
1801 1801 """number of snapshot in the chain before this one"""
1802 1802 if not self.issnapshot(rev):
1803 1803 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
1804 1804 return len(self._deltachain(rev)[0]) - 1
1805 1805
1806 1806 def revdiff(self, rev1, rev2):
1807 1807 """return or calculate a delta between two revisions
1808 1808
1809 1809 The delta calculated is in binary form and is intended to be written to
1810 1810 revlog data directly. So this function needs raw revision data.
1811 1811 """
1812 1812 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1813 1813 return bytes(self._chunk(rev2))
1814 1814
1815 1815 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
1816 1816
1817 1817 def _processflags(self, text, flags, operation, raw=False):
1818 1818 """deprecated entry point to access flag processors"""
1819 1819 msg = b'_processflag(...) use the specialized variant'
1820 1820 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1821 1821 if raw:
1822 1822 return text, flagutil.processflagsraw(self, text, flags)
1823 1823 elif operation == b'read':
1824 1824 return flagutil.processflagsread(self, text, flags)
1825 1825 else: # write operation
1826 1826 return flagutil.processflagswrite(self, text, flags, None)
1827 1827
1828 1828 def revision(self, nodeorrev, _df=None, raw=False):
1829 1829 """return an uncompressed revision of a given node or revision
1830 1830 number.
1831 1831
1832 1832 _df - an existing file handle to read from. (internal-only)
1833 1833 raw - an optional argument specifying if the revision data is to be
1834 1834 treated as raw data when applying flag transforms. 'raw' should be set
1835 1835 to True when generating changegroups or in debug commands.
1836 1836 """
1837 1837 if raw:
1838 1838 msg = (
1839 1839 b'revlog.revision(..., raw=True) is deprecated, '
1840 1840 b'use revlog.rawdata(...)'
1841 1841 )
1842 1842 util.nouideprecwarn(msg, b'5.2', stacklevel=2)
1843 1843 return self._revisiondata(nodeorrev, _df, raw=raw)[0]
1844 1844
1845 1845 def sidedata(self, nodeorrev, _df=None):
1846 1846 """a map of extra data related to the changeset but not part of the hash
1847 1847
1848 1848 This function currently returns a dictionary. However, a more advanced
1849 1849 mapping object will likely be used in the future for more
1850 1850 efficient/lazy code.
1851 1851 """
1852 1852 return self._revisiondata(nodeorrev, _df)[1]
1853 1853
1854 1854 def _revisiondata(self, nodeorrev, _df=None, raw=False):
1855 1855 # deal with <nodeorrev> argument type
1856 1856 if isinstance(nodeorrev, int):
1857 1857 rev = nodeorrev
1858 1858 node = self.node(rev)
1859 1859 else:
1860 1860 node = nodeorrev
1861 1861 rev = None
1862 1862
1863 1863 # fast path the special `nullid` rev
1864 1864 if node == nullid:
1865 1865 return b"", {}
1866 1866
1867 1867 # ``rawtext`` is the text as stored inside the revlog. Might be the
1868 1868 # revision or might need to be processed to retrieve the revision.
1869 1869 rev, rawtext, validated = self._rawtext(node, rev, _df=_df)
1870 1870
1871 1871 if raw and validated:
1872 1872 # if we don't want to process the raw text and that raw
1873 1873 # text is cached, we can exit early.
1874 1874 return rawtext, {}
1875 1875 if rev is None:
1876 1876 rev = self.rev(node)
1877 1877 # the revlog's flags for this revision
1878 1878 # (they usually alter its state or content)
1879 1879 flags = self.flags(rev)
1880 1880
1881 1881 if validated and flags == REVIDX_DEFAULT_FLAGS:
1882 1882 # no extra flags set, no flag processor runs, text = rawtext
1883 1883 return rawtext, {}
1884 1884
1885 1885 sidedata = {}
1886 1886 if raw:
1887 1887 validatehash = flagutil.processflagsraw(self, rawtext, flags)
1888 1888 text = rawtext
1889 1889 else:
1890 1890 try:
1891 1891 r = flagutil.processflagsread(self, rawtext, flags)
1892 1892 except error.SidedataHashError as exc:
1893 1893 msg = _(b"integrity check failed on %s:%s sidedata key %d")
1894 1894 msg %= (self.indexfile, pycompat.bytestr(rev), exc.sidedatakey)
1895 1895 raise error.RevlogError(msg)
1896 1896 text, validatehash, sidedata = r
1897 1897 if validatehash:
1898 1898 self.checkhash(text, node, rev=rev)
1899 1899 if not validated:
1900 1900 self._revisioncache = (node, rev, rawtext)
1901 1901
1902 1902 return text, sidedata
1903 1903
1904 1904 def _rawtext(self, node, rev, _df=None):
1905 1905 """return the possibly unvalidated rawtext for a revision
1906 1906
1907 1907 returns (rev, rawtext, validated)
1908 1908 """
1909 1909
1910 1910 # revision in the cache (could be useful to apply delta)
1911 1911 cachedrev = None
1912 1912 # An intermediate text to apply deltas to
1913 1913 basetext = None
1914 1914
1915 1915 # Check if we have the entry in cache
1916 1916 # The cache entry looks like (node, rev, rawtext)
1917 1917 if self._revisioncache:
1918 1918 if self._revisioncache[0] == node:
1919 1919 return (rev, self._revisioncache[2], True)
1920 1920 cachedrev = self._revisioncache[1]
1921 1921
1922 1922 if rev is None:
1923 1923 rev = self.rev(node)
1924 1924
1925 1925 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1926 1926 if stopped:
1927 1927 basetext = self._revisioncache[2]
1928 1928
1929 1929 # drop cache to save memory, the caller is expected to
1930 1930 # update self._revisioncache after validating the text
1931 1931 self._revisioncache = None
1932 1932
1933 1933 targetsize = None
1934 1934 rawsize = self.index[rev][2]
1935 1935 if 0 <= rawsize:
1936 1936 targetsize = 4 * rawsize
1937 1937
1938 1938 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1939 1939 if basetext is None:
1940 1940 basetext = bytes(bins[0])
1941 1941 bins = bins[1:]
1942 1942
1943 1943 rawtext = mdiff.patches(basetext, bins)
1944 1944 del basetext # let us have a chance to free memory early
1945 1945 return (rev, rawtext, False)
1946 1946
1947 1947 def rawdata(self, nodeorrev, _df=None):
1948 1948 """return an uncompressed raw data of a given node or revision number.
1949 1949
1950 1950 _df - an existing file handle to read from. (internal-only)
1951 1951 """
1952 1952 return self._revisiondata(nodeorrev, _df, raw=True)[0]
1953 1953
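# Illustrative note (not part of the original file): revision() returns the
# flag-processed text, rawdata() returns the bytes as stored in the revlog,
# and sidedata() exposes the extra mapping computed by _revisiondata(). A
# minimal usage sketch, assuming `rl` is a revlog and `node` a known node:
#
#     >>> text = rl.revision(node)   # processed text (flag processors applied)
#     >>> raw = rl.rawdata(node)     # raw stored text, e.g. for changegroups
#     >>> extra = rl.sidedata(node)  # dict of sidedata, empty if none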
1954 1954 def hash(self, text, p1, p2):
1955 1955 """Compute a node hash.
1956 1956
1957 1957 Available as a function so that subclasses can replace the hash
1958 1958 as needed.
1959 1959 """
1960 1960 return storageutil.hashrevisionsha1(text, p1, p2)
1961 1961
1962 1962 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1963 1963 """Check node hash integrity.
1964 1964
1965 1965 Available as a function so that subclasses can extend hash mismatch
1966 1966 behaviors as needed.
1967 1967 """
1968 1968 try:
1969 1969 if p1 is None and p2 is None:
1970 1970 p1, p2 = self.parents(node)
1971 1971 if node != self.hash(text, p1, p2):
1972 1972 # Clear the revision cache on hash failure. The revision cache
1973 1973 # only stores the raw revision and clearing the cache does have
1974 1974 # the side-effect that we won't have a cache hit when the raw
1975 1975 # revision data is accessed. But this case should be rare and
1976 1976 # it is extra work to teach the cache about the hash
1977 1977 # verification state.
1978 1978 if self._revisioncache and self._revisioncache[0] == node:
1979 1979 self._revisioncache = None
1980 1980
1981 1981 revornode = rev
1982 1982 if revornode is None:
1983 1983 revornode = templatefilters.short(hex(node))
1984 1984 raise error.RevlogError(
1985 1985 _(b"integrity check failed on %s:%s")
1986 1986 % (self.indexfile, pycompat.bytestr(revornode))
1987 1987 )
1988 1988 except error.RevlogError:
1989 1989 if self._censorable and storageutil.iscensoredtext(text):
1990 1990 raise error.CensoredNodeError(self.indexfile, node, text)
1991 1991 raise
1992 1992
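# Illustrative note (not part of the original file): hash() delegates to
# storageutil.hashrevisionsha1(), which, to the best of my understanding,
# computes the classic Mercurial nodeid: SHA-1 over the two parent nodes in
# sorted order followed by the text. A rough sketch of that formula:
#
#     >>> import hashlib
#     >>> def nodeid(text, p1, p2):
#     ...     a, b = sorted([p1, p2])
#     ...     return hashlib.sha1(a + b + text).digest()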
1993 1993 def _enforceinlinesize(self, tr, fp=None):
1994 1994 """Check if the revlog is too big for inline and convert if so.
1995 1995
1996 1996 This should be called after revisions are added to the revlog. If the
1997 1997 revlog has grown too large to be an inline revlog, it will convert it
1998 1998 to use multiple index and data files.
1999 1999 """
2000 2000 tiprev = len(self) - 1
2001 2001 if (
2002 2002 not self._inline
2003 2003 or (self.start(tiprev) + self.length(tiprev)) < _maxinline
2004 2004 ):
2005 2005 return
2006 2006
2007 2007 troffset = tr.findoffset(self.indexfile)
2008 2008 if troffset is None:
2009 2009 raise error.RevlogError(
2010 2010 _(b"%s not found in the transaction") % self.indexfile
2011 2011 )
2012 2012 trindex = 0
2013 2013 tr.add(self.datafile, 0)
2014 2014
2015 2015 if fp:
2016 2016 fp.flush()
2017 2017 fp.close()
2018 2018 # We can't use the cached file handle after close(). So prevent
2019 2019 # its usage.
2020 2020 self._writinghandles = None
2021 2021
2022 2022 with self._indexfp(b'r') as ifh, self._datafp(b'w') as dfh:
2023 2023 for r in self:
2024 2024 dfh.write(self._getsegmentforrevs(r, r, df=ifh)[1])
2025 2025 if troffset <= self.start(r):
2026 2026 trindex = r
2027 2027
2028 2028 with self._indexfp(b'w') as fp:
2029 2029 self.version &= ~FLAG_INLINE_DATA
2030 2030 self._inline = False
2031 2031 io = self._io
2032 2032 for i in self:
2033 2033 e = io.packentry(self.index[i], self.node, self.version, i)
2034 2034 fp.write(e)
2035 2035
2036 2036 # the temp file replaces the real index when we exit the context
2037 2037 # manager
2038 2038
2039 2039 tr.replace(self.indexfile, trindex * self._io.size)
2040 2040 nodemaputil.setup_persistent_nodemap(tr, self)
2041 2041 self._chunkclear()
2042 2042
2043 2043 def _nodeduplicatecallback(self, transaction, node):
2044 2044 """called when trying to add a node already stored."""
2045 2045
2046 2046 def addrevision(
2047 2047 self,
2048 2048 text,
2049 2049 transaction,
2050 2050 link,
2051 2051 p1,
2052 2052 p2,
2053 2053 cachedelta=None,
2054 2054 node=None,
2055 2055 flags=REVIDX_DEFAULT_FLAGS,
2056 2056 deltacomputer=None,
2057 2057 sidedata=None,
2058 2058 ):
2059 2059 """add a revision to the log
2060 2060
2061 2061 text - the revision data to add
2062 2062 transaction - the transaction object used for rollback
2063 2063 link - the linkrev data to add
2064 2064 p1, p2 - the parent nodeids of the revision
2065 2065 cachedelta - an optional precomputed delta
2066 2066 node - nodeid of revision; typically node is not specified, and it is
2067 2067 computed by default as hash(text, p1, p2), however subclasses might
2068 2068 use a different hashing method (and override checkhash() in such a case)
2069 2069 flags - the known flags to set on the revision
2070 2070 deltacomputer - an optional deltacomputer instance shared between
2071 2071 multiple calls
2072 2072 """
2073 2073 if link == nullrev:
2074 2074 raise error.RevlogError(
2075 2075 _(b"attempted to add linkrev -1 to %s") % self.indexfile
2076 2076 )
2077 2077
2078 2078 if sidedata is None:
2079 2079 sidedata = {}
2080 2080 flags = flags & ~REVIDX_SIDEDATA
2081 2081 elif not self.hassidedata:
2082 2082 raise error.ProgrammingError(
2083 2083 _(b"trying to add sidedata to a revlog who don't support them")
2084 2084 )
2085 2085 else:
2086 2086 flags |= REVIDX_SIDEDATA
2087 2087
2088 2088 if flags:
2089 2089 node = node or self.hash(text, p1, p2)
2090 2090
2091 2091 rawtext, validatehash = flagutil.processflagswrite(
2092 2092 self, text, flags, sidedata=sidedata
2093 2093 )
2094 2094
2095 2095 # If the flag processor modifies the revision data, ignore any provided
2096 2096 # cachedelta.
2097 2097 if rawtext != text:
2098 2098 cachedelta = None
2099 2099
2100 2100 if len(rawtext) > _maxentrysize:
2101 2101 raise error.RevlogError(
2102 2102 _(
2103 2103 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
2104 2104 )
2105 2105 % (self.indexfile, len(rawtext))
2106 2106 )
2107 2107
2108 2108 node = node or self.hash(rawtext, p1, p2)
2109 if self.index.has_node(node):
2110 return node
2109 rev = self.index.get_rev(node)
2110 if rev is not None:
2111 return rev
2111 2112
2112 2113 if validatehash:
2113 2114 self.checkhash(rawtext, node, p1=p1, p2=p2)
2114 2115
2115 rev = self.addrawrevision(
2116 return self.addrawrevision(
2116 2117 rawtext,
2117 2118 transaction,
2118 2119 link,
2119 2120 p1,
2120 2121 p2,
2121 2122 node,
2122 2123 flags,
2123 2124 cachedelta=cachedelta,
2124 2125 deltacomputer=deltacomputer,
2125 2126 )
2126 return node
2127 2127
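# Illustrative note (not part of the original file): with this changeset,
# addrevision() returns the new revision number instead of the node, and it
# short-circuits to the existing revision number when the node is already
# stored. A caller-side sketch, assuming `rl`, an open transaction `tr`,
# a link revision `linkrev` and parent nodes `p1`/`p2`:
#
#     >>> rev = rl.addrevision(text, tr, linkrev, p1, p2)
#     >>> node = rl.node(rev)   # callers that still need the node look it up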
2128 2128 def addrawrevision(
2129 2129 self,
2130 2130 rawtext,
2131 2131 transaction,
2132 2132 link,
2133 2133 p1,
2134 2134 p2,
2135 2135 node,
2136 2136 flags,
2137 2137 cachedelta=None,
2138 2138 deltacomputer=None,
2139 2139 ):
2140 2140 """add a raw revision with known flags, node and parents
2141 2141 useful when reusing a revision not stored in this revlog (e.g. received
2142 2142 over the wire, or read from an external bundle).
2143 2143 """
2144 2144 dfh = None
2145 2145 if not self._inline:
2146 2146 dfh = self._datafp(b"a+")
2147 2147 ifh = self._indexfp(b"a+")
2148 2148 try:
2149 2149 return self._addrevision(
2150 2150 node,
2151 2151 rawtext,
2152 2152 transaction,
2153 2153 link,
2154 2154 p1,
2155 2155 p2,
2156 2156 flags,
2157 2157 cachedelta,
2158 2158 ifh,
2159 2159 dfh,
2160 2160 deltacomputer=deltacomputer,
2161 2161 )
2162 2162 finally:
2163 2163 if dfh:
2164 2164 dfh.close()
2165 2165 ifh.close()
2166 2166
2167 2167 def compress(self, data):
2168 2168 """Generate a possibly-compressed representation of data."""
2169 2169 if not data:
2170 2170 return b'', data
2171 2171
2172 2172 compressed = self._compressor.compress(data)
2173 2173
2174 2174 if compressed:
2175 2175 # The revlog compressor added the header in the returned data.
2176 2176 return b'', compressed
2177 2177
2178 2178 if data[0:1] == b'\0':
2179 2179 return b'', data
2180 2180 return b'u', data
2181 2181
2182 2182 def decompress(self, data):
2183 2183 """Decompress a revlog chunk.
2184 2184
2185 2185 The chunk is expected to begin with a header identifying the
2186 2186 format type so it can be routed to an appropriate decompressor.
2187 2187 """
2188 2188 if not data:
2189 2189 return data
2190 2190
2191 2191 # Revlogs are read much more frequently than they are written and many
2192 2192 # chunks only take microseconds to decompress, so performance is
2193 2193 # important here.
2194 2194 #
2195 2195 # We can make a few assumptions about revlogs:
2196 2196 #
2197 2197 # 1) the majority of chunks will be compressed (as opposed to inline
2198 2198 # raw data).
2199 2199 # 2) decompressing *any* data will likely be at least 10x slower than
2200 2200 # returning raw inline data.
2201 2201 # 3) we want to prioritize common and officially supported compression
2202 2202 # engines
2203 2203 #
2204 2204 # It follows that we want to optimize for "decompress compressed data
2205 2205 # when encoded with common and officially supported compression engines"
2206 2206 # case over "raw data" and "data encoded by less common or non-official
2207 2207 # compression engines." That is why we have the inline lookup first
2208 2208 # followed by the compengines lookup.
2209 2209 #
2210 2210 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
2211 2211 # compressed chunks. And this matters for changelog and manifest reads.
2212 2212 t = data[0:1]
2213 2213
2214 2214 if t == b'x':
2215 2215 try:
2216 2216 return _zlibdecompress(data)
2217 2217 except zlib.error as e:
2218 2218 raise error.RevlogError(
2219 2219 _(b'revlog decompress error: %s')
2220 2220 % stringutil.forcebytestr(e)
2221 2221 )
2222 2222 # '\0' is more common than 'u' so it goes first.
2223 2223 elif t == b'\0':
2224 2224 return data
2225 2225 elif t == b'u':
2226 2226 return util.buffer(data, 1)
2227 2227
2228 2228 try:
2229 2229 compressor = self._decompressors[t]
2230 2230 except KeyError:
2231 2231 try:
2232 2232 engine = util.compengines.forrevlogheader(t)
2233 2233 compressor = engine.revlogcompressor(self._compengineopts)
2234 2234 self._decompressors[t] = compressor
2235 2235 except KeyError:
2236 2236 raise error.RevlogError(_(b'unknown compression type %r') % t)
2237 2237
2238 2238 return compressor.decompress(data)
2239 2239
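# Illustrative note (not part of the original file): compress() and
# decompress() agree on a one-byte header: b'u' marks uncompressed data (the
# marker is stripped on read), a leading b'\0' means the chunk is stored
# verbatim, b'x' selects zlib, and any other byte is resolved through
# util.compengines.forrevlogheader(). A rough round-trip sketch:
#
#     >>> header, payload = rl.compress(rawtext)
#     >>> chunk = header + payload              # what is written to the data file
#     >>> bytes(rl.decompress(chunk)) == rawtext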
2240 2240 def _addrevision(
2241 2241 self,
2242 2242 node,
2243 2243 rawtext,
2244 2244 transaction,
2245 2245 link,
2246 2246 p1,
2247 2247 p2,
2248 2248 flags,
2249 2249 cachedelta,
2250 2250 ifh,
2251 2251 dfh,
2252 2252 alwayscache=False,
2253 2253 deltacomputer=None,
2254 2254 ):
2255 2255 """internal function to add revisions to the log
2256 2256
2257 2257 see addrevision for argument descriptions.
2258 2258
2259 2259 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
2260 2260
2261 2261 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
2262 2262 be used.
2263 2263
2264 2264 invariants:
2265 2265 - rawtext is optional (can be None); if not set, cachedelta must be set.
2266 2266 if both are set, they must correspond to each other.
2267 2267 """
2268 2268 if node == nullid:
2269 2269 raise error.RevlogError(
2270 2270 _(b"%s: attempt to add null revision") % self.indexfile
2271 2271 )
2272 2272 if node == wdirid or node in wdirfilenodeids:
2273 2273 raise error.RevlogError(
2274 2274 _(b"%s: attempt to add wdir revision") % self.indexfile
2275 2275 )
2276 2276
2277 2277 if self._inline:
2278 2278 fh = ifh
2279 2279 else:
2280 2280 fh = dfh
2281 2281
2282 2282 btext = [rawtext]
2283 2283
2284 2284 curr = len(self)
2285 2285 prev = curr - 1
2286 2286 offset = self.end(prev)
2287 2287 p1r, p2r = self.rev(p1), self.rev(p2)
2288 2288
2289 2289 # full versions are inserted when the needed deltas
2290 2290 # become comparable to the uncompressed text
2291 2291 if rawtext is None:
2292 2292 # need rawtext size, before changed by flag processors, which is
2293 2293 # the non-raw size. use revlog explicitly to avoid filelog's extra
2294 2294 # logic that might remove metadata size.
2295 2295 textlen = mdiff.patchedsize(
2296 2296 revlog.size(self, cachedelta[0]), cachedelta[1]
2297 2297 )
2298 2298 else:
2299 2299 textlen = len(rawtext)
2300 2300
2301 2301 if deltacomputer is None:
2302 2302 deltacomputer = deltautil.deltacomputer(self)
2303 2303
2304 2304 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
2305 2305
2306 2306 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
2307 2307
2308 2308 e = (
2309 2309 offset_type(offset, flags),
2310 2310 deltainfo.deltalen,
2311 2311 textlen,
2312 2312 deltainfo.base,
2313 2313 link,
2314 2314 p1r,
2315 2315 p2r,
2316 2316 node,
2317 2317 )
2318 2318 self.index.append(e)
2319 2319
2320 2320 entry = self._io.packentry(e, self.node, self.version, curr)
2321 2321 self._writeentry(
2322 2322 transaction, ifh, dfh, entry, deltainfo.data, link, offset
2323 2323 )
2324 2324
2325 2325 rawtext = btext[0]
2326 2326
2327 2327 if alwayscache and rawtext is None:
2328 2328 rawtext = deltacomputer.buildtext(revinfo, fh)
2329 2329
2330 2330 if type(rawtext) == bytes: # only accept immutable objects
2331 2331 self._revisioncache = (node, curr, rawtext)
2332 2332 self._chainbasecache[curr] = deltainfo.chainbase
2333 2333 return curr
2334 2334
2335 2335 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
2336 2336 # Files opened in a+ mode have inconsistent behavior on various
2337 2337 # platforms. Windows requires that a file positioning call be made
2338 2338 # when the file handle transitions between reads and writes. See
2339 2339 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
2340 2340 # platforms, Python or the platform itself can be buggy. Some versions
2341 2341 # of Solaris have been observed to not append at the end of the file
2342 2342 # if the file was seeked to before the end. See issue4943 for more.
2343 2343 #
2344 2344 # We work around this issue by inserting a seek() before writing.
2345 2345 # Note: This is likely not necessary on Python 3. However, because
2346 2346 # the file handle is reused for reads and may be seeked there, we need
2347 2347 # to be careful before changing this.
2348 2348 ifh.seek(0, os.SEEK_END)
2349 2349 if dfh:
2350 2350 dfh.seek(0, os.SEEK_END)
2351 2351
2352 2352 curr = len(self) - 1
2353 2353 if not self._inline:
2354 2354 transaction.add(self.datafile, offset)
2355 2355 transaction.add(self.indexfile, curr * len(entry))
2356 2356 if data[0]:
2357 2357 dfh.write(data[0])
2358 2358 dfh.write(data[1])
2359 2359 ifh.write(entry)
2360 2360 else:
2361 2361 offset += curr * self._io.size
2362 2362 transaction.add(self.indexfile, offset)
2363 2363 ifh.write(entry)
2364 2364 ifh.write(data[0])
2365 2365 ifh.write(data[1])
2366 2366 self._enforceinlinesize(transaction, ifh)
2367 2367 nodemaputil.setup_persistent_nodemap(transaction, self)
2368 2368
2369 2369 def addgroup(
2370 2370 self,
2371 2371 deltas,
2372 2372 linkmapper,
2373 2373 transaction,
2374 2374 alwayscache=False,
2375 2375 addrevisioncb=None,
2376 2376 duplicaterevisioncb=None,
2377 2377 ):
2378 2378 """
2379 2379 add a delta group
2380 2380
2381 2381 Given a set of deltas, add them to the revision log. The
2382 2382 first delta is against its parent, which should be in our
2383 2383 log; the rest are against the previous delta.
2384 2384
2385 2385 If ``addrevisioncb`` is defined, it will be called with arguments of
2386 2386 this revlog and the node that was added.
2387 2387 """
2388 2388
2389 2389 if self._writinghandles:
2390 2390 raise error.ProgrammingError(b'cannot nest addgroup() calls')
2391 2391
2392 2392 r = len(self)
2393 2393 end = 0
2394 2394 if r:
2395 2395 end = self.end(r - 1)
2396 2396 ifh = self._indexfp(b"a+")
2397 2397 isize = r * self._io.size
2398 2398 if self._inline:
2399 2399 transaction.add(self.indexfile, end + isize)
2400 2400 dfh = None
2401 2401 else:
2402 2402 transaction.add(self.indexfile, isize)
2403 2403 transaction.add(self.datafile, end)
2404 2404 dfh = self._datafp(b"a+")
2405 2405
2406 2406 def flush():
2407 2407 if dfh:
2408 2408 dfh.flush()
2409 2409 ifh.flush()
2410 2410
2411 2411 self._writinghandles = (ifh, dfh)
2412 2412 empty = True
2413 2413
2414 2414 try:
2415 2415 deltacomputer = deltautil.deltacomputer(self)
2416 2416 # loop through our set of deltas
2417 2417 for data in deltas:
2418 2418 node, p1, p2, linknode, deltabase, delta, flags = data
2419 2419 link = linkmapper(linknode)
2420 2420 flags = flags or REVIDX_DEFAULT_FLAGS
2421 2421
2422 2422 if self.index.has_node(node):
2423 2423 # this can happen if two branches make the same change
2424 2424 self._nodeduplicatecallback(transaction, node)
2425 2425 if duplicaterevisioncb:
2426 2426 duplicaterevisioncb(self, node)
2427 2427 empty = False
2428 2428 continue
2429 2429
2430 2430 for p in (p1, p2):
2431 2431 if not self.index.has_node(p):
2432 2432 raise error.LookupError(
2433 2433 p, self.indexfile, _(b'unknown parent')
2434 2434 )
2435 2435
2436 2436 if not self.index.has_node(deltabase):
2437 2437 raise error.LookupError(
2438 2438 deltabase, self.indexfile, _(b'unknown delta base')
2439 2439 )
2440 2440
2441 2441 baserev = self.rev(deltabase)
2442 2442
2443 2443 if baserev != nullrev and self.iscensored(baserev):
2444 2444 # if base is censored, delta must be full replacement in a
2445 2445 # single patch operation
2446 2446 hlen = struct.calcsize(b">lll")
2447 2447 oldlen = self.rawsize(baserev)
2448 2448 newlen = len(delta) - hlen
2449 2449 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2450 2450 raise error.CensoredBaseError(
2451 2451 self.indexfile, self.node(baserev)
2452 2452 )
2453 2453
2454 2454 if not flags and self._peek_iscensored(baserev, delta, flush):
2455 2455 flags |= REVIDX_ISCENSORED
2456 2456
2457 2457 # We assume consumers of addrevisioncb will want to retrieve
2458 2458 # the added revision, which will require a call to
2459 2459 # revision(). revision() will fast path if there is a cache
2460 2460 # hit. So, we tell _addrevision() to always cache in this case.
2461 2461 # We're only using addgroup() in the context of changegroup
2462 2462 # generation so the revision data can always be handled as raw
2463 2463 # by the flagprocessor.
2464 2464 self._addrevision(
2465 2465 node,
2466 2466 None,
2467 2467 transaction,
2468 2468 link,
2469 2469 p1,
2470 2470 p2,
2471 2471 flags,
2472 2472 (baserev, delta),
2473 2473 ifh,
2474 2474 dfh,
2475 2475 alwayscache=alwayscache,
2476 2476 deltacomputer=deltacomputer,
2477 2477 )
2478 2478
2479 2479 if addrevisioncb:
2480 2480 addrevisioncb(self, node)
2481 2481 empty = False
2482 2482
2483 2483 if not dfh and not self._inline:
2484 2484 # addrevision switched from inline to conventional
2485 2485 # reopen the index
2486 2486 ifh.close()
2487 2487 dfh = self._datafp(b"a+")
2488 2488 ifh = self._indexfp(b"a+")
2489 2489 self._writinghandles = (ifh, dfh)
2490 2490 finally:
2491 2491 self._writinghandles = None
2492 2492
2493 2493 if dfh:
2494 2494 dfh.close()
2495 2495 ifh.close()
2496 2496 return not empty
2497 2497
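# Illustrative note (not part of the original file): each entry consumed by
# addgroup() is the 7-tuple unpacked in the loop above. A sketch of the
# expected shape, with hypothetical values already known to the caller:
#
#     >>> entry = (node, p1, p2, linknode, deltabase, delta, flags)
#     >>> rl.addgroup([entry], linkmapper, tr,
#     ...             addrevisioncb=lambda rl, node: None)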
2498 2498 def iscensored(self, rev):
2499 2499 """Check if a file revision is censored."""
2500 2500 if not self._censorable:
2501 2501 return False
2502 2502
2503 2503 return self.flags(rev) & REVIDX_ISCENSORED
2504 2504
2505 2505 def _peek_iscensored(self, baserev, delta, flush):
2506 2506 """Quickly check if a delta produces a censored revision."""
2507 2507 if not self._censorable:
2508 2508 return False
2509 2509
2510 2510 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2511 2511
2512 2512 def getstrippoint(self, minlink):
2513 2513 """find the minimum rev that must be stripped to strip the linkrev
2514 2514
2515 2515 Returns a tuple containing the minimum rev and a set of all revs that
2516 2516 have linkrevs that will be broken by this strip.
2517 2517 """
2518 2518 return storageutil.resolvestripinfo(
2519 2519 minlink,
2520 2520 len(self) - 1,
2521 2521 self.headrevs(),
2522 2522 self.linkrev,
2523 2523 self.parentrevs,
2524 2524 )
2525 2525
2526 2526 def strip(self, minlink, transaction):
2527 2527 """truncate the revlog on the first revision with a linkrev >= minlink
2528 2528
2529 2529 This function is called when we're stripping revision minlink and
2530 2530 its descendants from the repository.
2531 2531
2532 2532 We have to remove all revisions with linkrev >= minlink, because
2533 2533 the equivalent changelog revisions will be renumbered after the
2534 2534 strip.
2535 2535
2536 2536 So we truncate the revlog on the first of these revisions, and
2537 2537 trust that the caller has saved the revisions that shouldn't be
2538 2538 removed and that it'll re-add them after this truncation.
2539 2539 """
2540 2540 if len(self) == 0:
2541 2541 return
2542 2542
2543 2543 rev, _ = self.getstrippoint(minlink)
2544 2544 if rev == len(self):
2545 2545 return
2546 2546
2547 2547 # first truncate the files on disk
2548 2548 end = self.start(rev)
2549 2549 if not self._inline:
2550 2550 transaction.add(self.datafile, end)
2551 2551 end = rev * self._io.size
2552 2552 else:
2553 2553 end += rev * self._io.size
2554 2554
2555 2555 transaction.add(self.indexfile, end)
2556 2556
2557 2557 # then reset internal state in memory to forget those revisions
2558 2558 self._revisioncache = None
2559 2559 self._chaininfocache = util.lrucachedict(500)
2560 2560 self._chunkclear()
2561 2561
2562 2562 del self.index[rev:-1]
2563 2563
2564 2564 def checksize(self):
2565 2565 """Check size of index and data files
2566 2566
2567 2567 return a (dd, di) tuple.
2568 2568 - dd: extra bytes for the "data" file
2569 2569 - di: extra bytes for the "index" file
2570 2570
2571 2571 A healthy revlog will return (0, 0).
2572 2572 """
2573 2573 expected = 0
2574 2574 if len(self):
2575 2575 expected = max(0, self.end(len(self) - 1))
2576 2576
2577 2577 try:
2578 2578 with self._datafp() as f:
2579 2579 f.seek(0, io.SEEK_END)
2580 2580 actual = f.tell()
2581 2581 dd = actual - expected
2582 2582 except IOError as inst:
2583 2583 if inst.errno != errno.ENOENT:
2584 2584 raise
2585 2585 dd = 0
2586 2586
2587 2587 try:
2588 2588 f = self.opener(self.indexfile)
2589 2589 f.seek(0, io.SEEK_END)
2590 2590 actual = f.tell()
2591 2591 f.close()
2592 2592 s = self._io.size
2593 2593 i = max(0, actual // s)
2594 2594 di = actual - (i * s)
2595 2595 if self._inline:
2596 2596 databytes = 0
2597 2597 for r in self:
2598 2598 databytes += max(0, self.length(r))
2599 2599 dd = 0
2600 2600 di = actual - len(self) * s - databytes
2601 2601 except IOError as inst:
2602 2602 if inst.errno != errno.ENOENT:
2603 2603 raise
2604 2604 di = 0
2605 2605
2606 2606 return (dd, di)
2607 2607
2608 2608 def files(self):
2609 2609 res = [self.indexfile]
2610 2610 if not self._inline:
2611 2611 res.append(self.datafile)
2612 2612 return res
2613 2613
2614 2614 def emitrevisions(
2615 2615 self,
2616 2616 nodes,
2617 2617 nodesorder=None,
2618 2618 revisiondata=False,
2619 2619 assumehaveparentrevisions=False,
2620 2620 deltamode=repository.CG_DELTAMODE_STD,
2621 2621 ):
2622 2622 if nodesorder not in (b'nodes', b'storage', b'linear', None):
2623 2623 raise error.ProgrammingError(
2624 2624 b'unhandled value for nodesorder: %s' % nodesorder
2625 2625 )
2626 2626
2627 2627 if nodesorder is None and not self._generaldelta:
2628 2628 nodesorder = b'storage'
2629 2629
2630 2630 if (
2631 2631 not self._storedeltachains
2632 2632 and deltamode != repository.CG_DELTAMODE_PREV
2633 2633 ):
2634 2634 deltamode = repository.CG_DELTAMODE_FULL
2635 2635
2636 2636 return storageutil.emitrevisions(
2637 2637 self,
2638 2638 nodes,
2639 2639 nodesorder,
2640 2640 revlogrevisiondelta,
2641 2641 deltaparentfn=self.deltaparent,
2642 2642 candeltafn=self.candelta,
2643 2643 rawsizefn=self.rawsize,
2644 2644 revdifffn=self.revdiff,
2645 2645 flagsfn=self.flags,
2646 2646 deltamode=deltamode,
2647 2647 revisiondata=revisiondata,
2648 2648 assumehaveparentrevisions=assumehaveparentrevisions,
2649 2649 )
2650 2650
2651 2651 DELTAREUSEALWAYS = b'always'
2652 2652 DELTAREUSESAMEREVS = b'samerevs'
2653 2653 DELTAREUSENEVER = b'never'
2654 2654
2655 2655 DELTAREUSEFULLADD = b'fulladd'
2656 2656
2657 2657 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
2658 2658
2659 2659 def clone(
2660 2660 self,
2661 2661 tr,
2662 2662 destrevlog,
2663 2663 addrevisioncb=None,
2664 2664 deltareuse=DELTAREUSESAMEREVS,
2665 2665 forcedeltabothparents=None,
2666 2666 sidedatacompanion=None,
2667 2667 ):
2668 2668 """Copy this revlog to another, possibly with format changes.
2669 2669
2670 2670 The destination revlog will contain the same revisions and nodes.
2671 2671 However, it may not be bit-for-bit identical due to e.g. delta encoding
2672 2672 differences.
2673 2673
2674 2674 The ``deltareuse`` argument controls how deltas from the existing revlog
2675 2675 are preserved in the destination revlog. The argument can have the
2676 2676 following values:
2677 2677
2678 2678 DELTAREUSEALWAYS
2679 2679 Deltas will always be reused (if possible), even if the destination
2680 2680 revlog would not select the same revisions for the delta. This is the
2681 2681 fastest mode of operation.
2682 2682 DELTAREUSESAMEREVS
2683 2683 Deltas will be reused if the destination revlog would pick the same
2684 2684 revisions for the delta. This mode strikes a balance between speed
2685 2685 and optimization.
2686 2686 DELTAREUSENEVER
2687 2687 Deltas will never be reused. This is the slowest mode of execution.
2688 2688 This mode can be used to recompute deltas (e.g. if the diff/delta
2689 2689 algorithm changes).
2690 2690 DELTAREUSEFULLADD
2691 2691 Revisions will be re-added as if they were new content. This is
2692 2692 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
2693 2693 e.g. large file detection and handling.
2694 2694
2695 2695 Delta computation can be slow, so the choice of delta reuse policy can
2696 2696 significantly affect run time.
2697 2697
2698 2698 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2699 2699 two extremes. Deltas will be reused if they are appropriate. But if the
2700 2700 delta could choose a better revision, it will do so. This means if you
2701 2701 are converting a non-generaldelta revlog to a generaldelta revlog,
2702 2702 deltas will be recomputed if the delta's parent isn't a parent of the
2703 2703 revision.
2704 2704
2705 2705 In addition to the delta policy, the ``forcedeltabothparents``
2706 2706 argument controls whether to force computing deltas against both parents
2707 2707 for merges. When None, the destination revlog's current setting is kept.
2708 2708
2709 2709 If not None, `sidedatacompanion` is a callable that accepts two
2710 2710 arguments:
2711 2711
2712 2712 (srcrevlog, rev)
2713 2713
2714 2714 and returns a quintet that controls changes to sidedata content from the
2715 2715 old revision to the new clone result:
2716 2716
2717 2717 (dropall, filterout, update, new_flags, dropped_flags)
2718 2718
2719 2719 * if `dropall` is True, all sidedata should be dropped
2720 2720 * `filterout` is a set of sidedata keys that should be dropped
2721 2721 * `update` is a mapping of additional/new key -> value
2722 2722 * `new_flags` is a bitfield of new flags that the revision should get
2723 2723 * `dropped_flags` is a bitfield of flags that the revision should no longer have
2724 2724 """
2725 2725 if deltareuse not in self.DELTAREUSEALL:
2726 2726 raise ValueError(
2727 2727 _(b'value for deltareuse invalid: %s') % deltareuse
2728 2728 )
2729 2729
2730 2730 if len(destrevlog):
2731 2731 raise ValueError(_(b'destination revlog is not empty'))
2732 2732
2733 2733 if getattr(self, 'filteredrevs', None):
2734 2734 raise ValueError(_(b'source revlog has filtered revisions'))
2735 2735 if getattr(destrevlog, 'filteredrevs', None):
2736 2736 raise ValueError(_(b'destination revlog has filtered revisions'))
2737 2737
2738 2738 # lazydelta and lazydeltabase controls whether to reuse a cached delta,
2739 2739 # if possible.
2740 2740 oldlazydelta = destrevlog._lazydelta
2741 2741 oldlazydeltabase = destrevlog._lazydeltabase
2742 2742 oldamd = destrevlog._deltabothparents
2743 2743
2744 2744 try:
2745 2745 if deltareuse == self.DELTAREUSEALWAYS:
2746 2746 destrevlog._lazydeltabase = True
2747 2747 destrevlog._lazydelta = True
2748 2748 elif deltareuse == self.DELTAREUSESAMEREVS:
2749 2749 destrevlog._lazydeltabase = False
2750 2750 destrevlog._lazydelta = True
2751 2751 elif deltareuse == self.DELTAREUSENEVER:
2752 2752 destrevlog._lazydeltabase = False
2753 2753 destrevlog._lazydelta = False
2754 2754
2755 2755 destrevlog._deltabothparents = forcedeltabothparents or oldamd
2756 2756
2757 2757 self._clone(
2758 2758 tr,
2759 2759 destrevlog,
2760 2760 addrevisioncb,
2761 2761 deltareuse,
2762 2762 forcedeltabothparents,
2763 2763 sidedatacompanion,
2764 2764 )
2765 2765
2766 2766 finally:
2767 2767 destrevlog._lazydelta = oldlazydelta
2768 2768 destrevlog._lazydeltabase = oldlazydeltabase
2769 2769 destrevlog._deltabothparents = oldamd
2770 2770
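# Illustrative note (not part of the original file): a minimal
# sidedatacompanion of the kind described in the clone() docstring. The name
# below is hypothetical; the callable only has to return the documented
# quintet:
#
#     >>> def drop_all_sidedata(srcrevlog, rev):
#     ...     dropall, filterout, update = True, set(), {}
#     ...     new_flags, dropped_flags = 0, 0
#     ...     return dropall, filterout, update, new_flags, dropped_flags
#     >>> src.clone(tr, dest, sidedatacompanion=drop_all_sidedata)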
2771 2771 def _clone(
2772 2772 self,
2773 2773 tr,
2774 2774 destrevlog,
2775 2775 addrevisioncb,
2776 2776 deltareuse,
2777 2777 forcedeltabothparents,
2778 2778 sidedatacompanion,
2779 2779 ):
2780 2780 """perform the core duty of `revlog.clone` after parameter processing"""
2781 2781 deltacomputer = deltautil.deltacomputer(destrevlog)
2782 2782 index = self.index
2783 2783 for rev in self:
2784 2784 entry = index[rev]
2785 2785
2786 2786 # Some classes override linkrev to take filtered revs into
2787 2787 # account. Use raw entry from index.
2788 2788 flags = entry[0] & 0xFFFF
2789 2789 linkrev = entry[4]
2790 2790 p1 = index[entry[5]][7]
2791 2791 p2 = index[entry[6]][7]
2792 2792 node = entry[7]
2793 2793
2794 2794 sidedataactions = (False, [], {}, 0, 0)
2795 2795 if sidedatacompanion is not None:
2796 2796 sidedataactions = sidedatacompanion(self, rev)
2797 2797
2798 2798 # (Possibly) reuse the delta from the revlog if allowed and
2799 2799 # the revlog chunk is a delta.
2800 2800 cachedelta = None
2801 2801 rawtext = None
2802 2802 if any(sidedataactions) or deltareuse == self.DELTAREUSEFULLADD:
2803 2803 dropall = sidedataactions[0]
2804 2804 filterout = sidedataactions[1]
2805 2805 update = sidedataactions[2]
2806 2806 new_flags = sidedataactions[3]
2807 2807 dropped_flags = sidedataactions[4]
2808 2808 text, sidedata = self._revisiondata(rev)
2809 2809 if dropall:
2810 2810 sidedata = {}
2811 2811 for key in filterout:
2812 2812 sidedata.pop(key, None)
2813 2813 sidedata.update(update)
2814 2814 if not sidedata:
2815 2815 sidedata = None
2816 2816
2817 2817 flags |= new_flags
2818 2818 flags &= ~dropped_flags
2819 2819
2820 2820 destrevlog.addrevision(
2821 2821 text,
2822 2822 tr,
2823 2823 linkrev,
2824 2824 p1,
2825 2825 p2,
2826 2826 cachedelta=cachedelta,
2827 2827 node=node,
2828 2828 flags=flags,
2829 2829 deltacomputer=deltacomputer,
2830 2830 sidedata=sidedata,
2831 2831 )
2832 2832 else:
2833 2833 if destrevlog._lazydelta:
2834 2834 dp = self.deltaparent(rev)
2835 2835 if dp != nullrev:
2836 2836 cachedelta = (dp, bytes(self._chunk(rev)))
2837 2837
2838 2838 if not cachedelta:
2839 2839 rawtext = self.rawdata(rev)
2840 2840
2841 2841 ifh = destrevlog.opener(
2842 2842 destrevlog.indexfile, b'a+', checkambig=False
2843 2843 )
2844 2844 dfh = None
2845 2845 if not destrevlog._inline:
2846 2846 dfh = destrevlog.opener(destrevlog.datafile, b'a+')
2847 2847 try:
2848 2848 destrevlog._addrevision(
2849 2849 node,
2850 2850 rawtext,
2851 2851 tr,
2852 2852 linkrev,
2853 2853 p1,
2854 2854 p2,
2855 2855 flags,
2856 2856 cachedelta,
2857 2857 ifh,
2858 2858 dfh,
2859 2859 deltacomputer=deltacomputer,
2860 2860 )
2861 2861 finally:
2862 2862 if dfh:
2863 2863 dfh.close()
2864 2864 ifh.close()
2865 2865
2866 2866 if addrevisioncb:
2867 2867 addrevisioncb(self, rev, node)
2868 2868
2869 2869 def censorrevision(self, tr, censornode, tombstone=b''):
2870 2870 if (self.version & 0xFFFF) == REVLOGV0:
2871 2871 raise error.RevlogError(
2872 2872 _(b'cannot censor with version %d revlogs') % self.version
2873 2873 )
2874 2874
2875 2875 censorrev = self.rev(censornode)
2876 2876 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2877 2877
2878 2878 if len(tombstone) > self.rawsize(censorrev):
2879 2879 raise error.Abort(
2880 2880 _(b'censor tombstone must be no longer than censored data')
2881 2881 )
2882 2882
2883 2883 # Rewriting the revlog in place is hard. Our strategy for censoring is
2884 2884 # to create a new revlog, copy all revisions to it, then replace the
2885 2885 # revlogs on transaction close.
2886 2886
2887 2887 newindexfile = self.indexfile + b'.tmpcensored'
2888 2888 newdatafile = self.datafile + b'.tmpcensored'
2889 2889
2890 2890 # This is a bit dangerous. We could easily have a mismatch of state.
2891 2891 newrl = revlog(self.opener, newindexfile, newdatafile, censorable=True)
2892 2892 newrl.version = self.version
2893 2893 newrl._generaldelta = self._generaldelta
2894 2894 newrl._io = self._io
2895 2895
2896 2896 for rev in self.revs():
2897 2897 node = self.node(rev)
2898 2898 p1, p2 = self.parents(node)
2899 2899
2900 2900 if rev == censorrev:
2901 2901 newrl.addrawrevision(
2902 2902 tombstone,
2903 2903 tr,
2904 2904 self.linkrev(censorrev),
2905 2905 p1,
2906 2906 p2,
2907 2907 censornode,
2908 2908 REVIDX_ISCENSORED,
2909 2909 )
2910 2910
2911 2911 if newrl.deltaparent(rev) != nullrev:
2912 2912 raise error.Abort(
2913 2913 _(
2914 2914 b'censored revision stored as delta; '
2915 2915 b'cannot censor'
2916 2916 ),
2917 2917 hint=_(
2918 2918 b'censoring of revlogs is not '
2919 2919 b'fully implemented; please report '
2920 2920 b'this bug'
2921 2921 ),
2922 2922 )
2923 2923 continue
2924 2924
2925 2925 if self.iscensored(rev):
2926 2926 if self.deltaparent(rev) != nullrev:
2927 2927 raise error.Abort(
2928 2928 _(
2929 2929 b'cannot censor due to censored '
2930 2930 b'revision having delta stored'
2931 2931 )
2932 2932 )
2933 2933 rawtext = self._chunk(rev)
2934 2934 else:
2935 2935 rawtext = self.rawdata(rev)
2936 2936
2937 2937 newrl.addrawrevision(
2938 2938 rawtext, tr, self.linkrev(rev), p1, p2, node, self.flags(rev)
2939 2939 )
2940 2940
2941 2941 tr.addbackup(self.indexfile, location=b'store')
2942 2942 if not self._inline:
2943 2943 tr.addbackup(self.datafile, location=b'store')
2944 2944
2945 2945 self.opener.rename(newrl.indexfile, self.indexfile)
2946 2946 if not self._inline:
2947 2947 self.opener.rename(newrl.datafile, self.datafile)
2948 2948
2949 2949 self.clearcaches()
2950 2950 self._loadindex()
2951 2951
2952 2952 def verifyintegrity(self, state):
2953 2953 """Verifies the integrity of the revlog.
2954 2954
2955 2955 Yields ``revlogproblem`` instances describing problems that are
2956 2956 found.
2957 2957 """
2958 2958 dd, di = self.checksize()
2959 2959 if dd:
2960 2960 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
2961 2961 if di:
2962 2962 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
2963 2963
2964 2964 version = self.version & 0xFFFF
2965 2965
2966 2966 # The verifier tells us what version revlog we should be.
2967 2967 if version != state[b'expectedversion']:
2968 2968 yield revlogproblem(
2969 2969 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
2970 2970 % (self.indexfile, version, state[b'expectedversion'])
2971 2971 )
2972 2972
2973 2973 state[b'skipread'] = set()
2974 2974 state[b'safe_renamed'] = set()
2975 2975
2976 2976 for rev in self:
2977 2977 node = self.node(rev)
2978 2978
2979 2979 # Verify contents. 4 cases to care about:
2980 2980 #
2981 2981 # common: the most common case
2982 2982 # rename: with a rename
2983 2983 # meta: file content starts with b'\1\n', the metadata
2984 2984 # header defined in filelog.py, but without a rename
2985 2985 # ext: content stored externally
2986 2986 #
2987 2987 # More formally, their differences are shown below:
2988 2988 #
2989 2989 # | common | rename | meta | ext
2990 2990 # -------------------------------------------------------
2991 2991 # flags() | 0 | 0 | 0 | not 0
2992 2992 # renamed() | False | True | False | ?
2993 2993 # rawtext[0:2]=='\1\n'| False | True | True | ?
2994 2994 #
2995 2995 # "rawtext" means the raw text stored in revlog data, which
2996 2996 # could be retrieved by "rawdata(rev)". "text"
2997 2997 # mentioned below is "revision(rev)".
2998 2998 #
2999 2999 # There are 3 different lengths stored physically:
3000 3000 # 1. L1: rawsize, stored in revlog index
3001 3001 # 2. L2: len(rawtext), stored in revlog data
3002 3002 # 3. L3: len(text), stored in revlog data if flags==0, or
3003 3003 # possibly somewhere else if flags!=0
3004 3004 #
3005 3005 # L1 should be equal to L2. L3 could be different from them.
3006 3006 # "text" may or may not affect commit hash depending on flag
3007 3007 # processors (see flagutil.addflagprocessor).
3008 3008 #
3009 3009 # | common | rename | meta | ext
3010 3010 # -------------------------------------------------
3011 3011 # rawsize() | L1 | L1 | L1 | L1
3012 3012 # size() | L1 | L2-LM | L1(*) | L1 (?)
3013 3013 # len(rawtext) | L2 | L2 | L2 | L2
3014 3014 # len(text) | L2 | L2 | L2 | L3
3015 3015 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3016 3016 #
3017 3017 # LM: length of metadata, depending on rawtext
3018 3018 # (*): not ideal, see comment in filelog.size
3019 3019 # (?): could be "- len(meta)" if the resolved content has
3020 3020 # rename metadata
3021 3021 #
3022 3022 # Checks needed to be done:
3023 3023 # 1. length check: L1 == L2, in all cases.
3024 3024 # 2. hash check: depending on flag processor, we may need to
3025 3025 # use either "text" (external), or "rawtext" (in revlog).
3026 3026
3027 3027 try:
3028 3028 skipflags = state.get(b'skipflags', 0)
3029 3029 if skipflags:
3030 3030 skipflags &= self.flags(rev)
3031 3031
3032 3032 _verify_revision(self, skipflags, state, node)
3033 3033
3034 3034 l1 = self.rawsize(rev)
3035 3035 l2 = len(self.rawdata(node))
3036 3036
3037 3037 if l1 != l2:
3038 3038 yield revlogproblem(
3039 3039 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3040 3040 node=node,
3041 3041 )
3042 3042
3043 3043 except error.CensoredNodeError:
3044 3044 if state[b'erroroncensored']:
3045 3045 yield revlogproblem(
3046 3046 error=_(b'censored file data'), node=node
3047 3047 )
3048 3048 state[b'skipread'].add(node)
3049 3049 except Exception as e:
3050 3050 yield revlogproblem(
3051 3051 error=_(b'unpacking %s: %s')
3052 3052 % (short(node), stringutil.forcebytestr(e)),
3053 3053 node=node,
3054 3054 )
3055 3055 state[b'skipread'].add(node)
3056 3056
3057 3057 def storageinfo(
3058 3058 self,
3059 3059 exclusivefiles=False,
3060 3060 sharedfiles=False,
3061 3061 revisionscount=False,
3062 3062 trackedsize=False,
3063 3063 storedsize=False,
3064 3064 ):
3065 3065 d = {}
3066 3066
3067 3067 if exclusivefiles:
3068 3068 d[b'exclusivefiles'] = [(self.opener, self.indexfile)]
3069 3069 if not self._inline:
3070 3070 d[b'exclusivefiles'].append((self.opener, self.datafile))
3071 3071
3072 3072 if sharedfiles:
3073 3073 d[b'sharedfiles'] = []
3074 3074
3075 3075 if revisionscount:
3076 3076 d[b'revisionscount'] = len(self)
3077 3077
3078 3078 if trackedsize:
3079 3079 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
3080 3080
3081 3081 if storedsize:
3082 3082 d[b'storedsize'] = sum(
3083 3083 self.opener.stat(path).st_size for path in self.files()
3084 3084 )
3085 3085
3086 3086 return d