changegroup: refactor emitrevision to use a `deltamode` argument...
Boris Feld
r40456:59a870a4 default
@@ -1,1853 +1,1857
1 1 # repository.py - Interfaces and base classes for repositories and peers.
2 2 #
3 3 # Copyright 2017 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 from .i18n import _
11 11 from . import (
12 12 error,
13 13 )
14 14 from .utils import (
15 15 interfaceutil,
16 16 )
17 17
18 18 # When narrowing is finalized and no longer subject to format changes,
19 19 # we should move this to just "narrow" or similar.
20 20 NARROW_REQUIREMENT = 'narrowhg-experimental'
21 21
22 22 # Local repository feature string.
23 23
24 24 # Revlogs are being used for file storage.
25 25 REPO_FEATURE_REVLOG_FILE_STORAGE = b'revlogfilestorage'
26 26 # The storage part of the repository is shared from an external source.
27 27 REPO_FEATURE_SHARED_STORAGE = b'sharedstore'
28 28 # LFS supported for backing file storage.
29 29 REPO_FEATURE_LFS = b'lfs'
30 30 # Repository supports being stream cloned.
31 31 REPO_FEATURE_STREAM_CLONE = b'streamclone'
32 32 # Files storage may lack data for all ancestors.
33 33 REPO_FEATURE_SHALLOW_FILE_STORAGE = b'shallowfilestorage'
34 34
35 35 REVISION_FLAG_CENSORED = 1 << 15
36 36 REVISION_FLAG_ELLIPSIS = 1 << 14
37 37 REVISION_FLAG_EXTSTORED = 1 << 13
38 38
39 39 REVISION_FLAGS_KNOWN = (
40 40 REVISION_FLAG_CENSORED | REVISION_FLAG_ELLIPSIS | REVISION_FLAG_EXTSTORED)
41 41
42 CG_DELTAMODE_STD = b'default'
43 CG_DELTAMODE_PREV = b'previous'
44 CG_DELTAMODE_FULL = b'fulltext'
45
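For context, a minimal sketch (not part of this diff) of how a caller might map options onto the new delta modes; the flag names here are hypothetical, not part of the interface:

def _choosedeltamode(forcefulltext, forceprev):
    # Hypothetical option flags; CG_DELTAMODE_STD leaves the choice of
    # delta base to the storage implementation.
    if forcefulltext:
        return CG_DELTAMODE_FULL
    if forceprev:
        return CG_DELTAMODE_PREV
    return CG_DELTAMODE_STD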
42 46 class ipeerconnection(interfaceutil.Interface):
43 47 """Represents a "connection" to a repository.
44 48
45 49 This is the base interface for representing a connection to a repository.
46 50 It holds basic properties and methods applicable to all peer types.
47 51
48 52 This is not a complete interface definition and should not be used
49 53 outside of this module.
50 54 """
51 55 ui = interfaceutil.Attribute("""ui.ui instance""")
52 56
53 57 def url():
54 58 """Returns a URL string representing this peer.
55 59
56 60 Currently, implementations expose the raw URL used to construct the
57 61 instance. It may contain credentials as part of the URL. The
58 62 expectations of the value aren't well-defined and this could lead to
59 63 data leakage.
60 64
61 65 TODO audit/clean consumers and more clearly define the contents of this
62 66 value.
63 67 """
64 68
65 69 def local():
66 70 """Returns a local repository instance.
67 71
68 72 If the peer represents a local repository, returns an object that
69 73 can be used to interface with it. Otherwise returns ``None``.
70 74 """
71 75
72 76 def peer():
73 77 """Returns an object conforming to this interface.
74 78
75 79 Most implementations will ``return self``.
76 80 """
77 81
78 82 def canpush():
79 83 """Returns a boolean indicating if this peer can be pushed to."""
80 84
81 85 def close():
82 86 """Close the connection to this peer.
83 87
84 88 This is called when the peer will no longer be used. Resources
85 89 associated with the peer should be cleaned up.
86 90 """
87 91
88 92 class ipeercapabilities(interfaceutil.Interface):
89 93 """Peer sub-interface related to capabilities."""
90 94
91 95 def capable(name):
92 96 """Determine support for a named capability.
93 97
94 98 Returns ``False`` if capability not supported.
95 99
96 100 Returns ``True`` if boolean capability is supported. Returns a string
97 101 if capability support is non-boolean.
98 102
99 103 Capability strings may or may not map to wire protocol capabilities.
100 104 """
101 105
102 106 def requirecap(name, purpose):
103 107 """Require a capability to be present.
104 108
105 109 Raises a ``CapabilityError`` if the capability isn't present.
106 110 """
107 111
108 112 class ipeercommands(interfaceutil.Interface):
109 113 """Client-side interface for communicating over the wire protocol.
110 114
111 115 This interface is used as a gateway to the Mercurial wire protocol.
112 116 Methods commonly call wire protocol commands of the same name.
113 117 """
114 118
115 119 def branchmap():
116 120 """Obtain heads in named branches.
117 121
118 122 Returns a dict mapping branch name to an iterable of nodes that are
119 123 heads on that branch.
120 124 """
121 125
122 126 def capabilities():
123 127 """Obtain capabilities of the peer.
124 128
125 129 Returns a set of string capabilities.
126 130 """
127 131
128 132 def clonebundles():
129 133 """Obtains the clone bundles manifest for the repo.
130 134
131 135 Returns the manifest as unparsed bytes.
132 136 """
133 137
134 138 def debugwireargs(one, two, three=None, four=None, five=None):
135 139 """Used to facilitate debugging of arguments passed over the wire."""
136 140
137 141 def getbundle(source, **kwargs):
138 142 """Obtain remote repository data as a bundle.
139 143
140 144 This command is how the bulk of repository data is transferred from
141 145 the peer to the local repository
142 146
143 147 Returns a generator of bundle data.
144 148 """
145 149
146 150 def heads():
147 151 """Determine all known head revisions in the peer.
148 152
149 153 Returns an iterable of binary nodes.
150 154 """
151 155
152 156 def known(nodes):
153 157 """Determine whether multiple nodes are known.
154 158
155 159 Accepts an iterable of nodes whose presence to check for.
156 160
157 161 Returns an iterable of booleans indicating whether the corresponding node
158 162 at that index is known to the peer.
159 163 """
160 164
161 165 def listkeys(namespace):
162 166 """Obtain all keys in a pushkey namespace.
163 167
164 168 Returns an iterable of key names.
165 169 """
166 170
167 171 def lookup(key):
168 172 """Resolve a value to a known revision.
169 173
170 174 Returns a binary node of the resolved revision on success.
171 175 """
172 176
173 177 def pushkey(namespace, key, old, new):
174 178 """Set a value using the ``pushkey`` protocol.
175 179
176 180 Arguments correspond to the pushkey namespace and key to operate on and
177 181 the old and new values for that key.
178 182
179 183 Returns a string with the peer result. The value inside varies by the
180 184 namespace.
181 185 """
182 186
183 187 def stream_out():
184 188 """Obtain streaming clone data.
185 189
186 190 Successful result should be a generator of data chunks.
187 191 """
188 192
189 193 def unbundle(bundle, heads, url):
190 194 """Transfer repository data to the peer.
191 195
192 196 This is how the bulk of data during a push is transferred.
193 197
194 198 Returns the integer number of heads added to the peer.
195 199 """
196 200
197 201 class ipeerlegacycommands(interfaceutil.Interface):
198 202 """Interface for implementing support for legacy wire protocol commands.
199 203
200 204 Wire protocol commands transition to legacy status when they are no longer
201 205 used by modern clients. To facilitate identifying which commands are
202 206 legacy, the interfaces are split.
203 207 """
204 208
205 209 def between(pairs):
206 210 """Obtain nodes between pairs of nodes.
207 211
208 212 ``pairs`` is an iterable of node pairs.
209 213
210 214 Returns an iterable of iterables of nodes corresponding to each
211 215 requested pair.
212 216 """
213 217
214 218 def branches(nodes):
215 219 """Obtain ancestor changesets of specific nodes back to a branch point.
216 220
217 221 For each requested node, the peer finds the first ancestor node that is
218 222 a DAG root or is a merge.
219 223
220 224 Returns an iterable of iterables with the resolved values for each node.
221 225 """
222 226
223 227 def changegroup(nodes, source):
224 228 """Obtain a changegroup with data for descendants of specified nodes."""
225 229
226 230 def changegroupsubset(bases, heads, source):
227 231 """Obtain a changegroup with data for a set of heads, given bases."""
228 232
229 233 class ipeercommandexecutor(interfaceutil.Interface):
230 234 """Represents a mechanism to execute remote commands.
231 235
232 236 This is the primary interface for requesting that wire protocol commands
233 237 be executed. Instances of this interface are active in a context manager
234 238 and have a well-defined lifetime. When the context manager exits, all
235 239 outstanding requests are waited on.
236 240 """
237 241
238 242 def callcommand(name, args):
239 243 """Request that a named command be executed.
240 244
241 245 Receives the command name and a dictionary of command arguments.
242 246
243 247 Returns a ``concurrent.futures.Future`` that will resolve to the
244 248 result of that command request. That exact value is left up to
245 249 the implementation and possibly varies by command.
246 250
247 251 Not all commands can coexist with other commands in an executor
248 252 instance: it depends on the underlying wire protocol transport being
249 253 used and the command itself.
250 254
251 255 Implementations MAY call ``sendcommands()`` automatically if the
252 256 requested command can not coexist with other commands in this executor.
253 257
254 258 Implementations MAY call ``sendcommands()`` automatically when the
255 259 future's ``result()`` is called. So, consumers using multiple
256 260 commands with an executor MUST ensure that ``result()`` is not called
257 261 until all command requests have been issued.
258 262 """
259 263
260 264 def sendcommands():
261 265 """Trigger submission of queued command requests.
262 266
263 267 Not all transports submit commands as soon as they are requested to
264 268 run. When called, this method forces queued command requests to be
265 269 issued. It will no-op if all commands have already been sent.
266 270
267 271 When called, no more new commands may be issued with this executor.
268 272 """
269 273
270 274 def close():
271 275 """Signal that this command request is finished.
272 276
273 277 When called, no more new commands may be issued. All outstanding
274 278 commands that have previously been issued are waited on before
275 279 returning. This not only includes waiting for the futures to resolve,
276 280 but also waiting for all response data to arrive. In other words,
277 281 calling this waits for all on-wire state for issued command requests
278 282 to finish.
279 283
280 284 When used as a context manager, this method is called when exiting the
281 285 context manager.
282 286
283 287 This method may call ``sendcommands()`` if there are buffered commands.
284 288 """
285 289
286 290 class ipeerrequests(interfaceutil.Interface):
287 291 """Interface for executing commands on a peer."""
288 292
289 293 def commandexecutor():
290 294 """A context manager that resolves to an ipeercommandexecutor.
291 295
292 296 The object this resolves to can be used to issue command requests
293 297 to the peer.
294 298
295 299 Callers should call its ``callcommand`` method to issue command
296 300 requests.
297 301
298 302 A new executor should be obtained for each distinct set of commands
299 303 (possibly just a single command) that the consumer wants to execute
300 304 as part of a single operation or round trip. This is because some
301 305 peers are half-duplex and/or don't support persistent connections.
302 306 e.g. in the case of HTTP peers, commands sent to an executor represent
303 307 a single HTTP request. While some peers may support multiple command
304 308 sends over the wire per executor, consumers need to code to the least
305 309 capable peer. So it should be assumed that command executors buffer
306 310 called commands until they are told to send them and that each
307 311 command executor could result in a new connection or wire-level request
308 312 being issued.
309 313 """
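As an illustration of the executor contract above, typical consumer code looks roughly like this (the peer variable and the command arguments are assumptions):

with peer.commandexecutor() as executor:
    # Queue both requests before resolving either future, per the
    # callcommand() contract above.
    fheads = executor.callcommand(b'heads', {})
    fknown = executor.callcommand(b'known', {b'nodes': [somenode]})

# All outstanding requests are waited on when the context manager exits.
heads = fheads.result()
known = fknown.result()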
310 314
311 315 class ipeerbase(ipeerconnection, ipeercapabilities, ipeerrequests):
312 316 """Unified interface for peer repositories.
313 317
314 318 All peer instances must conform to this interface.
315 319 """
316 320
317 321 class ipeerv2(ipeerconnection, ipeercapabilities, ipeerrequests):
318 322 """Unified peer interface for wire protocol version 2 peers."""
319 323
320 324 apidescriptor = interfaceutil.Attribute(
321 325 """Data structure holding description of server API.""")
322 326
323 327 @interfaceutil.implementer(ipeerbase)
324 328 class peer(object):
325 329 """Base class for peer repositories."""
326 330
327 331 def capable(self, name):
328 332 caps = self.capabilities()
329 333 if name in caps:
330 334 return True
331 335
332 336 name = '%s=' % name
333 337 for cap in caps:
334 338 if cap.startswith(name):
335 339 return cap[len(name):]
336 340
337 341 return False
338 342
339 343 def requirecap(self, name, purpose):
340 344 if self.capable(name):
341 345 return
342 346
343 347 raise error.CapabilityError(
344 348 _('cannot %s; remote repository does not support the %r '
345 349 'capability') % (purpose, name))
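A usage sketch of the capability API implemented above (the capability names are illustrative):

support = peer.capable(b'bundle2')
if support is False:
    pass      # capability absent
elif support is True:
    pass      # boolean capability present
else:
    pass      # non-boolean support: ``support`` is the string payload

# Raises error.CapabilityError with a message built from ``purpose``.
peer.requirecap(b'getbundle', _('fetch remote changes'))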
346 350
347 351 class iverifyproblem(interfaceutil.Interface):
348 352 """Represents a problem with the integrity of the repository.
349 353
350 354 Instances of this interface are emitted to describe an integrity issue
351 355 with a repository (e.g. corrupt storage, missing data, etc).
352 356
353 357 Instances are essentially messages associated with severity.
354 358 """
355 359 warning = interfaceutil.Attribute(
356 360 """Message indicating a non-fatal problem.""")
357 361
358 362 error = interfaceutil.Attribute(
359 363 """Message indicating a fatal problem.""")
360 364
361 365 node = interfaceutil.Attribute(
362 366 """Revision encountering the problem.
363 367
364 368 ``None`` means the problem doesn't apply to a single revision.
365 369 """)
366 370
367 371 class irevisiondelta(interfaceutil.Interface):
368 372 """Represents a delta between one revision and another.
369 373
370 374 Instances convey enough information to allow a revision to be exchanged
371 375 with another repository.
372 376
373 377 Instances represent the fulltext revision data or a delta against
374 378 another revision. Therefore the ``revision`` and ``delta`` attributes
375 379 are mutually exclusive.
376 380
377 381 Typically used for changegroup generation.
378 382 """
379 383
380 384 node = interfaceutil.Attribute(
381 385 """20 byte node of this revision.""")
382 386
383 387 p1node = interfaceutil.Attribute(
384 388 """20 byte node of 1st parent of this revision.""")
385 389
386 390 p2node = interfaceutil.Attribute(
387 391 """20 byte node of 2nd parent of this revision.""")
388 392
389 393 linknode = interfaceutil.Attribute(
390 394 """20 byte node of the changelog revision this node is linked to.""")
391 395
392 396 flags = interfaceutil.Attribute(
393 397 """2 bytes of integer flags that apply to this revision.
394 398
395 399 This is a bitwise composition of the ``REVISION_FLAG_*`` constants.
396 400 """)
397 401
398 402 basenode = interfaceutil.Attribute(
399 403 """20 byte node of the revision this data is a delta against.
400 404
401 405 ``nullid`` indicates that the revision is a full revision and not
402 406 a delta.
403 407 """)
404 408
405 409 baserevisionsize = interfaceutil.Attribute(
406 410 """Size of base revision this delta is against.
407 411
408 412 May be ``None`` if ``basenode`` is ``nullid``.
409 413 """)
410 414
411 415 revision = interfaceutil.Attribute(
412 416 """Raw fulltext of revision data for this node.""")
413 417
414 418 delta = interfaceutil.Attribute(
415 419 """Delta between ``basenode`` and ``node``.
416 420
417 421 Stored in the bdiff delta format.
418 422 """)
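To make the mutual exclusivity of ``revision`` and ``delta`` concrete, a consumer sketch (``store`` is an assumed ``ifiledata``-like object):

from mercurial.node import nullid

for rd in store.emitrevisions(nodes, revisiondata=True):
    if rd.basenode == nullid:
        fulltext = rd.revision      # full revision data, no delta
    else:
        delta = rd.delta            # bdiff delta against rd.basenode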
419 423
420 424 class ifilerevisionssequence(interfaceutil.Interface):
421 425 """Contains index data for all revisions of a file.
422 426
423 427 Types implementing this behave like lists of tuples. The index
424 428 in the list corresponds to the revision number. The values contain
425 429 index metadata.
426 430
427 431 The *null* revision (revision number -1) is always the last item
428 432 in the index.
429 433 """
430 434
431 435 def __len__():
432 436 """The total number of revisions."""
433 437
434 438 def __getitem__(rev):
435 439 """Returns the object having a specific revision number.
436 440
437 441 Returns an 8-tuple with the following fields:
438 442
439 443 offset+flags
440 444 Contains the offset and flags for the revision. 64-bit unsigned
441 445 integer where first 6 bytes are the offset and the next 2 bytes
442 446 are flags. The offset can be 0 if it is not used by the store.
443 447 compressed size
444 448 Size of the revision data in the store. It can be 0 if it isn't
445 449 needed by the store.
446 450 uncompressed size
447 451 Fulltext size. It can be 0 if it isn't needed by the store.
448 452 base revision
449 453 Revision number of revision the delta for storage is encoded
450 454 against. -1 indicates not encoded against a base revision.
451 455 link revision
452 456 Revision number of changelog revision this entry is related to.
453 457 p1 revision
454 458 Revision number of 1st parent. -1 if no 1st parent.
455 459 p2 revision
456 460 Revision number of 2nd parent. -1 if no 2nd parent.
457 461 node
458 462 Binary node value for this revision number.
459 463
460 464 Negative values should index off the end of the sequence. ``-1``
461 465 should return the null revision. ``-2`` should return the most
462 466 recent revision.
463 467 """
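Unpacking an entry per the field layout above; the shift follows the stated 6-byte offset / 2-byte flags packing:

entry = index[rev]
offset_flags, csize, usize, baserev, linkrev, p1rev, p2rev, node = entry
offset = offset_flags >> 16      # high 6 bytes: offset in the store
flags = offset_flags & 0xffff    # low 2 bytes: revision flags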
464 468
465 469 def __contains__(rev):
466 470 """Whether a revision number exists."""
467 471
468 472 def insert(self, i, entry):
469 473 """Add an item to the index at specific revision."""
470 474
471 475 class ifileindex(interfaceutil.Interface):
472 476 """Storage interface for index data of a single file.
473 477
474 478 File storage data is divided into index metadata and data storage.
475 479 This interface defines the index portion of the interface.
476 480
477 481 The index logically consists of:
478 482
479 483 * A mapping between revision numbers and nodes.
480 484 * DAG data (storing and querying the relationship between nodes).
481 485 * Metadata to facilitate storage.
482 486 """
483 487 def __len__():
484 488 """Obtain the number of revisions stored for this file."""
485 489
486 490 def __iter__():
487 491 """Iterate over revision numbers for this file."""
488 492
489 493 def hasnode(node):
490 494 """Returns a bool indicating if a node is known to this store.
491 495
492 496 Implementations must only return True for full, binary node values:
493 497 hex nodes, revision numbers, and partial node matches must be
494 498 rejected.
495 499
496 500 The null node is never present.
497 501 """
498 502
499 503 def revs(start=0, stop=None):
500 504 """Iterate over revision numbers for this file, with control."""
501 505
502 506 def parents(node):
503 507 """Returns a 2-tuple of parent nodes for a revision.
504 508
505 509 Values will be ``nullid`` if the parent is empty.
506 510 """
507 511
508 512 def parentrevs(rev):
509 513 """Like parents() but operates on revision numbers."""
510 514
511 515 def rev(node):
512 516 """Obtain the revision number given a node.
513 517
514 518 Raises ``error.LookupError`` if the node is not known.
515 519 """
516 520
517 521 def node(rev):
518 522 """Obtain the node value given a revision number.
519 523
520 524 Raises ``IndexError`` if the node is not known.
521 525 """
522 526
523 527 def lookup(node):
524 528 """Attempt to resolve a value to a node.
525 529
526 530 Value can be a binary node, hex node, revision number, or a string
527 531 that can be converted to an integer.
528 532
529 533 Raises ``error.LookupError`` if a node could not be resolved.
530 534 """
531 535
532 536 def linkrev(rev):
533 537 """Obtain the changeset revision number a revision is linked to."""
534 538
535 539 def iscensored(rev):
536 540 """Return whether a revision's content has been censored."""
537 541
538 542 def commonancestorsheads(node1, node2):
539 543 """Obtain an iterable of nodes containing heads of common ancestors.
540 544
541 545 See ``ancestor.commonancestorsheads()``.
542 546 """
543 547
544 548 def descendants(revs):
545 549 """Obtain descendant revision numbers for a set of revision numbers.
546 550
547 551 If ``nullrev`` is in the set, this is equivalent to ``revs()``.
548 552 """
549 553
550 554 def heads(start=None, stop=None):
551 555 """Obtain a list of nodes that are DAG heads, with control.
552 556
553 557 The set of revisions examined can be limited by specifying
554 558 ``start`` and ``stop``. ``start`` is a node. ``stop`` is an
555 559 iterable of nodes. DAG traversal starts at earlier revision
556 560 ``start`` and iterates forward until any node in ``stop`` is
557 561 encountered.
558 562 """
559 563
560 564 def children(node):
561 565 """Obtain nodes that are children of a node.
562 566
563 567 Returns a list of nodes.
564 568 """
565 569
566 570 class ifiledata(interfaceutil.Interface):
567 571 """Storage interface for data storage of a specific file.
568 572
569 573 This complements ``ifileindex`` and provides an interface for accessing
570 574 data for a tracked file.
571 575 """
572 576 def size(rev):
573 577 """Obtain the fulltext size of file data.
574 578
575 579 Any metadata is excluded from size measurements.
576 580 """
577 581
578 582 def revision(node, raw=False):
579 583 """Obtain fulltext data for a node.
580 584
581 585 By default, any storage transformations are applied before the data
582 586 is returned. If ``raw`` is True, non-raw storage transformations
583 587 are not applied.
584 588
585 589 The fulltext data may contain a header containing metadata. Most
586 590 consumers should use ``read()`` to obtain the actual file data.
587 591 """
588 592
589 593 def read(node):
590 594 """Resolve file fulltext data.
591 595
592 596 This is similar to ``revision()`` except any metadata in the data
593 597 headers is stripped.
594 598 """
595 599
596 600 def renamed(node):
597 601 """Obtain copy metadata for a node.
598 602
599 603 Returns ``False`` if no copy metadata is stored or a 2-tuple of
600 604 (path, node) from which this revision was copied.
601 605 """
602 606
603 607 def cmp(node, fulltext):
604 608 """Compare fulltext to another revision.
605 609
606 610 Returns True if the fulltext is different from what is stored.
607 611
608 612 This takes copy metadata into account.
609 613
610 614 TODO better document the copy metadata and censoring logic.
611 615 """
612 616
613 617 def emitrevisions(nodes,
614 618 nodesorder=None,
615 619 revisiondata=False,
616 620 assumehaveparentrevisions=False,
617 621 deltaprevious=False):
618 622 """Produce ``irevisiondelta`` for revisions.
619 623
620 624 Given an iterable of nodes, emits objects conforming to the
621 625 ``irevisiondelta`` interface that describe revisions in storage.
622 626
623 627 This method is a generator.
624 628
625 629 The input nodes may be unordered. Implementations must ensure that a
626 630 node's parents are emitted before the node itself. Transitively, this
627 631 means that a node may only be emitted once all its ancestors in
628 632 ``nodes`` have also been emitted.
629 633
630 634 By default, emits "index" data (the ``node``, ``p1node``, and
631 635 ``p2node`` attributes). If ``revisiondata`` is set, revision data
632 636 will also be present on the emitted objects.
633 637
634 638 With default argument values, implementations can choose to emit
635 639 either fulltext revision data or a delta. When emitting deltas,
636 640 implementations must consider whether the delta's base revision
637 641 fulltext is available to the receiver.
638 642
639 643 The base revision fulltext is guaranteed to be available if any of
640 644 the following are met:
641 645
642 646 * Its fulltext revision was emitted by this method call.
643 647 * A delta for that revision was emitted by this method call.
644 648 * ``assumehaveparentrevisions`` is True and the base revision is a
645 649 parent of the node.
646 650
647 651 ``nodesorder`` can be used to control the order that revisions are
648 652 emitted. By default, revisions can be reordered as long as they are
649 653 in DAG topological order (see above). If the value is ``nodes``,
650 654 the iteration order from ``nodes`` should be used. If the value is
651 655 ``storage``, then the native order from the backing storage layer
652 656 is used. (Not all storage layers will have strong ordering and behavior
653 657 of this mode is storage-dependent.) ``nodes`` ordering can force
654 658 revisions to be emitted before their ancestors, so consumers should
655 659 use it with care.
656 660
657 661 The ``linknode`` attribute on the returned ``irevisiondelta`` may not
658 662 be set and it is the caller's responsibility to resolve it, if needed.
659 663
660 664 If ``deltaprevious`` is True and revision data is requested, all
661 665 revision data should be emitted as deltas against the revision
662 666 emitted just prior. The initial revision should be a delta against
663 667 its 1st parent.
664 668 """
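An illustrative call exercising the arguments described above; ``sendchunk`` is a hypothetical consumer function:

for rd in store.emitrevisions(
        nodes,
        nodesorder='storage',              # use the backing store's order
        revisiondata=True,
        assumehaveparentrevisions=True):   # receiver has parent fulltexts
    sendchunk(rd.node, rd.basenode, rd.delta or rd.revision)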
665 669
666 670 class ifilemutation(interfaceutil.Interface):
667 671 """Storage interface for mutation events of a tracked file."""
668 672
669 673 def add(filedata, meta, transaction, linkrev, p1, p2):
670 674 """Add a new revision to the store.
671 675
672 676 Takes file data, dictionary of metadata, a transaction, linkrev,
673 677 and parent nodes.
674 678
675 679 Returns the node that was added.
676 680
677 681 May no-op if a revision matching the supplied data is already stored.
678 682 """
679 683
680 684 def addrevision(revisiondata, transaction, linkrev, p1, p2, node=None,
681 685 flags=0, cachedelta=None):
682 686 """Add a new revision to the store.
683 687
684 688 This is similar to ``add()`` except it operates at a lower level.
685 689
686 690 The data passed in already contains a metadata header, if any.
687 691
688 692 ``node`` and ``flags`` can be used to define the expected node and
689 693 the flags to use with storage. ``flags`` is a bitwise value composed
690 694 of the various ``REVISION_FLAG_*`` constants.
691 695
692 696 ``add()`` is usually called when adding files from e.g. the working
693 697 directory. ``addrevision()`` is often called by ``add()`` and for
694 698 scenarios where revision data has already been computed, such as when
695 699 applying raw data from a peer repo.
696 700 """
697 701
698 702 def addgroup(deltas, linkmapper, transaction, addrevisioncb=None,
699 703 maybemissingparents=False):
700 704 """Process a series of deltas for storage.
701 705
702 706 ``deltas`` is an iterable of 7-tuples of
703 707 (node, p1, p2, linknode, deltabase, delta, flags) defining revisions
704 708 to add.
705 709
706 710 The ``delta`` field contains ``mpatch`` data to apply to a base
707 711 revision, identified by ``deltabase``. The base node can be
708 712 ``nullid``, in which case the header from the delta can be ignored
709 713 and the delta used as the fulltext.
710 714
711 715 ``addrevisioncb`` should be called for each node as it is committed.
712 716
713 717 ``maybemissingparents`` is a bool indicating whether the incoming
714 718 data may reference parents/ancestor revisions that aren't present.
715 719 This flag is set when receiving data into a "shallow" store that
716 720 doesn't hold all history.
717 721
718 722 Returns a list of nodes that were processed. A node will be in the list
719 723 even if it existed in the store previously.
720 724 """
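A sketch of building the ``deltas`` iterable; per the docs above, ``nullid`` as the delta base means the payload can be treated as a fulltext (the surrounding variables are assumptions):

deltas = [
    # (node, p1, p2, linknode, deltabase, delta, flags)
    (node, p1node, p2node, linknode, nullid, fulltextdata, 0),
]

def linkmapper(linknode):
    return cl.rev(linknode)    # resolve linknode to a local changelog rev

processed = store.addgroup(deltas, linkmapper, tr)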
721 725
722 726 def censorrevision(tr, node, tombstone=b''):
723 727 """Remove the content of a single revision.
724 728
725 729 The specified ``node`` will have its content purged from storage.
726 730 Future attempts to access the revision data for this node will
727 731 result in failure.
728 732
729 733 A ``tombstone`` message can optionally be stored. This message may be
730 734 displayed to users when they attempt to access the missing revision
731 735 data.
732 736
733 737 Storage backends may have stored deltas against the previous content
734 738 in this revision. As part of censoring a revision, these storage
735 739 backends are expected to rewrite any internally stored deltas such
736 740 that they no longer reference the deleted content.
737 741 """
738 742
739 743 def getstrippoint(minlink):
740 744 """Find the minimum revision that must be stripped to strip a linkrev.
741 745
742 746 Returns a 2-tuple containing the minimum revision number and a set
743 747 of all revisions numbers that would be broken by this strip.
744 748
745 749 TODO this is highly revlog centric and should be abstracted into
746 750 a higher-level deletion API. ``repair.strip()`` relies on this.
747 751 """
748 752
749 753 def strip(minlink, transaction):
750 754 """Remove storage of items starting at a linkrev.
751 755
752 756 This uses ``getstrippoint()`` to determine the first node to remove.
753 757 Then it effectively truncates storage for all revisions after that.
754 758
755 759 TODO this is highly revlog centric and should be abstracted into a
756 760 higher-level deletion API.
757 761 """
758 762
759 763 class ifilestorage(ifileindex, ifiledata, ifilemutation):
760 764 """Complete storage interface for a single tracked file."""
761 765
762 766 def files():
763 767 """Obtain paths that are backing storage for this file.
764 768
765 769 TODO this is used heavily by verify code and there should probably
766 770 be a better API for that.
767 771 """
768 772
769 773 def storageinfo(exclusivefiles=False, sharedfiles=False,
770 774 revisionscount=False, trackedsize=False,
771 775 storedsize=False):
772 776 """Obtain information about storage for this file's data.
773 777
774 778 Returns a dict describing storage for this tracked path. The keys
775 779 in the dict map to arguments of the same. The arguments are bools
776 780 indicating whether to calculate and obtain that data.
777 781
778 782 exclusivefiles
779 783 Iterable of (vfs, path) describing files that are exclusively
780 784 used to back storage for this tracked path.
781 785
782 786 sharedfiles
783 787 Iterable of (vfs, path) describing files that are used to back
784 788 storage for this tracked path. Those files may also provide storage
785 789 for other stored entities.
786 790
787 791 revisionscount
788 792 Number of revisions available for retrieval.
789 793
790 794 trackedsize
791 795 Total size in bytes of all tracked revisions. This is a sum of the
792 796 length of the fulltext of all revisions.
793 797
794 798 storedsize
795 799 Total size in bytes used to store data for all tracked revisions.
796 800 This is commonly less than ``trackedsize`` due to internal usage
797 801 of deltas rather than fulltext revisions.
798 802
799 803 Not all storage backends may support all queries or have a reasonable
800 804 value to use. In that case, the value should be set to ``None`` and
801 805 callers are expected to handle this special value.
802 806 """
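A consumer sketch; per the note above, any field may come back as ``None`` when a backend cannot compute it:

info = store.storageinfo(revisionscount=True, trackedsize=True)
if info['trackedsize'] is not None:
    totaltracked += info['trackedsize']
if info['revisionscount'] is not None:
    totalrevs += info['revisionscount']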
803 807
804 808 def verifyintegrity(state):
805 809 """Verifies the integrity of file storage.
806 810
807 811 ``state`` is a dict holding state of the verifier process. It can be
808 812 used to communicate data between invocations of multiple storage
809 813 primitives.
810 814
811 815 If individual revisions cannot have their revision content resolved,
812 816 the method is expected to set the ``skipread`` key to a set of nodes
813 817 that encountered problems.
814 818
815 819 The method yields objects conforming to the ``iverifyproblem``
816 820 interface.
817 821 """
818 822
819 823 class idirs(interfaceutil.Interface):
820 824 """Interface representing a collection of directories from paths.
821 825
822 826 This interface is essentially a derived data structure representing
823 827 directories from a collection of paths.
824 828 """
825 829
826 830 def addpath(path):
827 831 """Add a path to the collection.
828 832
829 833 All directories in the path will be added to the collection.
830 834 """
831 835
832 836 def delpath(path):
833 837 """Remove a path from the collection.
834 838
835 839 If the removal was the last path in a particular directory, the
836 840 directory is removed from the collection.
837 841 """
838 842
839 843 def __iter__():
840 844 """Iterate over the directories in this collection of paths."""
841 845
842 846 def __contains__(path):
843 847 """Whether a specific directory is in this collection."""
844 848
845 849 class imanifestdict(interfaceutil.Interface):
846 850 """Interface representing a manifest data structure.
847 851
848 852 A manifest is effectively a dict mapping paths to entries. Each entry
849 853 consists of a binary node and extra flags affecting that entry.
850 854 """
851 855
852 856 def __getitem__(path):
853 857 """Returns the binary node value for a path in the manifest.
854 858
855 859 Raises ``KeyError`` if the path does not exist in the manifest.
856 860
857 861 Equivalent to ``self.find(path)[0]``.
858 862 """
859 863
860 864 def find(path):
861 865 """Returns the entry for a path in the manifest.
862 866
863 867 Returns a 2-tuple of (node, flags).
864 868
865 869 Raises ``KeyError`` if the path does not exist in the manifest.
866 870 """
867 871
868 872 def __len__():
869 873 """Return the number of entries in the manifest."""
870 874
871 875 def __nonzero__():
872 876 """Returns True if the manifest has entries, False otherwise."""
873 877
874 878 __bool__ = __nonzero__
875 879
876 880 def __setitem__(path, node):
877 881 """Define the node value for a path in the manifest.
878 882
879 883 If the path is already in the manifest, its flags will be copied to
880 884 the new entry.
881 885 """
882 886
883 887 def __contains__(path):
884 888 """Whether a path exists in the manifest."""
885 889
886 890 def __delitem__(path):
887 891 """Remove a path from the manifest.
888 892
889 893 Raises ``KeyError`` if the path is not in the manifest.
890 894 """
891 895
892 896 def __iter__():
893 897 """Iterate over paths in the manifest."""
894 898
895 899 def iterkeys():
896 900 """Iterate over paths in the manifest."""
897 901
898 902 def keys():
899 903 """Obtain a list of paths in the manifest."""
900 904
901 905 def filesnotin(other, match=None):
902 906 """Obtain the set of paths in this manifest but not in another.
903 907
904 908 ``match`` is an optional matcher function to be applied to both
905 909 manifests.
906 910
907 911 Returns a set of paths.
908 912 """
909 913
910 914 def dirs():
911 915 """Returns an object implementing the ``idirs`` interface."""
912 916
913 917 def hasdir(dir):
914 918 """Returns a bool indicating if a directory is in this manifest."""
915 919
916 920 def matches(match):
917 921 """Generate a new manifest filtered through a matcher.
918 922
919 923 Returns an object conforming to the ``imanifestdict`` interface.
920 924 """
921 925
922 926 def walk(match):
923 927 """Generator of paths in manifest satisfying a matcher.
924 928
925 929 This is equivalent to ``self.matches(match).iterkeys()`` except a new
926 930 manifest object is not created.
927 931
928 932 If the matcher has explicit files listed and they don't exist in
929 933 the manifest, ``match.bad()`` is called for each missing file.
930 934 """
931 935
932 936 def diff(other, match=None, clean=False):
933 937 """Find differences between this manifest and another.
934 938
935 939 This manifest is compared to ``other``.
936 940
937 941 If ``match`` is provided, the two manifests are filtered against this
938 942 matcher and only entries satisfying the matcher are compared.
939 943
940 944 If ``clean`` is True, unchanged files are included in the returned
941 945 object.
942 946
943 947 Returns a dict with paths as keys and values of 2-tuples of 2-tuples of
944 948 the form ``((node1, flag1), (node2, flag2))`` where ``(node1, flag1)``
945 949 represents the node and flags for this manifest and ``(node2, flag2)``
946 950 are the same for the other manifest.
947 951 """
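Iterating the return value described above; representing a missing side with a ``None`` node is an assumption of this sketch:

for path, ((n1, fl1), (n2, fl2)) in m1.diff(m2).items():
    if n1 is None:
        pass      # path present only in the other manifest
    elif n2 is None:
        pass      # path present only in this manifest
    elif n1 != n2 or fl1 != fl2:
        pass      # path differs between the two manifests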
948 952
949 953 def setflag(path, flag):
950 954 """Set the flag value for a given path.
951 955
952 956 Raises ``KeyError`` if the path is not already in the manifest.
953 957 """
954 958
955 959 def get(path, default=None):
956 960 """Obtain the node value for a path or a default value if missing."""
957 961
958 962 def flags(path, default=''):
959 963 """Return the flags value for a path or a default value if missing."""
960 964
961 965 def copy():
962 966 """Return a copy of this manifest."""
963 967
964 968 def items():
965 969 """Returns an iterable of (path, node) for items in this manifest."""
966 970
967 971 def iteritems():
968 972 """Identical to items()."""
969 973
970 974 def iterentries():
971 975 """Returns an iterable of (path, node, flags) for this manifest.
972 976
973 977 Similar to ``iteritems()`` except items are a 3-tuple and include
974 978 flags.
975 979 """
976 980
977 981 def text():
978 982 """Obtain the raw data representation for this manifest.
979 983
980 984 Result is used to create a manifest revision.
981 985 """
982 986
983 987 def fastdelta(base, changes):
984 988 """Obtain a delta between this manifest and another given changes.
985 989
986 990 ``base`` is the raw data representation for another manifest.
987 991
988 992 ``changes`` is an iterable of ``(path, to_delete)``.
989 993
990 994 Returns a 2-tuple containing ``bytearray(self.text())`` and the
991 995 delta between ``base`` and this manifest.
992 996 """
993 997
994 998 class imanifestrevisionbase(interfaceutil.Interface):
995 999 """Base interface representing a single revision of a manifest.
996 1000
997 1001 Should not be used as a primary interface: should always be inherited
998 1002 as part of a larger interface.
999 1003 """
1000 1004
1001 1005 def new():
1002 1006 """Obtain a new manifest instance.
1003 1007
1004 1008 Returns an object conforming to the ``imanifestrevisionwritable``
1005 1009 interface. The instance will be associated with the same
1006 1010 ``imanifestlog`` collection as this instance.
1007 1011 """
1008 1012
1009 1013 def copy():
1010 1014 """Obtain a copy of this manifest instance.
1011 1015
1012 1016 Returns an object conforming to the ``imanifestrevisionwritable``
1013 1017 interface. The instance will be associated with the same
1014 1018 ``imanifestlog`` collection as this instance.
1015 1019 """
1016 1020
1017 1021 def read():
1018 1022 """Obtain the parsed manifest data structure.
1019 1023
1020 1024 The returned object conforms to the ``imanifestdict`` interface.
1021 1025 """
1022 1026
1023 1027 class imanifestrevisionstored(imanifestrevisionbase):
1024 1028 """Interface representing a manifest revision committed to storage."""
1025 1029
1026 1030 def node():
1027 1031 """The binary node for this manifest."""
1028 1032
1029 1033 parents = interfaceutil.Attribute(
1030 1034 """List of binary nodes that are parents for this manifest revision."""
1031 1035 )
1032 1036
1033 1037 def readdelta(shallow=False):
1034 1038 """Obtain the manifest data structure representing changes from parent.
1035 1039
1036 1040 This manifest is compared to its 1st parent. A new manifest representing
1037 1041 those differences is constructed.
1038 1042
1039 1043 The returned object conforms to the ``imanifestdict`` interface.
1040 1044 """
1041 1045
1042 1046 def readfast(shallow=False):
1043 1047 """Calls either ``read()`` or ``readdelta()``.
1044 1048
1045 1049 The faster of the two options is called.
1046 1050 """
1047 1051
1048 1052 def find(key):
1049 1053 """Calls ``self.read().find(key)``.
1050 1054
1051 1055 Returns a 2-tuple of ``(node, flags)`` or raises ``KeyError``.
1052 1056 """
1053 1057
1054 1058 class imanifestrevisionwritable(imanifestrevisionbase):
1055 1059 """Interface representing a manifest revision that can be committed."""
1056 1060
1057 1061 def write(transaction, linkrev, p1node, p2node, added, removed, match=None):
1058 1062 """Add this revision to storage.
1059 1063
1060 1064 Takes a transaction object, the changeset revision number it will
1061 1065 be associated with, its parent nodes, and lists of added and
1062 1066 removed paths.
1063 1067
1064 1068 If match is provided, storage can choose not to inspect or write out
1065 1069 items that do not match. Storage is still required to be able to provide
1066 1070 the full manifest in the future for any directories written (these
1067 1071 manifests should not be "narrowed on disk").
1068 1072
1069 1073 Returns the binary node of the created revision.
1070 1074 """
1071 1075
1072 1076 class imanifeststorage(interfaceutil.Interface):
1073 1077 """Storage interface for manifest data."""
1074 1078
1075 1079 tree = interfaceutil.Attribute(
1076 1080 """The path to the directory this manifest tracks.
1077 1081
1078 1082 The empty bytestring represents the root manifest.
1079 1083 """)
1080 1084
1081 1085 index = interfaceutil.Attribute(
1082 1086 """An ``ifilerevisionssequence`` instance.""")
1083 1087
1084 1088 indexfile = interfaceutil.Attribute(
1085 1089 """Path of revlog index file.
1086 1090
1087 1091 TODO this is revlog specific and should not be exposed.
1088 1092 """)
1089 1093
1090 1094 opener = interfaceutil.Attribute(
1091 1095 """VFS opener to use to access underlying files used for storage.
1092 1096
1093 1097 TODO this is revlog specific and should not be exposed.
1094 1098 """)
1095 1099
1096 1100 version = interfaceutil.Attribute(
1097 1101 """Revlog version number.
1098 1102
1099 1103 TODO this is revlog specific and should not be exposed.
1100 1104 """)
1101 1105
1102 1106 _generaldelta = interfaceutil.Attribute(
1103 1107 """Whether generaldelta storage is being used.
1104 1108
1105 1109 TODO this is revlog specific and should not be exposed.
1106 1110 """)
1107 1111
1108 1112 fulltextcache = interfaceutil.Attribute(
1109 1113 """Dict with cache of fulltexts.
1110 1114
1111 1115 TODO this doesn't feel appropriate for the storage interface.
1112 1116 """)
1113 1117
1114 1118 def __len__():
1115 1119 """Obtain the number of revisions stored for this manifest."""
1116 1120
1117 1121 def __iter__():
1118 1122 """Iterate over revision numbers for this manifest."""
1119 1123
1120 1124 def rev(node):
1121 1125 """Obtain the revision number given a binary node.
1122 1126
1123 1127 Raises ``error.LookupError`` if the node is not known.
1124 1128 """
1125 1129
1126 1130 def node(rev):
1127 1131 """Obtain the node value given a revision number.
1128 1132
1129 1133 Raises ``error.LookupError`` if the revision is not known.
1130 1134 """
1131 1135
1132 1136 def lookup(value):
1133 1137 """Attempt to resolve a value to a node.
1134 1138
1135 1139 Value can be a binary node, hex node, revision number, or a bytes
1136 1140 that can be converted to an integer.
1137 1141
1138 1142 Raises ``error.LookupError`` if a node could not be resolved.
1139 1143 """
1140 1144
1141 1145 def parents(node):
1142 1146 """Returns a 2-tuple of parent nodes for a node.
1143 1147
1144 1148 Values will be ``nullid`` if the parent is empty.
1145 1149 """
1146 1150
1147 1151 def parentrevs(rev):
1148 1152 """Like parents() but operates on revision numbers."""
1149 1153
1150 1154 def linkrev(rev):
1151 1155 """Obtain the changeset revision number a revision is linked to."""
1152 1156
1153 1157 def revision(node, _df=None, raw=False):
1154 1158 """Obtain fulltext data for a node."""
1155 1159
1156 1160 def revdiff(rev1, rev2):
1157 1161 """Obtain a delta between two revision numbers.
1158 1162
1159 1163 The returned data is the result of ``bdiff.bdiff()`` on the raw
1160 1164 revision data.
1161 1165 """
1162 1166
1163 1167 def cmp(node, fulltext):
1164 1168 """Compare fulltext to another revision.
1165 1169
1166 1170 Returns True if the fulltext is different from what is stored.
1167 1171 """
1168 1172
1169 1173 def emitrevisions(nodes,
1170 1174 nodesorder=None,
1171 1175 revisiondata=False,
1172 1176 assumehaveparentrevisions=False):
1173 1177 """Produce ``irevisiondelta`` describing revisions.
1174 1178
1175 1179 See the documentation for ``ifiledata`` for more.
1176 1180 """
1177 1181
1178 1182 def addgroup(deltas, linkmapper, transaction, addrevisioncb=None):
1179 1183 """Process a series of deltas for storage.
1180 1184
1181 1185 See the documentation in ``ifilemutation`` for more.
1182 1186 """
1183 1187
1184 1188 def rawsize(rev):
1185 1189 """Obtain the size of tracked data.
1186 1190
1187 1191 Is equivalent to ``len(m.revision(node, raw=True))``.
1188 1192
1189 1193 TODO this method is only used by upgrade code and may be removed.
1190 1194 """
1191 1195
1192 1196 def getstrippoint(minlink):
1193 1197 """Find minimum revision that must be stripped to strip a linkrev.
1194 1198
1195 1199 See the documentation in ``ifilemutation`` for more.
1196 1200 """
1197 1201
1198 1202 def strip(minlink, transaction):
1199 1203 """Remove storage of items starting at a linkrev.
1200 1204
1201 1205 See the documentation in ``ifilemutation`` for more.
1202 1206 """
1203 1207
1204 1208 def checksize():
1205 1209 """Obtain the expected sizes of backing files.
1206 1210
1207 1211 TODO this is used by verify and it should not be part of the interface.
1208 1212 """
1209 1213
1210 1214 def files():
1211 1215 """Obtain paths that are backing storage for this manifest.
1212 1216
1213 1217 TODO this is used by verify and there should probably be a better API
1214 1218 for this functionality.
1215 1219 """
1216 1220
1217 1221 def deltaparent(rev):
1218 1222 """Obtain the revision that a revision is delta'd against.
1219 1223
1220 1224 TODO delta encoding is an implementation detail of storage and should
1221 1225 not be exposed to the storage interface.
1222 1226 """
1223 1227
1224 1228 def clone(tr, dest, **kwargs):
1225 1229 """Clone this instance to another."""
1226 1230
1227 1231 def clearcaches(clear_persisted_data=False):
1228 1232 """Clear any caches associated with this instance."""
1229 1233
1230 1234 def dirlog(d):
1231 1235 """Obtain a manifest storage instance for a tree."""
1232 1236
1233 1237 def add(m, transaction, link, p1, p2, added, removed, readtree=None,
1234 1238 match=None):
1235 1239 """Add a revision to storage.
1236 1240
1237 1241 ``m`` is an object conforming to ``imanifestdict``.
1238 1242
1239 1243 ``link`` is the linkrev revision number.
1240 1244
1241 1245 ``p1`` and ``p2`` are the parent revision numbers.
1242 1246
1243 1247 ``added`` and ``removed`` are iterables of added and removed paths,
1244 1248 respectively.
1245 1249
1246 1250 ``readtree`` is a function that can be used to read the child tree(s)
1247 1251 when recursively writing the full tree structure when using
1248 1252 treemanifests.
1249 1253
1250 1254 ``match`` is a matcher that can be used to hint to storage that not all
1251 1255 paths must be inspected; this is an optimization and can be safely
1252 1256 ignored. Note that the storage must still be able to reproduce a full
1253 1257 manifest including files that did not match.
1254 1258 """
1255 1259
1256 1260 def storageinfo(exclusivefiles=False, sharedfiles=False,
1257 1261 revisionscount=False, trackedsize=False,
1258 1262 storedsize=False):
1259 1263 """Obtain information about storage for this manifest's data.
1260 1264
1261 1265 See ``ifilestorage.storageinfo()`` for a description of this method.
1262 1266 This one behaves the same way, except for manifest data.
1263 1267 """
1264 1268
1265 1269 class imanifestlog(interfaceutil.Interface):
1266 1270 """Interface representing a collection of manifest snapshots.
1267 1271
1268 1272 Represents the root manifest in a repository.
1269 1273
1270 1274 Also serves as a means to access nested tree manifests and to cache
1271 1275 tree manifests.
1272 1276 """
1273 1277
1274 1278 def __getitem__(node):
1275 1279 """Obtain a manifest instance for a given binary node.
1276 1280
1277 1281 Equivalent to calling ``self.get('', node)``.
1278 1282
1279 1283 The returned object conforms to the ``imanifestrevisionstored``
1280 1284 interface.
1281 1285 """
1282 1286
1283 1287 def get(tree, node, verify=True):
1284 1288 """Retrieve the manifest instance for a given directory and binary node.
1285 1289
1286 1290 ``node`` always refers to the node of the root manifest (which will be
1287 1291 the only manifest if flat manifests are being used).
1288 1292
1289 1293 If ``tree`` is the empty string, the root manifest is returned.
1290 1294 Otherwise the manifest for the specified directory will be returned
1291 1295 (requires tree manifests).
1292 1296
1293 1297 If ``verify`` is True, ``LookupError`` is raised if the node is not
1294 1298 known.
1295 1299
1296 1300 The returned object conforms to the ``imanifestrevisionstored``
1297 1301 interface.
1298 1302 """
1299 1303
1300 1304 def getstorage(tree):
1301 1305 """Retrieve an interface to storage for a particular tree.
1302 1306
1303 1307 If ``tree`` is the empty bytestring, storage for the root manifest will
1304 1308 be returned. Otherwise storage for a tree manifest is returned.
1305 1309
1306 1310 TODO formalize interface for returned object.
1307 1311 """
1308 1312
1309 1313 def clearcaches():
1310 1314 """Clear caches associated with this collection."""
1311 1315
1312 1316 def rev(node):
1313 1317 """Obtain the revision number for a binary node.
1314 1318
1315 1319 Raises ``error.LookupError`` if the node is not known.
1316 1320 """
1317 1321
1318 1322 class ilocalrepositoryfilestorage(interfaceutil.Interface):
1319 1323 """Local repository sub-interface providing access to tracked file storage.
1320 1324
1321 1325 This interface defines how a repository accesses storage for a single
1322 1326 tracked file path.
1323 1327 """
1324 1328
1325 1329 def file(f):
1326 1330 """Obtain a filelog for a tracked path.
1327 1331
1328 1332 The returned type conforms to the ``ifilestorage`` interface.
1329 1333 """
1330 1334
1331 1335 class ilocalrepositorymain(interfaceutil.Interface):
1332 1336 """Main interface for local repositories.
1333 1337
1334 1338 This currently captures the reality of things - not how things should be.
1335 1339 """
1336 1340
1337 1341 supportedformats = interfaceutil.Attribute(
1338 1342 """Set of requirements that apply to stream clone.
1339 1343
1340 1344 This is actually a class attribute and is shared among all instances.
1341 1345 """)
1342 1346
1343 1347 supported = interfaceutil.Attribute(
1344 1348 """Set of requirements that this repo is capable of opening.""")
1345 1349
1346 1350 requirements = interfaceutil.Attribute(
1347 1351 """Set of requirements this repo uses.""")
1348 1352
1349 1353 features = interfaceutil.Attribute(
1350 1354 """Set of "features" this repository supports.
1351 1355
1352 1356 A "feature" is a loosely-defined term. It can refer to a feature
1353 1357 in the classical sense or can describe an implementation detail
1354 1358 of the repository. For example, a ``readonly`` feature may denote
1355 1359 the repository as read-only. Or a ``revlogfilestore`` feature may
1356 1360 denote that the repository is using revlogs for file storage.
1357 1361
1358 1362 The intent of features is to provide a machine-queryable mechanism
1359 1363 for repo consumers to test for various repository characteristics.
1360 1364
1361 1365 Features are similar to ``requirements``. The main difference is that
1362 1366 requirements are stored on-disk and represent requirements to open the
1363 1367 repository. Features describe run-time capabilities of the repository
1364 1368 and are more granular (they may be derived from requirements).
1365 1369 """)
1366 1370
1367 1371 filtername = interfaceutil.Attribute(
1368 1372 """Name of the repoview that is active on this repo.""")
1369 1373
1370 1374 wvfs = interfaceutil.Attribute(
1371 1375 """VFS used to access the working directory.""")
1372 1376
1373 1377 vfs = interfaceutil.Attribute(
1374 1378 """VFS rooted at the .hg directory.
1375 1379
1376 1380 Used to access repository data not in the store.
1377 1381 """)
1378 1382
1379 1383 svfs = interfaceutil.Attribute(
1380 1384 """VFS rooted at the store.
1381 1385
1382 1386 Used to access repository data in the store. Typically .hg/store.
1383 1387 But can point elsewhere if the store is shared.
1384 1388 """)
1385 1389
1386 1390 root = interfaceutil.Attribute(
1387 1391 """Path to the root of the working directory.""")
1388 1392
1389 1393 path = interfaceutil.Attribute(
1390 1394 """Path to the .hg directory.""")
1391 1395
1392 1396 origroot = interfaceutil.Attribute(
1393 1397 """The filesystem path that was used to construct the repo.""")
1394 1398
1395 1399 auditor = interfaceutil.Attribute(
1396 1400 """A pathauditor for the working directory.
1397 1401
1398 1402 This checks if a path refers to a nested repository.
1399 1403
1400 1404 Operates on the filesystem.
1401 1405 """)
1402 1406
1403 1407 nofsauditor = interfaceutil.Attribute(
1404 1408 """A pathauditor for the working directory.
1405 1409
1406 1410 This is like ``auditor`` except it doesn't do filesystem checks.
1407 1411 """)
1408 1412
1409 1413 baseui = interfaceutil.Attribute(
1410 1414 """Original ui instance passed into constructor.""")
1411 1415
1412 1416 ui = interfaceutil.Attribute(
1413 1417 """Main ui instance for this instance.""")
1414 1418
1415 1419 sharedpath = interfaceutil.Attribute(
1416 1420 """Path to the .hg directory of the repo this repo was shared from.""")
1417 1421
1418 1422 store = interfaceutil.Attribute(
1419 1423 """A store instance.""")
1420 1424
1421 1425 spath = interfaceutil.Attribute(
1422 1426 """Path to the store.""")
1423 1427
1424 1428 sjoin = interfaceutil.Attribute(
1425 1429 """Alias to self.store.join.""")
1426 1430
1427 1431 cachevfs = interfaceutil.Attribute(
1428 1432 """A VFS used to access the cache directory.
1429 1433
1430 1434 Typically .hg/cache.
1431 1435 """)
1432 1436
1433 1437 filteredrevcache = interfaceutil.Attribute(
1434 1438 """Holds sets of revisions to be filtered.""")
1435 1439
1436 1440 names = interfaceutil.Attribute(
1437 1441 """A ``namespaces`` instance.""")
1438 1442
1439 1443 def close():
1440 1444 """Close the handle on this repository."""
1441 1445
1442 1446 def peer():
1443 1447 """Obtain an object conforming to the ``peer`` interface."""
1444 1448
1445 1449 def unfiltered():
1446 1450 """Obtain an unfiltered/raw view of this repo."""
1447 1451
1448 1452 def filtered(name, visibilityexceptions=None):
1449 1453 """Obtain a named view of this repository."""
1450 1454
1451 1455 obsstore = interfaceutil.Attribute(
1452 1456 """A store of obsolescence data.""")
1453 1457
1454 1458 changelog = interfaceutil.Attribute(
1455 1459 """A handle on the changelog revlog.""")
1456 1460
1457 1461 manifestlog = interfaceutil.Attribute(
1458 1462 """An instance conforming to the ``imanifestlog`` interface.
1459 1463
1460 1464 Provides access to manifests for the repository.
1461 1465 """)
1462 1466
1463 1467 dirstate = interfaceutil.Attribute(
1464 1468 """Working directory state.""")
1465 1469
1466 1470 narrowpats = interfaceutil.Attribute(
1467 1471 """Matcher patterns for this repository's narrowspec.""")
1468 1472
1469 1473 def narrowmatch():
1470 1474 """Obtain a matcher for the narrowspec."""
1471 1475
1472 1476 def setnarrowpats(newincludes, newexcludes):
1473 1477 """Define the narrowspec for this repository."""
1474 1478
1475 1479 def __getitem__(changeid):
1476 1480 """Try to resolve a changectx."""
1477 1481
1478 1482 def __contains__(changeid):
1479 1483 """Whether a changeset exists."""
1480 1484
1481 1485 def __nonzero__():
1482 1486 """Always returns True."""
1483 1487 return True
1484 1488
1485 1489 __bool__ = __nonzero__
1486 1490
1487 1491 def __len__():
1488 1492 """Returns the number of changesets in the repo."""
1489 1493
1490 1494 def __iter__():
1491 1495 """Iterate over revisions in the changelog."""
1492 1496
1493 1497 def revs(expr, *args):
1494 1498 """Evaluate a revset.
1495 1499
1496 1500 Emits revisions.
1497 1501 """
1498 1502
1499 1503 def set(expr, *args):
1500 1504 """Evaluate a revset.
1501 1505
1502 1506 Emits changectx instances.
1503 1507 """
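Usage sketch of the two revset entry points; the expressions are illustrative and ``process``, ``revnums``, and ``startnode`` are placeholders:

for rev in repo.revs('heads(%ld)', revnums):
    process(rev)            # integer revision numbers

for ctx in repo.set('%n::', startnode):
    process(ctx.hex())      # changectx instances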
1504 1508
1505 1509 def anyrevs(specs, user=False, localalias=None):
1506 1510 """Find revisions matching one of the given revsets."""
1507 1511
1508 1512 def url():
1509 1513 """Returns a string representing the location of this repo."""
1510 1514
1511 1515 def hook(name, throw=False, **args):
1512 1516 """Call a hook."""
1513 1517
1514 1518 def tags():
1515 1519 """Return a mapping of tag to node."""
1516 1520
1517 1521 def tagtype(tagname):
1518 1522 """Return the type of a given tag."""
1519 1523
1520 1524 def tagslist():
1521 1525 """Return a list of tags ordered by revision."""
1522 1526
1523 1527 def nodetags(node):
1524 1528 """Return the tags associated with a node."""
1525 1529
1526 1530 def nodebookmarks(node):
1527 1531 """Return the list of bookmarks pointing to the specified node."""
1528 1532
1529 1533 def branchmap():
1530 1534 """Return a mapping of branch to heads in that branch."""
1531 1535
1532 1536 def revbranchcache():
1533 1537 """Obtain the rev-branch cache used to look up branch names of revisions."""
1534 1538
1535 1539 def branchtip(branch, ignoremissing=False):
1536 1540 """Return the tip node for a given branch."""
1537 1541
1538 1542 def lookup(key):
1539 1543 """Resolve the node for a revision."""
1540 1544
1541 1545 def lookupbranch(key):
1542 1546 """Look up the branch name of the given revision or branch name."""
1543 1547
1544 1548 def known(nodes):
1545 1549 """Determine whether a series of nodes is known.
1546 1550
1547 1551 Returns a list of bools.
1548 1552 """
1549 1553
1550 1554 def local():
1551 1555 """Whether the repository is local."""
1552 1556 return True
1553 1557
1554 1558 def publishing():
1555 1559 """Whether the repository is a publishing repository."""
1556 1560
1557 1561 def cancopy():
1558 1562 """Whether the repository store can be copied for a local clone."""
1559 1563
1560 1564 def shared():
1561 1565 """The type of shared repository or None."""
1562 1566
1563 1567 def wjoin(f, *insidef):
1564 1568 """Calls self.vfs.reljoin(self.root, f, *insidef)"""
1565 1569
1566 1570 def setparents(p1, p2):
1567 1571 """Set the parent nodes of the working directory."""
1568 1572
1569 1573 def filectx(path, changeid=None, fileid=None):
1570 1574 """Obtain a filectx for the given file revision."""
1571 1575
1572 1576 def getcwd():
1573 1577 """Obtain the current working directory from the dirstate."""
1574 1578
1575 1579 def pathto(f, cwd=None):
1576 1580 """Obtain the relative path to a file."""
1577 1581
1578 1582 def adddatafilter(name, fltr):
1579 1583 """Register a data filter function under the given name."""
1580 1584
1581 1585 def wread(filename):
1582 1586 """Read a file from wvfs, using data filters."""
1583 1587
1584 1588 def wwrite(filename, data, flags, backgroundclose=False, **kwargs):
1585 1589 """Write data to a file in the wvfs, using data filters."""
1586 1590
1587 1591 def wwritedata(filename, data):
1588 1592 """Resolve data for writing to the wvfs, using data filters."""
1589 1593
1590 1594 def currenttransaction():
1591 1595 """Obtain the current transaction instance or None."""
1592 1596
1593 1597 def transaction(desc, report=None):
1594 1598 """Open a new transaction to write to the repository."""
1595 1599
1596 1600 def undofiles():
1597 1601 """Returns a list of (vfs, path) for files to undo transactions."""
1598 1602
1599 1603 def recover():
1600 1604 """Roll back an interrupted transaction."""
1601 1605
1602 1606 def rollback(dryrun=False, force=False):
1603 1607 """Undo the last transaction.
1604 1608
1605 1609 DANGEROUS.
1606 1610 """
1607 1611
1608 1612 def updatecaches(tr=None, full=False):
1609 1613 """Warm repo caches."""
1610 1614
1611 1615 def invalidatecaches():
1612 1616 """Invalidate cached data due to the repository mutating."""
1613 1617
1614 1618 def invalidatevolatilesets():
1615 1619 """Invalidate volatile computed caches (e.g. obsolescence-derived sets)."""
1616 1620
1617 1621 def invalidatedirstate():
1618 1622 """Invalidate the dirstate."""
1619 1623
1620 1624 def invalidate(clearfilecache=False):
1621 1625 """Invalidate store and non-store caches, other than the dirstate."""
1622 1626
1623 1627 def invalidateall():
1624 1628 """Fully invalidate all caches, forcing a reread of outside changes."""
1625 1629
1626 1630 def lock(wait=True):
1627 1631 """Lock the repository store and return a lock instance."""
1628 1632
1629 1633 def wlock(wait=True):
1630 1634 """Lock the non-store parts of the repository."""
1631 1635
1632 1636 def currentwlock():
1633 1637 """Return the wlock if it's held or None."""
1634 1638
1635 1639 def checkcommitpatterns(wctx, vdirs, match, status, fail):
1636 1640 """Check for commit arguments that aren't committable."""
1637 1641
1638 1642 def commit(text='', user=None, date=None, match=None, force=False,
1639 1643 editor=False, extra=None):
1640 1644 """Add a new revision to the repository."""
1641 1645
1642 1646 def commitctx(ctx, error=False):
1643 1647 """Commit a commitctx instance to the repository."""
1644 1648
1645 1649 def destroying():
1646 1650 """Inform the repository that nodes are about to be destroyed."""
1647 1651
1648 1652 def destroyed():
1649 1653 """Inform the repository that nodes have been destroyed."""
1650 1654
1651 1655 def status(node1='.', node2=None, match=None, ignored=False,
1652 1656 clean=False, unknown=False, listsubrepos=False):
1653 1657 """Convenience method to call repo[x].status()."""
1654 1658
1655 1659 def addpostdsstatus(ps):
1656 1660 """Register a callback to run after a dirstate status operation."""
1657 1661
1658 1662 def postdsstatus():
1659 1663 """Return the list of registered post-dirstate-status callbacks."""
1660 1664
1661 1665 def clearpostdsstatus():
1662 1666 """Clear the registered post-dirstate-status callbacks."""
1663 1667
1664 1668 def heads(start=None):
1665 1669 """Obtain list of nodes that are DAG heads."""
1666 1670
1667 1671 def branchheads(branch=None, start=None, closed=False):
1668 1672 """Return a list of heads for the given branch."""
1669 1673
1670 1674 def branches(nodes):
1671 1675 """Return branch information for a set of nodes (legacy wire protocol)."""
1672 1676
1673 1677 def between(pairs):
1674 1678 """Return the nodes between each (top, bottom) pair (legacy discovery)."""
1675 1679
1676 1680 def checkpush(pushop):
1677 1681 """Perform additional checks before a push; a hook point for extensions."""
1678 1682
1679 1683 prepushoutgoinghooks = interfaceutil.Attribute(
1680 1684 """util.hooks instance.""")
1681 1685
1682 1686 def pushkey(namespace, key, old, new):
1683 1687 """Update a value in a pushkey namespace; returns whether it succeeded."""
1684 1688
1685 1689 def listkeys(namespace):
1686 1690 """Return the values in a pushkey namespace as a dict."""
1687 1691
1688 1692 def debugwireargs(one, two, three=None, four=None, five=None):
1689 1693 """Used to test argument passing over the wire protocol."""
1690 1694
1691 1695 def savecommitmessage(text):
1692 1696 """Save a commit message (to ``.hg/last-message.txt``) and return its path."""
1693 1697
1694 1698 class completelocalrepository(ilocalrepositorymain,
1695 1699 ilocalrepositoryfilestorage):
1696 1700 """Complete interface for a local repository."""
1697 1701
1698 1702 class iwireprotocolcommandcacher(interfaceutil.Interface):
1699 1703 """Represents a caching backend for wire protocol commands.
1700 1704
1701 1705 Wire protocol version 2 supports transparent caching of many commands.
1702 1706 To leverage this caching, servers can activate objects that cache
1703 1707 command responses. Objects handle both cache writing and reading.
1704 1708 This interface defines how that response caching mechanism works.
1705 1709
1706 1710 Wire protocol version 2 commands emit a series of objects that are
1707 1711 serialized and sent to the client. The caching layer exists between
1708 1712 the invocation of the command function and the sending of its output
1709 1713 objects to an output layer.
1710 1714
1711 1715 Instances of this interface represent a binding to a cache that
1712 1716 can serve a response (in place of calling a command function) and/or
1713 1717 write responses to a cache for subsequent use.
1714 1718
1715 1719 When a command request arrives, the following happens with regards
1716 1720 to this interface:
1717 1721
1718 1722 1. The server determines whether the command request is cacheable.
1719 1723 2. If it is, an instance of this interface is spawned.
1720 1724 3. The cacher is activated in a context manager (``__enter__`` is called).
1721 1725 4. A cache *key* for that request is derived. This will call the
1722 1726 instance's ``adjustcachekeystate()`` method so the derivation
1723 1727 can be influenced.
1724 1728 5. The cacher is informed of the derived cache key via a call to
1725 1729 ``setcachekey()``.
1726 1730 6. The cacher's ``lookup()`` method is called to test for presence of
1727 1731 the derived key in the cache.
1728 1732 7. If ``lookup()`` returns a hit, that cached result is used in place
1729 1733 of invoking the command function. ``__exit__`` is called and the instance
1730 1734 is discarded.
1731 1735 8. Otherwise (on a cache miss), the command function is invoked.
1732 1736 9. ``onobject()`` is called for each object emitted by the command
1733 1737 function.
1734 1738 10. After the final object is seen, ``onfinished()`` is called.
1735 1739 11. ``__exit__`` is called to signal the end of use of the instance.
1736 1740
1737 1741 Cache *key* derivation can be influenced by the instance.
1738 1742
1739 1743 Cache keys are initially derived by a deterministic representation of
1740 1744 the command request. This includes the command name, arguments, protocol
1741 1745 version, etc. This initial key derivation is performed by CBOR-encoding a
1742 1746 data structure and feeding that output into a hasher.
1743 1747
1744 1748 Instances of this interface can influence this initial key derivation
1745 1749 via ``adjustcachekeystate()``.
1746 1750
1747 1751 The instance is informed of the derived cache key via a call to
1748 1752 ``setcachekey()``. The instance must store the key locally so it can
1749 1753 be consulted on subsequent operations that may require it.
1750 1754
1751 1755 When constructed, the instance has access to a callable that can be used
1752 1756 for encoding response objects. This callable receives as its single
1753 1757 argument an object emitted by a command function. It returns an iterable
1754 1758 of bytes chunks representing the encoded object. Unless the cacher is
1755 1759 caching native Python objects in memory or has a way of reconstructing
1756 1760 the original Python objects, implementations typically call this function
1757 1761 to produce bytes from the output objects and then store those bytes in
1758 1762 the cache. When it comes time to re-emit those bytes, they are wrapped
1759 1763 in a ``wireprototypes.encodedresponse`` instance to tell the output
1760 1764 layer that they are pre-encoded.
1761 1765
1762 1766 When receiving the objects emitted by the command function, instances
1763 1767 can choose what to do with those objects. The simplest thing to do is
1764 1768 re-emit the original objects. They will be forwarded to the output
1765 1769 layer and will be processed as if the cacher did not exist.
1766 1770
1767 1771 Implementations could also choose to not emit objects - instead locally
1768 1772 buffering objects or their encoded representation. They could then emit
1769 1773 a single "coalesced" object when ``onfinished()`` is called. In
1770 1774 this way, the implementation would function as a filtering layer of
1771 1775 sorts.
1772 1776
1773 1777 When caching objects, typically the encoded form of the object will
1774 1778 be stored. Keep in mind that if the original object is forwarded to
1775 1779 the output layer, it will need to be encoded there as well. For large
1776 1780 output, this redundant encoding could add overhead. Implementations
1777 1781 could wrap the encoded object data in ``wireprototypes.encodedresponse``
1778 1782 instances to avoid this overhead.
1779 1783 """
1780 1784 def __enter__():
1781 1785 """Marks the instance as active.
1782 1786
1783 1787 Should return self.
1784 1788 """
1785 1789
1786 1790 def __exit__(exctype, excvalue, exctb):
1787 1791 """Called when cacher is no longer used.
1788 1792
1789 1793 This can be used by implementations to perform cleanup actions (e.g.
1790 1794 disconnecting network sockets, aborting a partially cached response).
1791 1795 """
1792 1796
1793 1797 def adjustcachekeystate(state):
1794 1798 """Influences cache key derivation by adjusting state to derive key.
1795 1799
1796 1800 A dict defining the state used to derive the cache key is passed.
1797 1801
1798 1802 Implementations can modify this dict to record additional state that
1799 1803 should influence key derivation.
1800 1804
1801 1805 Implementations are *highly* encouraged to not modify or delete
1802 1806 existing keys.
1803 1807 """
1804 1808
1805 1809 def setcachekey(key):
1806 1810 """Record the derived cache key for this request.
1807 1811
1808 1812 Instances may mutate the key for internal usage, as desired. e.g.
1809 1813 instances may wish to prepend the repo name, introduce path
1810 1814 components for filesystem or URL addressing, etc. Behavior is up to
1811 1815 the cache.
1812 1816
1813 1817 Returns a bool indicating if the request is cacheable by this
1814 1818 instance.
1815 1819 """
1816 1820
1817 1821 def lookup():
1818 1822 """Attempt to resolve an entry in the cache.
1819 1823
1820 1824 The instance is instructed to look for the cache key that it was
1821 1825 informed about via the call to ``setcachekey()``.
1822 1826
1823 1827 If there's no cache hit or the cacher doesn't wish to use the cached
1824 1828 entry, ``None`` should be returned.
1825 1829
1826 1830 Else, a dict defining the cached result should be returned. The
1827 1831 dict may have the following keys:
1828 1832
1829 1833 objs
1830 1834 An iterable of objects that should be sent to the client. That
1831 1835 iterable of objects is expected to be what the command function
1832 1836 would return if invoked or an equivalent representation thereof.
1833 1837 """
1834 1838
1835 1839 def onobject(obj):
1836 1840 """Called when a new object is emitted from the command function.
1837 1841
1838 1842 Receives as its argument the object that was emitted from the
1839 1843 command function.
1840 1844
1841 1845 This method returns an iterator of objects to forward to the output
1842 1846 layer. The easiest implementation is a generator that just
1843 1847 ``yield obj``.
1844 1848 """
1845 1849
1846 1850 def onfinished():
1847 1851 """Called after all objects have been emitted from the command function.
1848 1852
1849 1853 Implementations should return an iterator of objects to forward to
1850 1854 the output layer.
1851 1855
1852 1856 This method can be a generator.
1853 1857 """
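# --- Illustrative sketch (not part of the original interface file) ---------
# A minimal, hypothetical in-memory implementation of the
# ``iwireprotocolcommandcacher`` interface above. It follows the documented
# lifecycle (``__enter__``, ``adjustcachekeystate()``, ``setcachekey()``,
# ``lookup()``, ``onobject()``, ``onfinished()``, ``__exit__``). The
# module-level ``_memorycache`` dict and the class name are assumptions
# made for this sketch only.
_memorycache = {}

@interfaceutil.implementer(iwireprotocolcommandcacher)
class memorycommandcacher(object):
    """Cache native Python response objects in a process-local dict."""

    def __init__(self):
        self._key = None
        self._objects = []

    def __enter__(self):
        return self

    def __exit__(self, exctype, excvalue, exctb):
        # Drop a partially buffered response if the command failed.
        if exctype is not None:
            self._objects = []

    def adjustcachekeystate(self, state):
        # Record extra state that should influence key derivation;
        # existing keys are intentionally left untouched.
        state[b'cacher'] = b'memory'

    def setcachekey(self, key):
        self._key = key
        return True

    def lookup(self):
        if self._key not in _memorycache:
            return None
        return {b'objs': list(_memorycache[self._key])}

    def onobject(self, obj):
        # Buffer the object for caching and re-emit it unchanged.
        self._objects.append(obj)
        yield obj

    def onfinished(self):
        _memorycache[self._key] = list(self._objects)
        return []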
@@ -1,2544 +1,2549
1 1 # revlog.py - storage back-end for mercurial
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 """Storage back-end for Mercurial.
9 9
10 10 This provides efficient delta storage with O(1) retrieve and append
11 11 and O(changes) merge between branches.
12 12 """
13 13
14 14 from __future__ import absolute_import
15 15
16 16 import collections
17 17 import contextlib
18 18 import errno
19 19 import os
20 20 import struct
21 21 import zlib
22 22
23 23 # import stuff from node for others to import from revlog
24 24 from .node import (
25 25 bin,
26 26 hex,
27 27 nullhex,
28 28 nullid,
29 29 nullrev,
30 30 short,
31 31 wdirfilenodeids,
32 32 wdirhex,
33 33 wdirid,
34 34 wdirrev,
35 35 )
36 36 from .i18n import _
37 37 from .revlogutils.constants import (
38 38 FLAG_GENERALDELTA,
39 39 FLAG_INLINE_DATA,
40 40 REVIDX_DEFAULT_FLAGS,
41 41 REVIDX_ELLIPSIS,
42 42 REVIDX_EXTSTORED,
43 43 REVIDX_FLAGS_ORDER,
44 44 REVIDX_ISCENSORED,
45 45 REVIDX_KNOWN_FLAGS,
46 46 REVIDX_RAWTEXT_CHANGING_FLAGS,
47 47 REVLOGV0,
48 48 REVLOGV1,
49 49 REVLOGV1_FLAGS,
50 50 REVLOGV2,
51 51 REVLOGV2_FLAGS,
52 52 REVLOG_DEFAULT_FLAGS,
53 53 REVLOG_DEFAULT_FORMAT,
54 54 REVLOG_DEFAULT_VERSION,
55 55 )
56 56 from .thirdparty import (
57 57 attr,
58 58 )
59 59 from . import (
60 60 ancestor,
61 61 dagop,
62 62 error,
63 63 mdiff,
64 64 policy,
65 65 pycompat,
66 66 repository,
67 67 templatefilters,
68 68 util,
69 69 )
70 70 from .revlogutils import (
71 71 deltas as deltautil,
72 72 )
73 73 from .utils import (
74 74 interfaceutil,
75 75 storageutil,
76 76 stringutil,
77 77 )
78 78
79 79 # Referencing all of these names prevents pyflakes "unused import" warnings.
80 80 # We need these names available in the module for extensions.
81 81 REVLOGV0
82 82 REVLOGV1
83 83 REVLOGV2
84 84 FLAG_INLINE_DATA
85 85 FLAG_GENERALDELTA
86 86 REVLOG_DEFAULT_FLAGS
87 87 REVLOG_DEFAULT_FORMAT
88 88 REVLOG_DEFAULT_VERSION
89 89 REVLOGV1_FLAGS
90 90 REVLOGV2_FLAGS
91 91 REVIDX_ISCENSORED
92 92 REVIDX_ELLIPSIS
93 93 REVIDX_EXTSTORED
94 94 REVIDX_DEFAULT_FLAGS
95 95 REVIDX_FLAGS_ORDER
96 96 REVIDX_KNOWN_FLAGS
97 97 REVIDX_RAWTEXT_CHANGING_FLAGS
98 98
99 99 parsers = policy.importmod(r'parsers')
100 100
101 101 # Aliased for performance.
102 102 _zlibdecompress = zlib.decompress
103 103
104 104 # max size of revlog with inline data
105 105 _maxinline = 131072
106 106 _chunksize = 1048576
107 107
108 108 # Store flag processors (cf. 'addflagprocessor()' to register)
109 109 _flagprocessors = {
110 110 REVIDX_ISCENSORED: None,
111 111 }
112 112
113 113 # Flag processors for REVIDX_ELLIPSIS.
114 114 def ellipsisreadprocessor(rl, text):
115 115 return text, False
116 116
117 117 def ellipsiswriteprocessor(rl, text):
118 118 return text, False
119 119
120 120 def ellipsisrawprocessor(rl, text):
121 121 return False
122 122
123 123 ellipsisprocessor = (
124 124 ellipsisreadprocessor,
125 125 ellipsiswriteprocessor,
126 126 ellipsisrawprocessor,
127 127 )
128 128
129 129 def addflagprocessor(flag, processor):
130 130 """Register a flag processor on a revision data flag.
131 131
132 132 Invariant:
133 133 - Flags need to be defined in REVIDX_KNOWN_FLAGS and REVIDX_FLAGS_ORDER,
134 134 and REVIDX_RAWTEXT_CHANGING_FLAGS if they can alter rawtext.
135 135 - Only one flag processor can be registered on a specific flag.
136 136 - flagprocessors must be 3-tuples of functions (read, write, raw) with the
137 137 following signatures:
138 138 - (read) f(self, rawtext) -> text, bool
139 139 - (write) f(self, text) -> rawtext, bool
140 140 - (raw) f(self, rawtext) -> bool
141 141 "text" is presented to the user. "rawtext" is stored in revlog data, not
142 142 directly visible to the user.
143 143 The boolean returned by these transforms is used to determine whether
144 144 the returned text can be used for hash integrity checking. For example,
145 145 if "write" returns False, then "text" is used to generate the hash. If
146 146 "write" returns True, the "rawtext" returned by "write" should be used
147 147 to generate the hash instead. Usually, "write" and "read" return
148 148 different booleans, and "raw" returns the same boolean as "write".
149 149
150 150 Note: The 'raw' transform is used for changegroup generation and in some
151 151 debug commands. In this case the transform only indicates whether the
152 152 contents can be used for hash integrity checks.
153 153 """
154 154 _insertflagprocessor(flag, processor, _flagprocessors)
155 155
156 156 def _insertflagprocessor(flag, processor, flagprocessors):
157 157 if not flag & REVIDX_KNOWN_FLAGS:
158 158 msg = _("cannot register processor on unknown flag '%#x'.") % (flag)
159 159 raise error.ProgrammingError(msg)
160 160 if flag not in REVIDX_FLAGS_ORDER:
161 161 msg = _("flag '%#x' undefined in REVIDX_FLAGS_ORDER.") % (flag)
162 162 raise error.ProgrammingError(msg)
163 163 if flag in flagprocessors:
164 164 msg = _("cannot register multiple processors on flag '%#x'.") % (flag)
165 165 raise error.Abort(msg)
166 166 flagprocessors[flag] = processor
167 167
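# --- Illustrative sketch (not part of the original file) -------------------
# A hypothetical registration through addflagprocessor(), assuming nothing
# else has claimed REVIDX_EXTSTORED. The three callables follow the
# (read, write, raw) contract documented above; these identity transforms
# return True to signal that the resulting text is usable for hash
# integrity checking.
def _sketchreadprocessor(rl, rawtext):
    return rawtext, True   # text presented to the user

def _sketchwriteprocessor(rl, text):
    return text, True      # rawtext stored in revlog data

def _sketchrawprocessor(rl, rawtext):
    return True            # rawtext is safe for integrity checks

# The call itself is left commented out so the sketch never conflicts with
# a real extension (e.g. LFS) registering the same flag:
# addflagprocessor(REVIDX_EXTSTORED,
#                  (_sketchreadprocessor, _sketchwriteprocessor,
#                   _sketchrawprocessor))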
168 168 def getoffset(q):
169 169 return int(q >> 16)
170 170
171 171 def gettype(q):
172 172 return int(q & 0xFFFF)
173 173
174 174 def offset_type(offset, type):
175 175 if (type & ~REVIDX_KNOWN_FLAGS) != 0:
176 176 raise ValueError('unknown revlog index flags')
177 177 return int(int(offset) << 16 | type)
178 178
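# Illustrative round-trip (a sketch, not used by the module): the first
# index-entry field packs a 48-bit offset above 16 bits of flags, and
# getoffset()/gettype() recover the two halves:
#
#   packed = offset_type(4096, REVIDX_ISCENSORED)
#   assert getoffset(packed) == 4096
#   assert gettype(packed) == REVIDX_ISCENSORED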
179 179 @attr.s(slots=True, frozen=True)
180 180 class _revisioninfo(object):
181 181 """Information about a revision that allows building its fulltext
182 182 node: expected hash of the revision
183 183 p1, p2: parent revs of the revision
184 184 btext: built text cache consisting of a one-element list
185 185 cachedelta: (baserev, uncompressed_delta) or None
186 186 flags: flags associated with the revision storage
187 187
188 188 One of btext[0] or cachedelta must be set.
189 189 """
190 190 node = attr.ib()
191 191 p1 = attr.ib()
192 192 p2 = attr.ib()
193 193 btext = attr.ib()
194 194 textlen = attr.ib()
195 195 cachedelta = attr.ib()
196 196 flags = attr.ib()
197 197
198 198 @interfaceutil.implementer(repository.irevisiondelta)
199 199 @attr.s(slots=True)
200 200 class revlogrevisiondelta(object):
201 201 node = attr.ib()
202 202 p1node = attr.ib()
203 203 p2node = attr.ib()
204 204 basenode = attr.ib()
205 205 flags = attr.ib()
206 206 baserevisionsize = attr.ib()
207 207 revision = attr.ib()
208 208 delta = attr.ib()
209 209 linknode = attr.ib(default=None)
210 210
211 211 @interfaceutil.implementer(repository.iverifyproblem)
212 212 @attr.s(frozen=True)
213 213 class revlogproblem(object):
214 214 warning = attr.ib(default=None)
215 215 error = attr.ib(default=None)
216 216 node = attr.ib(default=None)
217 217
218 218 # index v0:
219 219 # 4 bytes: offset
220 220 # 4 bytes: compressed length
221 221 # 4 bytes: base rev
222 222 # 4 bytes: link rev
223 223 # 20 bytes: parent 1 nodeid
224 224 # 20 bytes: parent 2 nodeid
225 225 # 20 bytes: nodeid
226 226 indexformatv0 = struct.Struct(">4l20s20s20s")
227 227 indexformatv0_pack = indexformatv0.pack
228 228 indexformatv0_unpack = indexformatv0.unpack
229 229
230 230 class revlogoldindex(list):
231 231 def __getitem__(self, i):
232 232 if i == -1:
233 233 return (0, 0, 0, -1, -1, -1, -1, nullid)
234 234 return list.__getitem__(self, i)
235 235
236 236 class revlogoldio(object):
237 237 def __init__(self):
238 238 self.size = indexformatv0.size
239 239
240 240 def parseindex(self, data, inline):
241 241 s = self.size
242 242 index = []
243 243 nodemap = {nullid: nullrev}
244 244 n = off = 0
245 245 l = len(data)
246 246 while off + s <= l:
247 247 cur = data[off:off + s]
248 248 off += s
249 249 e = indexformatv0_unpack(cur)
250 250 # transform to revlogv1 format
251 251 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
252 252 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
253 253 index.append(e2)
254 254 nodemap[e[6]] = n
255 255 n += 1
256 256
257 257 return revlogoldindex(index), nodemap, None
258 258
259 259 def packentry(self, entry, node, version, rev):
260 260 if gettype(entry[0]):
261 261 raise error.RevlogError(_('index entry flags need revlog '
262 262 'version 1'))
263 263 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
264 264 node(entry[5]), node(entry[6]), entry[7])
265 265 return indexformatv0_pack(*e2)
266 266
267 267 # index ng:
268 268 # 6 bytes: offset
269 269 # 2 bytes: flags
270 270 # 4 bytes: compressed length
271 271 # 4 bytes: uncompressed length
272 272 # 4 bytes: base rev
273 273 # 4 bytes: link rev
274 274 # 4 bytes: parent 1 rev
275 275 # 4 bytes: parent 2 rev
276 276 # 32 bytes: nodeid
277 277 indexformatng = struct.Struct(">Qiiiiii20s12x")
278 278 indexformatng_pack = indexformatng.pack
279 279 versionformat = struct.Struct(">I")
280 280 versionformat_pack = versionformat.pack
281 281 versionformat_unpack = versionformat.unpack
282 282
283 283 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
284 284 # signed integer)
285 285 _maxentrysize = 0x7fffffff
286 286
287 287 class revlogio(object):
288 288 def __init__(self):
289 289 self.size = indexformatng.size
290 290
291 291 def parseindex(self, data, inline):
292 292 # call the C implementation to parse the index data
293 293 index, cache = parsers.parse_index2(data, inline)
294 294 return index, getattr(index, 'nodemap', None), cache
295 295
296 296 def packentry(self, entry, node, version, rev):
297 297 p = indexformatng_pack(*entry)
298 298 if rev == 0:
299 299 p = versionformat_pack(version) + p[4:]
300 300 return p
301 301
302 302 class revlog(object):
303 303 """
304 304 the underlying revision storage object
305 305
306 306 A revlog consists of two parts, an index and the revision data.
307 307
308 308 The index is a file with a fixed record size containing
309 309 information on each revision, including its nodeid (hash), the
310 310 nodeids of its parents, the position and offset of its data within
311 311 the data file, and the revision it's based on. Finally, each entry
312 312 contains a linkrev entry that can serve as a pointer to external
313 313 data.
314 314
315 315 The revision data itself is a linear collection of data chunks.
316 316 Each chunk represents a revision and is usually represented as a
317 317 delta against the previous chunk. To bound lookup time, runs of
318 318 deltas are limited to about 2 times the length of the original
319 319 version data. This makes retrieval of a version proportional to
320 320 its size, or O(1) relative to the number of revisions.
321 321
322 322 Both pieces of the revlog are written to in an append-only
323 323 fashion, which means we never need to rewrite a file to insert or
324 324 remove data, and can use some simple techniques to avoid the need
325 325 for locking while reading.
326 326
327 327 If checkambig, indexfile is opened with checkambig=True at
328 328 writing, to avoid file stat ambiguity.
329 329
330 330 If mmaplargeindex is True, and an mmapindexthreshold is set, the
331 331 index will be mmapped rather than read if it is larger than the
332 332 configured threshold.
333 333
334 334 If censorable is True, the revlog can have censored revisions.
335 335 """
336 336 def __init__(self, opener, indexfile, datafile=None, checkambig=False,
337 337 mmaplargeindex=False, censorable=False):
338 338 """
339 339 create a revlog object
340 340
341 341 opener is a function that abstracts the file opening operation
342 342 and can be used to implement COW semantics or the like.
343 343 """
344 344 self.indexfile = indexfile
345 345 self.datafile = datafile or (indexfile[:-2] + ".d")
346 346 self.opener = opener
347 347 # When True, indexfile is opened with checkambig=True at writing, to
348 348 # avoid file stat ambiguity.
349 349 self._checkambig = checkambig
350 350 self._censorable = censorable
351 351 # 3-tuple of (node, rev, text) for a raw revision.
352 352 self._revisioncache = None
353 353 # Maps rev to chain base rev.
354 354 self._chainbasecache = util.lrucachedict(100)
355 355 # 2-tuple of (offset, data) of raw data from the revlog at an offset.
356 356 self._chunkcache = (0, '')
357 357 # How much data to read and cache into the raw revlog data cache.
358 358 self._chunkcachesize = 65536
359 359 self._maxchainlen = None
360 360 self._deltabothparents = True
361 361 self.index = []
362 362 # Mapping of partial identifiers to full nodes.
363 363 self._pcache = {}
364 364 # Mapping of revision integer to full node.
365 365 self._nodecache = {nullid: nullrev}
366 366 self._nodepos = None
367 367 self._compengine = 'zlib'
368 368 self._maxdeltachainspan = -1
369 369 self._withsparseread = False
370 370 self._sparserevlog = False
371 371 self._srdensitythreshold = 0.50
372 372 self._srmingapsize = 262144
373 373
374 374 # Make copy of flag processors so each revlog instance can support
375 375 # custom flags.
376 376 self._flagprocessors = dict(_flagprocessors)
377 377
378 378 mmapindexthreshold = None
379 379 v = REVLOG_DEFAULT_VERSION
380 380 opts = getattr(opener, 'options', None)
381 381 if opts is not None:
382 382 if 'revlogv2' in opts:
383 383 # version 2 revlogs always use generaldelta.
384 384 v = REVLOGV2 | FLAG_GENERALDELTA | FLAG_INLINE_DATA
385 385 elif 'revlogv1' in opts:
386 386 if 'generaldelta' in opts:
387 387 v |= FLAG_GENERALDELTA
388 388 else:
389 389 v = 0
390 390 if 'chunkcachesize' in opts:
391 391 self._chunkcachesize = opts['chunkcachesize']
392 392 if 'maxchainlen' in opts:
393 393 self._maxchainlen = opts['maxchainlen']
394 394 if 'deltabothparents' in opts:
395 395 self._deltabothparents = opts['deltabothparents']
396 396 self._lazydeltabase = bool(opts.get('lazydeltabase', False))
397 397 if 'compengine' in opts:
398 398 self._compengine = opts['compengine']
399 399 if 'maxdeltachainspan' in opts:
400 400 self._maxdeltachainspan = opts['maxdeltachainspan']
401 401 if mmaplargeindex and 'mmapindexthreshold' in opts:
402 402 mmapindexthreshold = opts['mmapindexthreshold']
403 403 self._sparserevlog = bool(opts.get('sparse-revlog', False))
404 404 withsparseread = bool(opts.get('with-sparse-read', False))
405 405 # sparse-revlog forces sparse-read
406 406 self._withsparseread = self._sparserevlog or withsparseread
407 407 if 'sparse-read-density-threshold' in opts:
408 408 self._srdensitythreshold = opts['sparse-read-density-threshold']
409 409 if 'sparse-read-min-gap-size' in opts:
410 410 self._srmingapsize = opts['sparse-read-min-gap-size']
411 411 if opts.get('enableellipsis'):
412 412 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
413 413
414 414 # revlog v0 doesn't have flag processors
415 415 for flag, processor in opts.get(b'flagprocessors', {}).iteritems():
416 416 _insertflagprocessor(flag, processor, self._flagprocessors)
417 417
418 418 if self._chunkcachesize <= 0:
419 419 raise error.RevlogError(_('revlog chunk cache size %r is not '
420 420 'greater than 0') % self._chunkcachesize)
421 421 elif self._chunkcachesize & (self._chunkcachesize - 1):
422 422 raise error.RevlogError(_('revlog chunk cache size %r is not a '
423 423 'power of 2') % self._chunkcachesize)
424 424
425 425 self._loadindex(v, mmapindexthreshold)
426 426
427 427 def _loadindex(self, v, mmapindexthreshold):
428 428 indexdata = ''
429 429 self._initempty = True
430 430 try:
431 431 with self._indexfp() as f:
432 432 if (mmapindexthreshold is not None and
433 433 self.opener.fstat(f).st_size >= mmapindexthreshold):
434 434 indexdata = util.buffer(util.mmapread(f))
435 435 else:
436 436 indexdata = f.read()
437 437 if len(indexdata) > 0:
438 438 v = versionformat_unpack(indexdata[:4])[0]
439 439 self._initempty = False
440 440 except IOError as inst:
441 441 if inst.errno != errno.ENOENT:
442 442 raise
443 443
444 444 self.version = v
445 445 self._inline = v & FLAG_INLINE_DATA
446 446 self._generaldelta = v & FLAG_GENERALDELTA
447 447 flags = v & ~0xFFFF
448 448 fmt = v & 0xFFFF
449 449 if fmt == REVLOGV0:
450 450 if flags:
451 451 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
452 452 'revlog %s') %
453 453 (flags >> 16, fmt, self.indexfile))
454 454 elif fmt == REVLOGV1:
455 455 if flags & ~REVLOGV1_FLAGS:
456 456 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
457 457 'revlog %s') %
458 458 (flags >> 16, fmt, self.indexfile))
459 459 elif fmt == REVLOGV2:
460 460 if flags & ~REVLOGV2_FLAGS:
461 461 raise error.RevlogError(_('unknown flags (%#04x) in version %d '
462 462 'revlog %s') %
463 463 (flags >> 16, fmt, self.indexfile))
464 464 else:
465 465 raise error.RevlogError(_('unknown version (%d) in revlog %s') %
466 466 (fmt, self.indexfile))
467 467
468 468 self._storedeltachains = True
469 469
470 470 self._io = revlogio()
471 471 if self.version == REVLOGV0:
472 472 self._io = revlogoldio()
473 473 try:
474 474 d = self._io.parseindex(indexdata, self._inline)
475 475 except (ValueError, IndexError):
476 476 raise error.RevlogError(_("index %s is corrupted") %
477 477 self.indexfile)
478 478 self.index, nodemap, self._chunkcache = d
479 479 if nodemap is not None:
480 480 self.nodemap = self._nodecache = nodemap
481 481 if not self._chunkcache:
482 482 self._chunkclear()
483 483 # revnum -> (chain-length, sum-delta-length)
484 484 self._chaininfocache = {}
485 485 # revlog header -> revlog compressor
486 486 self._decompressors = {}
487 487
488 488 @util.propertycache
489 489 def _compressor(self):
490 490 return util.compengines[self._compengine].revlogcompressor()
491 491
492 492 def _indexfp(self, mode='r'):
493 493 """file object for the revlog's index file"""
494 494 args = {r'mode': mode}
495 495 if mode != 'r':
496 496 args[r'checkambig'] = self._checkambig
497 497 if mode == 'w':
498 498 args[r'atomictemp'] = True
499 499 return self.opener(self.indexfile, **args)
500 500
501 501 def _datafp(self, mode='r'):
502 502 """file object for the revlog's data file"""
503 503 return self.opener(self.datafile, mode=mode)
504 504
505 505 @contextlib.contextmanager
506 506 def _datareadfp(self, existingfp=None):
507 507 """file object suitable to read data"""
508 508 if existingfp is not None:
509 509 yield existingfp
510 510 else:
511 511 if self._inline:
512 512 func = self._indexfp
513 513 else:
514 514 func = self._datafp
515 515 with func() as fp:
516 516 yield fp
517 517
518 518 def tip(self):
519 519 return self.node(len(self.index) - 1)
520 520 def __contains__(self, rev):
521 521 return 0 <= rev < len(self)
522 522 def __len__(self):
523 523 return len(self.index)
524 524 def __iter__(self):
525 525 return iter(pycompat.xrange(len(self)))
526 526 def revs(self, start=0, stop=None):
527 527 """iterate over all revs in this revlog (from start to stop)"""
528 528 return storageutil.iterrevs(len(self), start=start, stop=stop)
529 529
530 530 @util.propertycache
531 531 def nodemap(self):
532 532 if self.index:
533 533 # populate mapping down to the initial node
534 534 node0 = self.index[0][7] # get around changelog filtering
535 535 self.rev(node0)
536 536 return self._nodecache
537 537
538 538 def hasnode(self, node):
539 539 try:
540 540 self.rev(node)
541 541 return True
542 542 except KeyError:
543 543 return False
544 544
545 545 def candelta(self, baserev, rev):
546 546 """whether two revisions (baserev, rev) can be delta-ed or not"""
547 547 # Disable delta if either rev requires a content-changing flag
548 548 # processor (ex. LFS). This is because such a flag processor can alter
549 549 # the rawtext content that the delta will be based on, and two clients
550 550 # could have the same revlog node with different flags (i.e. different
551 551 # rawtext contents) and the delta could be incompatible.
552 552 if ((self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS)
553 553 or (self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS)):
554 554 return False
555 555 return True
556 556
557 557 def clearcaches(self):
558 558 self._revisioncache = None
559 559 self._chainbasecache.clear()
560 560 self._chunkcache = (0, '')
561 561 self._pcache = {}
562 562
563 563 try:
564 564 self._nodecache.clearcaches()
565 565 except AttributeError:
566 566 self._nodecache = {nullid: nullrev}
567 567 self._nodepos = None
568 568
569 569 def rev(self, node):
570 570 try:
571 571 return self._nodecache[node]
572 572 except TypeError:
573 573 raise
574 574 except error.RevlogError:
575 575 # parsers.c radix tree lookup failed
576 576 if node == wdirid or node in wdirfilenodeids:
577 577 raise error.WdirUnsupported
578 578 raise error.LookupError(node, self.indexfile, _('no node'))
579 579 except KeyError:
580 580 # pure python cache lookup failed
581 581 n = self._nodecache
582 582 i = self.index
583 583 p = self._nodepos
584 584 if p is None:
585 585 p = len(i) - 1
586 586 else:
587 587 assert p < len(i)
588 588 for r in pycompat.xrange(p, -1, -1):
589 589 v = i[r][7]
590 590 n[v] = r
591 591 if v == node:
592 592 self._nodepos = r - 1
593 593 return r
594 594 if node == wdirid or node in wdirfilenodeids:
595 595 raise error.WdirUnsupported
596 596 raise error.LookupError(node, self.indexfile, _('no node'))
597 597
598 598 # Accessors for index entries.
599 599
600 600 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
601 601 # are flags.
602 602 def start(self, rev):
603 603 return int(self.index[rev][0] >> 16)
604 604
605 605 def flags(self, rev):
606 606 return self.index[rev][0] & 0xFFFF
607 607
608 608 def length(self, rev):
609 609 return self.index[rev][1]
610 610
611 611 def rawsize(self, rev):
612 612 """return the length of the uncompressed text for a given revision"""
613 613 l = self.index[rev][2]
614 614 if l >= 0:
615 615 return l
616 616
617 617 t = self.revision(rev, raw=True)
618 618 return len(t)
619 619
620 620 def size(self, rev):
621 621 """length of non-raw text (processed by a "read" flag processor)"""
622 622 # fast path: if no "read" flag processor could change the content,
623 623 # size is rawsize. note: ELLIPSIS is known to not change the content.
624 624 flags = self.flags(rev)
625 625 if flags & (REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
626 626 return self.rawsize(rev)
627 627
628 628 return len(self.revision(rev, raw=False))
629 629
630 630 def chainbase(self, rev):
631 631 base = self._chainbasecache.get(rev)
632 632 if base is not None:
633 633 return base
634 634
635 635 index = self.index
636 636 iterrev = rev
637 637 base = index[iterrev][3]
638 638 while base != iterrev:
639 639 iterrev = base
640 640 base = index[iterrev][3]
641 641
642 642 self._chainbasecache[rev] = base
643 643 return base
644 644
645 645 def linkrev(self, rev):
646 646 return self.index[rev][4]
647 647
648 648 def parentrevs(self, rev):
649 649 try:
650 650 entry = self.index[rev]
651 651 except IndexError:
652 652 if rev == wdirrev:
653 653 raise error.WdirUnsupported
654 654 raise
655 655
656 656 return entry[5], entry[6]
657 657
658 658 # fast parentrevs(rev) where rev isn't filtered
659 659 _uncheckedparentrevs = parentrevs
660 660
661 661 def node(self, rev):
662 662 try:
663 663 return self.index[rev][7]
664 664 except IndexError:
665 665 if rev == wdirrev:
666 666 raise error.WdirUnsupported
667 667 raise
668 668
669 669 # Derived from index values.
670 670
671 671 def end(self, rev):
672 672 return self.start(rev) + self.length(rev)
673 673
674 674 def parents(self, node):
675 675 i = self.index
676 676 d = i[self.rev(node)]
677 677 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
678 678
679 679 def chainlen(self, rev):
680 680 return self._chaininfo(rev)[0]
681 681
682 682 def _chaininfo(self, rev):
683 683 chaininfocache = self._chaininfocache
684 684 if rev in chaininfocache:
685 685 return chaininfocache[rev]
686 686 index = self.index
687 687 generaldelta = self._generaldelta
688 688 iterrev = rev
689 689 e = index[iterrev]
690 690 clen = 0
691 691 compresseddeltalen = 0
692 692 while iterrev != e[3]:
693 693 clen += 1
694 694 compresseddeltalen += e[1]
695 695 if generaldelta:
696 696 iterrev = e[3]
697 697 else:
698 698 iterrev -= 1
699 699 if iterrev in chaininfocache:
700 700 t = chaininfocache[iterrev]
701 701 clen += t[0]
702 702 compresseddeltalen += t[1]
703 703 break
704 704 e = index[iterrev]
705 705 else:
706 706 # Add text length of base since decompressing that also takes
707 707 # work. For cache hits the length is already included.
708 708 compresseddeltalen += e[1]
709 709 r = (clen, compresseddeltalen)
710 710 chaininfocache[rev] = r
711 711 return r
712 712
713 713 def _deltachain(self, rev, stoprev=None):
714 714 """Obtain the delta chain for a revision.
715 715
716 716 ``stoprev`` specifies a revision to stop at. If not specified, we
717 717 stop at the base of the chain.
718 718
719 719 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
720 720 revs in ascending order and ``stopped`` is a bool indicating whether
721 721 ``stoprev`` was hit.
722 722 """
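# Illustrative example (a sketch, assuming generaldelta): if rev 5 deltas
# against rev 3, and rev 3 is stored as a full snapshot (its base is
# itself), then:
#   _deltachain(5)            -> ([3, 5], False)
#   _deltachain(5, stoprev=3) -> ([5], True)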
723 723 # Try C implementation.
724 724 try:
725 725 return self.index.deltachain(rev, stoprev, self._generaldelta)
726 726 except AttributeError:
727 727 pass
728 728
729 729 chain = []
730 730
731 731 # Alias to prevent attribute lookup in tight loop.
732 732 index = self.index
733 733 generaldelta = self._generaldelta
734 734
735 735 iterrev = rev
736 736 e = index[iterrev]
737 737 while iterrev != e[3] and iterrev != stoprev:
738 738 chain.append(iterrev)
739 739 if generaldelta:
740 740 iterrev = e[3]
741 741 else:
742 742 iterrev -= 1
743 743 e = index[iterrev]
744 744
745 745 if iterrev == stoprev:
746 746 stopped = True
747 747 else:
748 748 chain.append(iterrev)
749 749 stopped = False
750 750
751 751 chain.reverse()
752 752 return chain, stopped
753 753
754 754 def ancestors(self, revs, stoprev=0, inclusive=False):
755 755 """Generate the ancestors of 'revs' in reverse topological order.
756 756 Does not generate revs lower than stoprev.
757 757
758 758 See the documentation for ancestor.lazyancestors for more details."""
759 759
760 760 # first, make sure start revisions aren't filtered
761 761 revs = list(revs)
762 762 checkrev = self.node
763 763 for r in revs:
764 764 checkrev(r)
765 765 # and we're sure ancestors aren't filtered as well
766 766 if util.safehasattr(parsers, 'rustlazyancestors'):
767 767 return ancestor.rustlazyancestors(
768 768 self.index, revs,
769 769 stoprev=stoprev, inclusive=inclusive)
770 770 return ancestor.lazyancestors(self._uncheckedparentrevs, revs,
771 771 stoprev=stoprev, inclusive=inclusive)
772 772
773 773 def descendants(self, revs):
774 774 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
775 775
776 776 def findcommonmissing(self, common=None, heads=None):
777 777 """Return a tuple of the ancestors of common and the ancestors of heads
778 778 that are not ancestors of common. In revset terminology, we return the
779 779 tuple:
780 780
781 781 ::common, (::heads) - (::common)
782 782
783 783 The list is sorted by revision number, meaning it is
784 784 topologically sorted.
785 785
786 786 'heads' and 'common' are both lists of node IDs. If heads is
787 787 not supplied, uses all of the revlog's heads. If common is not
788 788 supplied, uses nullid."""
789 789 if common is None:
790 790 common = [nullid]
791 791 if heads is None:
792 792 heads = self.heads()
793 793
794 794 common = [self.rev(n) for n in common]
795 795 heads = [self.rev(n) for n in heads]
796 796
797 797 # we want the ancestors, but inclusive
798 798 class lazyset(object):
799 799 def __init__(self, lazyvalues):
800 800 self.addedvalues = set()
801 801 self.lazyvalues = lazyvalues
802 802
803 803 def __contains__(self, value):
804 804 return value in self.addedvalues or value in self.lazyvalues
805 805
806 806 def __iter__(self):
807 807 added = self.addedvalues
808 808 for r in added:
809 809 yield r
810 810 for r in self.lazyvalues:
811 811 if r not in added:
812 812 yield r
813 813
814 814 def add(self, value):
815 815 self.addedvalues.add(value)
816 816
817 817 def update(self, values):
818 818 self.addedvalues.update(values)
819 819
820 820 has = lazyset(self.ancestors(common))
821 821 has.add(nullrev)
822 822 has.update(common)
823 823
824 824 # take all ancestors from heads that aren't in has
825 825 missing = set()
826 826 visit = collections.deque(r for r in heads if r not in has)
827 827 while visit:
828 828 r = visit.popleft()
829 829 if r in missing:
830 830 continue
831 831 else:
832 832 missing.add(r)
833 833 for p in self.parentrevs(r):
834 834 if p not in has:
835 835 visit.append(p)
836 836 missing = list(missing)
837 837 missing.sort()
838 838 return has, [self.node(miss) for miss in missing]
839 839
840 840 def incrementalmissingrevs(self, common=None):
841 841 """Return an object that can be used to incrementally compute the
842 842 revision numbers of the ancestors of arbitrary sets that are not
843 843 ancestors of common. This is an ancestor.incrementalmissingancestors
844 844 object.
845 845
846 846 'common' is a list of revision numbers. If common is not supplied, uses
847 847 nullrev.
848 848 """
849 849 if common is None:
850 850 common = [nullrev]
851 851
852 852 return ancestor.incrementalmissingancestors(self.parentrevs, common)
853 853
854 854 def findmissingrevs(self, common=None, heads=None):
855 855 """Return the revision numbers of the ancestors of heads that
856 856 are not ancestors of common.
857 857
858 858 More specifically, return a list of revision numbers corresponding to
859 859 nodes N such that every N satisfies the following constraints:
860 860
861 861 1. N is an ancestor of some node in 'heads'
862 862 2. N is not an ancestor of any node in 'common'
863 863
864 864 The list is sorted by revision number, meaning it is
865 865 topologically sorted.
866 866
867 867 'heads' and 'common' are both lists of revision numbers. If heads is
868 868 not supplied, uses all of the revlog's heads. If common is not
869 869 supplied, uses nullid."""
870 870 if common is None:
871 871 common = [nullrev]
872 872 if heads is None:
873 873 heads = self.headrevs()
874 874
875 875 inc = self.incrementalmissingrevs(common=common)
876 876 return inc.missingancestors(heads)
877 877
878 878 def findmissing(self, common=None, heads=None):
879 879 """Return the ancestors of heads that are not ancestors of common.
880 880
881 881 More specifically, return a list of nodes N such that every N
882 882 satisfies the following constraints:
883 883
884 884 1. N is an ancestor of some node in 'heads'
885 885 2. N is not an ancestor of any node in 'common'
886 886
887 887 The list is sorted by revision number, meaning it is
888 888 topologically sorted.
889 889
890 890 'heads' and 'common' are both lists of node IDs. If heads is
891 891 not supplied, uses all of the revlog's heads. If common is not
892 892 supplied, uses nullid."""
893 893 if common is None:
894 894 common = [nullid]
895 895 if heads is None:
896 896 heads = self.heads()
897 897
898 898 common = [self.rev(n) for n in common]
899 899 heads = [self.rev(n) for n in heads]
900 900
901 901 inc = self.incrementalmissingrevs(common=common)
902 902 return [self.node(r) for r in inc.missingancestors(heads)]
903 903
904 904 def nodesbetween(self, roots=None, heads=None):
905 905 """Return a topological path from 'roots' to 'heads'.
906 906
907 907 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
908 908 topologically sorted list of all nodes N that satisfy both of
909 909 these constraints:
910 910
911 911 1. N is a descendant of some node in 'roots'
912 912 2. N is an ancestor of some node in 'heads'
913 913
914 914 Every node is considered to be both a descendant and an ancestor
915 915 of itself, so every reachable node in 'roots' and 'heads' will be
916 916 included in 'nodes'.
917 917
918 918 'outroots' is the list of reachable nodes in 'roots', i.e., the
919 919 subset of 'roots' that is returned in 'nodes'. Likewise,
920 920 'outheads' is the subset of 'heads' that is also in 'nodes'.
921 921
922 922 'roots' and 'heads' are both lists of node IDs. If 'roots' is
923 923 unspecified, uses nullid as the only root. If 'heads' is
924 924 unspecified, uses list of all of the revlog's heads."""
925 925 nonodes = ([], [], [])
926 926 if roots is not None:
927 927 roots = list(roots)
928 928 if not roots:
929 929 return nonodes
930 930 lowestrev = min([self.rev(n) for n in roots])
931 931 else:
932 932 roots = [nullid] # Everybody's a descendant of nullid
933 933 lowestrev = nullrev
934 934 if (lowestrev == nullrev) and (heads is None):
935 935 # We want _all_ the nodes!
936 936 return ([self.node(r) for r in self], [nullid], list(self.heads()))
937 937 if heads is None:
938 938 # All nodes are ancestors, so the latest ancestor is the last
939 939 # node.
940 940 highestrev = len(self) - 1
941 941 # Set ancestors to None to signal that every node is an ancestor.
942 942 ancestors = None
943 943 # Set heads to an empty dictionary for later discovery of heads
944 944 heads = {}
945 945 else:
946 946 heads = list(heads)
947 947 if not heads:
948 948 return nonodes
949 949 ancestors = set()
950 950 # Turn heads into a dictionary so we can remove 'fake' heads.
951 951 # Also, later we will be using it to filter out the heads we can't
952 952 # find from roots.
953 953 heads = dict.fromkeys(heads, False)
954 954 # Start at the top and keep marking parents until we're done.
955 955 nodestotag = set(heads)
956 956 # Remember where the top was so we can use it as a limit later.
957 957 highestrev = max([self.rev(n) for n in nodestotag])
958 958 while nodestotag:
959 959 # grab a node to tag
960 960 n = nodestotag.pop()
961 961 # Never tag nullid
962 962 if n == nullid:
963 963 continue
964 964 # A node's revision number represents its place in a
965 965 # topologically sorted list of nodes.
966 966 r = self.rev(n)
967 967 if r >= lowestrev:
968 968 if n not in ancestors:
969 969 # If we are possibly a descendant of one of the roots
970 970 # and we haven't already been marked as an ancestor
971 971 ancestors.add(n) # Mark as ancestor
972 972 # Add non-nullid parents to list of nodes to tag.
973 973 nodestotag.update([p for p in self.parents(n) if
974 974 p != nullid])
975 975 elif n in heads: # We've seen it before, is it a fake head?
976 976 # So it is, real heads should not be the ancestors of
977 977 # any other heads.
978 978 heads.pop(n)
979 979 if not ancestors:
980 980 return nonodes
981 981 # Now that we have our set of ancestors, we want to remove any
982 982 # roots that are not ancestors.
983 983
984 984 # If one of the roots was nullid, everything is included anyway.
985 985 if lowestrev > nullrev:
986 986 # But, since we weren't, let's recompute the lowest rev to not
987 987 # include roots that aren't ancestors.
988 988
989 989 # Filter out roots that aren't ancestors of heads
990 990 roots = [root for root in roots if root in ancestors]
991 991 # Recompute the lowest revision
992 992 if roots:
993 993 lowestrev = min([self.rev(root) for root in roots])
994 994 else:
995 995 # No more roots? Return empty list
996 996 return nonodes
997 997 else:
998 998 # We are descending from nullid, and don't need to care about
999 999 # any other roots.
1000 1000 lowestrev = nullrev
1001 1001 roots = [nullid]
1002 1002 # Transform our roots list into a set.
1003 1003 descendants = set(roots)
1004 1004 # Also, keep the original roots so we can filter out roots that aren't
1005 1005 # 'real' roots (i.e. are descended from other roots).
1006 1006 roots = descendants.copy()
1007 1007 # Our topologically sorted list of output nodes.
1008 1008 orderedout = []
1009 1009 # Don't start at nullid since we don't want nullid in our output list,
1010 1010 # and if nullid shows up in descendants, empty parents will look like
1011 1011 # they're descendants.
1012 1012 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
1013 1013 n = self.node(r)
1014 1014 isdescendant = False
1015 1015 if lowestrev == nullrev: # Everybody is a descendant of nullid
1016 1016 isdescendant = True
1017 1017 elif n in descendants:
1018 1018 # n is already a descendant
1019 1019 isdescendant = True
1020 1020 # This check only needs to be done here because all the roots
1021 1021 # will start being marked as descendants before the loop.
1022 1022 if n in roots:
1023 1023 # If n was a root, check if it's a 'real' root.
1024 1024 p = tuple(self.parents(n))
1025 1025 # If any of its parents are descendants, it's not a root.
1026 1026 if (p[0] in descendants) or (p[1] in descendants):
1027 1027 roots.remove(n)
1028 1028 else:
1029 1029 p = tuple(self.parents(n))
1030 1030 # A node is a descendant if either of its parents are
1031 1031 # descendants. (We seeded the descendants set with the roots
1032 1032 # up there, remember?)
1033 1033 if (p[0] in descendants) or (p[1] in descendants):
1034 1034 descendants.add(n)
1035 1035 isdescendant = True
1036 1036 if isdescendant and ((ancestors is None) or (n in ancestors)):
1037 1037 # Only include nodes that are both descendants and ancestors.
1038 1038 orderedout.append(n)
1039 1039 if (ancestors is not None) and (n in heads):
1040 1040 # We're trying to figure out which heads are reachable
1041 1041 # from roots.
1042 1042 # Mark this head as having been reached
1043 1043 heads[n] = True
1044 1044 elif ancestors is None:
1045 1045 # Otherwise, we're trying to discover the heads.
1046 1046 # Assume this is a head because if it isn't, the next step
1047 1047 # will eventually remove it.
1048 1048 heads[n] = True
1049 1049 # But, obviously its parents aren't.
1050 1050 for p in self.parents(n):
1051 1051 heads.pop(p, None)
1052 1052 heads = [head for head, flag in heads.iteritems() if flag]
1053 1053 roots = list(roots)
1054 1054 assert orderedout
1055 1055 assert roots
1056 1056 assert heads
1057 1057 return (orderedout, roots, heads)
1058 1058
1059 1059 def headrevs(self):
1060 1060 try:
1061 1061 return self.index.headrevs()
1062 1062 except AttributeError:
1063 1063 return self._headrevs()
1064 1064
1065 1065 def computephases(self, roots):
1066 1066 return self.index.computephasesmapsets(roots)
1067 1067
1068 1068 def _headrevs(self):
1069 1069 count = len(self)
1070 1070 if not count:
1071 1071 return [nullrev]
1072 1072 # we won't iterate over filtered revs, so nobody is a head at the start
1073 1073 ishead = [0] * (count + 1)
1074 1074 index = self.index
1075 1075 for r in self:
1076 1076 ishead[r] = 1 # I may be a head
1077 1077 e = index[r]
1078 1078 ishead[e[5]] = ishead[e[6]] = 0 # my parents are not
1079 1079 return [r for r, val in enumerate(ishead) if val]
1080 1080
1081 1081 def heads(self, start=None, stop=None):
1082 1082 """return the list of all nodes that have no children
1083 1083
1084 1084 if start is specified, only heads that are descendants of
1085 1085 start will be returned
1086 1086 if stop is specified, it will consider all the revs from stop
1087 1087 as if they had no children
1088 1088 """
1089 1089 if start is None and stop is None:
1090 1090 if not len(self):
1091 1091 return [nullid]
1092 1092 return [self.node(r) for r in self.headrevs()]
1093 1093
1094 1094 if start is None:
1095 1095 start = nullrev
1096 1096 else:
1097 1097 start = self.rev(start)
1098 1098
1099 1099 stoprevs = set(self.rev(n) for n in stop or [])
1100 1100
1101 1101 revs = dagop.headrevssubset(self.revs, self.parentrevs, startrev=start,
1102 1102 stoprevs=stoprevs)
1103 1103
1104 1104 return [self.node(rev) for rev in revs]
1105 1105
1106 1106 def children(self, node):
1107 1107 """find the children of a given node"""
1108 1108 c = []
1109 1109 p = self.rev(node)
1110 1110 for r in self.revs(start=p + 1):
1111 1111 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
1112 1112 if prevs:
1113 1113 for pr in prevs:
1114 1114 if pr == p:
1115 1115 c.append(self.node(r))
1116 1116 elif p == nullrev:
1117 1117 c.append(self.node(r))
1118 1118 return c
1119 1119
1120 1120 def commonancestorsheads(self, a, b):
1121 1121 """calculate all the heads of the common ancestors of nodes a and b"""
1122 1122 a, b = self.rev(a), self.rev(b)
1123 1123 ancs = self._commonancestorsheads(a, b)
1124 1124 return pycompat.maplist(self.node, ancs)
1125 1125
1126 1126 def _commonancestorsheads(self, *revs):
1127 1127 """calculate all the heads of the common ancestors of revs"""
1128 1128 try:
1129 1129 ancs = self.index.commonancestorsheads(*revs)
1130 1130 except (AttributeError, OverflowError): # C implementation failed
1131 1131 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
1132 1132 return ancs
1133 1133
1134 1134 def isancestor(self, a, b):
1135 1135 """return True if node a is an ancestor of node b
1136 1136
1137 1137 A revision is considered an ancestor of itself."""
1138 1138 a, b = self.rev(a), self.rev(b)
1139 1139 return self.isancestorrev(a, b)
1140 1140
1141 1141 def isancestorrev(self, a, b):
1142 1142 """return True if revision a is an ancestor of revision b
1143 1143
1144 1144 A revision is considered an ancestor of itself.
1145 1145
1146 1146 The implementation of this is trivial but the use of
1147 1147 commonancestorsheads is not."""
1148 1148 if a == nullrev:
1149 1149 return True
1150 1150 elif a == b:
1151 1151 return True
1152 1152 elif a > b:
1153 1153 return False
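# Illustrative sketch of why the membership test below works: if a is an
# ancestor of b, then a is itself a head of the common ancestors of
# (a, b). E.g. in a linear 0 <- 1 <- 2 history, the common-ancestor
# heads of (1, 2) contain only rev 1, so isancestorrev(1, 2) is True,
# while isancestorrev(2, 1) was already short-circuited to False above.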
1154 1154 return a in self._commonancestorsheads(a, b)
1155 1155
1156 1156 def ancestor(self, a, b):
1157 1157 """calculate the "best" common ancestor of nodes a and b"""
1158 1158
1159 1159 a, b = self.rev(a), self.rev(b)
1160 1160 try:
1161 1161 ancs = self.index.ancestors(a, b)
1162 1162 except (AttributeError, OverflowError):
1163 1163 ancs = ancestor.ancestors(self.parentrevs, a, b)
1164 1164 if ancs:
1165 1165 # choose a consistent winner when there's a tie
1166 1166 return min(map(self.node, ancs))
1167 1167 return nullid
1168 1168
1169 1169 def _match(self, id):
1170 1170 if isinstance(id, int):
1171 1171 # rev
1172 1172 return self.node(id)
1173 1173 if len(id) == 20:
1174 1174 # possibly a binary node
1175 1175 # odds of a binary node being all hex in ASCII are 1 in 10**25
1176 1176 try:
1177 1177 node = id
1178 1178 self.rev(node) # quick search the index
1179 1179 return node
1180 1180 except error.LookupError:
1181 1181 pass # may be partial hex id
1182 1182 try:
1183 1183 # str(rev)
1184 1184 rev = int(id)
1185 1185 if "%d" % rev != id:
1186 1186 raise ValueError
1187 1187 if rev < 0:
1188 1188 rev = len(self) + rev
1189 1189 if rev < 0 or rev >= len(self):
1190 1190 raise ValueError
1191 1191 return self.node(rev)
1192 1192 except (ValueError, OverflowError):
1193 1193 pass
1194 1194 if len(id) == 40:
1195 1195 try:
1196 1196 # a full hex nodeid?
1197 1197 node = bin(id)
1198 1198 self.rev(node)
1199 1199 return node
1200 1200 except (TypeError, error.LookupError):
1201 1201 pass
1202 1202
1203 1203 def _partialmatch(self, id):
1204 1204 # we don't care about wdirfilenodeids as they should always be full hashes
1205 1205 maybewdir = wdirhex.startswith(id)
1206 1206 try:
1207 1207 partial = self.index.partialmatch(id)
1208 1208 if partial and self.hasnode(partial):
1209 1209 if maybewdir:
1210 1210 # single 'ff...' match in radix tree, ambiguous with wdir
1211 1211 raise error.RevlogError
1212 1212 return partial
1213 1213 if maybewdir:
1214 1214 # no 'ff...' match in radix tree, wdir identified
1215 1215 raise error.WdirUnsupported
1216 1216 return None
1217 1217 except error.RevlogError:
1218 1218 # parsers.c radix tree lookup gave multiple matches
1219 1219 # fast path: for unfiltered changelog, radix tree is accurate
1220 1220 if not getattr(self, 'filteredrevs', None):
1221 1221 raise error.AmbiguousPrefixLookupError(
1222 1222 id, self.indexfile, _('ambiguous identifier'))
1223 1223 # fall through to slow path that filters hidden revisions
1224 1224 except (AttributeError, ValueError):
1225 1225 # we are pure python, or key was too short to search radix tree
1226 1226 pass
1227 1227
1228 1228 if id in self._pcache:
1229 1229 return self._pcache[id]
1230 1230
1231 1231 if len(id) <= 40:
1232 1232 try:
1233 1233 # hex(node)[:...]
1234 1234 l = len(id) // 2 # grab an even number of digits
1235 1235 prefix = bin(id[:l * 2])
1236 1236 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
1237 1237 nl = [n for n in nl if hex(n).startswith(id) and
1238 1238 self.hasnode(n)]
1239 1239 if nullhex.startswith(id):
1240 1240 nl.append(nullid)
1241 1241 if len(nl) > 0:
1242 1242 if len(nl) == 1 and not maybewdir:
1243 1243 self._pcache[id] = nl[0]
1244 1244 return nl[0]
1245 1245 raise error.AmbiguousPrefixLookupError(
1246 1246 id, self.indexfile, _('ambiguous identifier'))
1247 1247 if maybewdir:
1248 1248 raise error.WdirUnsupported
1249 1249 return None
1250 1250 except TypeError:
1251 1251 pass
1252 1252
1253 1253 def lookup(self, id):
1254 1254 """locate a node based on:
1255 1255 - revision number or str(revision number)
1256 1256 - nodeid or subset of hex nodeid
1257 1257 """
1258 1258 n = self._match(id)
1259 1259 if n is not None:
1260 1260 return n
1261 1261 n = self._partialmatch(id)
1262 1262 if n:
1263 1263 return n
1264 1264
1265 1265 raise error.LookupError(id, self.indexfile, _('no match found'))
1266 1266
1267 1267 def shortest(self, node, minlength=1):
1268 1268 """Find the shortest unambiguous prefix that matches node."""
1269 1269 def isvalid(prefix):
1270 1270 try:
1271 1271 node = self._partialmatch(prefix)
1272 1272 except error.AmbiguousPrefixLookupError:
1273 1273 return False
1274 1274 except error.WdirUnsupported:
1275 1275 # single 'ff...' match
1276 1276 return True
1277 1277 if node is None:
1278 1278 raise error.LookupError(node, self.indexfile, _('no node'))
1279 1279 return True
1280 1280
1281 1281 def maybewdir(prefix):
1282 1282 return all(c == 'f' for c in prefix)
1283 1283
1284 1284 hexnode = hex(node)
1285 1285
1286 1286 def disambiguate(hexnode, minlength):
1287 1287 """Disambiguate against wdirid."""
1288 1288 for length in range(minlength, 41):
1289 1289 prefix = hexnode[:length]
1290 1290 if not maybewdir(prefix):
1291 1291 return prefix
1292 1292
1293 1293 if not getattr(self, 'filteredrevs', None):
1294 1294 try:
1295 1295 length = max(self.index.shortest(node), minlength)
1296 1296 return disambiguate(hexnode, length)
1297 1297 except error.RevlogError:
1298 1298 if node != wdirid:
1299 1299 raise error.LookupError(node, self.indexfile, _('no node'))
1300 1300 except AttributeError:
1301 1301 # Fall through to pure code
1302 1302 pass
1303 1303
1304 1304 if node == wdirid:
1305 1305 for length in range(minlength, 41):
1306 1306 prefix = hexnode[:length]
1307 1307 if isvalid(prefix):
1308 1308 return prefix
1309 1309
1310 1310 for length in range(minlength, 41):
1311 1311 prefix = hexnode[:length]
1312 1312 if isvalid(prefix):
1313 1313 return disambiguate(hexnode, length)
1314 1314
1315 1315 def cmp(self, node, text):
1316 1316 """compare text with a given file revision
1317 1317
1318 1318 returns True if text is different from what is stored.
1319 1319 """
1320 1320 p1, p2 = self.parents(node)
1321 1321 return storageutil.hashrevisionsha1(text, p1, p2) != node
1322 1322
1323 1323 def _cachesegment(self, offset, data):
1324 1324 """Add a segment to the revlog cache.
1325 1325
1326 1326 Accepts an absolute offset and the data that is at that location.
1327 1327 """
1328 1328 o, d = self._chunkcache
1329 1329 # try to add to existing cache
1330 1330 if o + len(d) == offset and len(d) + len(data) < _chunksize:
1331 1331 self._chunkcache = o, d + data
1332 1332 else:
1333 1333 self._chunkcache = offset, data
1334 1334
1335 1335 def _readsegment(self, offset, length, df=None):
1336 1336 """Load a segment of raw data from the revlog.
1337 1337
1338 1338 Accepts an absolute offset, length to read, and an optional existing
1339 1339 file handle to read from.
1340 1340
1341 1341 If an existing file handle is passed, it will be seeked and the
1342 1342 original seek position will NOT be restored.
1343 1343
1344 1344 Returns a str or buffer of raw byte data.
1345 1345 """
1346 1346 # Cache data both forward and backward around the requested
1347 1347 # data, in a fixed size window. This helps speed up operations
1348 1348 # involving reading the revlog backwards.
1349 1349 cachesize = self._chunkcachesize
1350 1350 realoffset = offset & ~(cachesize - 1)
1351 1351 reallength = (((offset + length + cachesize) & ~(cachesize - 1))
1352 1352 - realoffset)
1353 1353 with self._datareadfp(df) as df:
1354 1354 df.seek(realoffset)
1355 1355 d = df.read(reallength)
1356 1356 self._cachesegment(realoffset, d)
1357 1357 if offset != realoffset or reallength != length:
1358 1358 return util.buffer(d, offset - realoffset, length)
1359 1359 return d
1360 1360
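# Illustrative sketch of the window arithmetic used by _readsegment above.
# ``cachesize`` must be a power of two; masking with ~(cachesize - 1) rounds
# the start down to a window boundary and the end up past one, so whole
# windows are always read and cached.
def alignedwindow(offset, length, cachesize):
    realoffset = offset & ~(cachesize - 1)
    reallength = (((offset + length + cachesize) & ~(cachesize - 1))
                  - realoffset)
    return realoffset, reallength

# a 100-byte read at offset 70000 pulls in the whole 64KiB window around it
assert alignedwindow(70000, 100, 65536) == (65536, 65536)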
1361 1361 def _getsegment(self, offset, length, df=None):
1362 1362 """Obtain a segment of raw data from the revlog.
1363 1363
1364 1364 Accepts an absolute offset, length of bytes to obtain, and an
1365 1365 optional file handle to the already-opened revlog. If the file
1366 1366 handle is used, its original seek position will not be preserved.
1367 1367
1368 1368 Requests for data may be returned from a cache.
1369 1369
1370 1370 Returns a str or a buffer instance of raw byte data.
1371 1371 """
1372 1372 o, d = self._chunkcache
1373 1373 l = len(d)
1374 1374
1375 1375 # is it in the cache?
1376 1376 cachestart = offset - o
1377 1377 cacheend = cachestart + length
1378 1378 if cachestart >= 0 and cacheend <= l:
1379 1379 if cachestart == 0 and cacheend == l:
1380 1380 return d # avoid a copy
1381 1381 return util.buffer(d, cachestart, cacheend - cachestart)
1382 1382
1383 1383 return self._readsegment(offset, length, df=df)
1384 1384
1385 1385 def _getsegmentforrevs(self, startrev, endrev, df=None):
1386 1386 """Obtain a segment of raw data corresponding to a range of revisions.
1387 1387
1388 1388 Accepts the start and end revisions and an optional already-open
1389 1389 file handle to be used for reading. If the file handle is read, its
1390 1390 seek position will not be preserved.
1391 1391
1392 1392 Requests for data may be satisfied by a cache.
1393 1393
1394 1394 Returns a 2-tuple of (offset, data) for the requested range of
1395 1395 revisions. Offset is the integer offset from the beginning of the
1396 1396 revlog and data is a str or buffer of the raw byte data.
1397 1397
1398 1398 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
1399 1399 to determine where each revision's data begins and ends.
1400 1400 """
1401 1401 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
1402 1402 # (functions are expensive).
1403 1403 index = self.index
1404 1404 istart = index[startrev]
1405 1405 start = int(istart[0] >> 16)
1406 1406 if startrev == endrev:
1407 1407 end = start + istart[1]
1408 1408 else:
1409 1409 iend = index[endrev]
1410 1410 end = int(iend[0] >> 16) + iend[1]
1411 1411
1412 1412 if self._inline:
1413 1413 start += (startrev + 1) * self._io.size
1414 1414 end += (endrev + 1) * self._io.size
1415 1415 length = end - start
1416 1416
1417 1417 return start, self._getsegment(start, length, df=df)
1418 1418
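# Illustrative worked example (values assumed): in an inline revlog the data
# of every revision is interleaved with its index entry inside the .i file,
# so rev r's data physically starts at start(r) + (r + 1) * iosize; iosize
# is the 64-byte revlogv1 index entry size.
iosize = 64
startrev0 = 0                              # start(0): logical data offset
assert startrev0 + (0 + 1) * iosize == 64  # rev 0's data follows its entry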
1419 1419 def _chunk(self, rev, df=None):
1420 1420 """Obtain a single decompressed chunk for a revision.
1421 1421
1422 1422 Accepts an integer revision and an optional already-open file handle
1423 1423 to be used for reading. If used, the seek position of the file will not
1424 1424 be preserved.
1425 1425
1426 1426 Returns a str holding uncompressed data for the requested revision.
1427 1427 """
1428 1428 return self.decompress(self._getsegmentforrevs(rev, rev, df=df)[1])
1429 1429
1430 1430 def _chunks(self, revs, df=None, targetsize=None):
1431 1431 """Obtain decompressed chunks for the specified revisions.
1432 1432
1433 1433 Accepts an iterable of numeric revisions that are assumed to be in
1434 1434 ascending order. Also accepts an optional already-open file handle
1435 1435 to be used for reading. If used, the seek position of the file will
1436 1436 not be preserved.
1437 1437
1438 1438 This function is similar to calling ``self._chunk()`` multiple times,
1439 1439 but is faster.
1440 1440
1441 1441 Returns a list with decompressed data for each requested revision.
1442 1442 """
1443 1443 if not revs:
1444 1444 return []
1445 1445 start = self.start
1446 1446 length = self.length
1447 1447 inline = self._inline
1448 1448 iosize = self._io.size
1449 1449 buffer = util.buffer
1450 1450
1451 1451 l = []
1452 1452 ladd = l.append
1453 1453
1454 1454 if not self._withsparseread:
1455 1455 slicedchunks = (revs,)
1456 1456 else:
1457 1457 slicedchunks = deltautil.slicechunk(self, revs,
1458 1458 targetsize=targetsize)
1459 1459
1460 1460 for revschunk in slicedchunks:
1461 1461 firstrev = revschunk[0]
1462 1462 # Skip trailing revisions with empty diff
1463 1463 for lastrev in revschunk[::-1]:
1464 1464 if length(lastrev) != 0:
1465 1465 break
1466 1466
1467 1467 try:
1468 1468 offset, data = self._getsegmentforrevs(firstrev, lastrev, df=df)
1469 1469 except OverflowError:
1470 1470 # issue4215 - we can't cache a run of chunks greater than
1471 1471 # 2G on Windows
1472 1472 return [self._chunk(rev, df=df) for rev in revschunk]
1473 1473
1474 1474 decomp = self.decompress
1475 1475 for rev in revschunk:
1476 1476 chunkstart = start(rev)
1477 1477 if inline:
1478 1478 chunkstart += (rev + 1) * iosize
1479 1479 chunklength = length(rev)
1480 1480 ladd(decomp(buffer(data, chunkstart - offset, chunklength)))
1481 1481
1482 1482 return l
1483 1483
1484 1484 def _chunkclear(self):
1485 1485 """Clear the raw chunk cache."""
1486 1486 self._chunkcache = (0, '')
1487 1487
1488 1488 def deltaparent(self, rev):
1489 1489 """return deltaparent of the given revision"""
1490 1490 base = self.index[rev][3]
1491 1491 if base == rev:
1492 1492 return nullrev
1493 1493 elif self._generaldelta:
1494 1494 return base
1495 1495 else:
1496 1496 return rev - 1
1497 1497
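# Illustrative sketch (hypothetical helper, not in this changeset): a full
# delta chain can be recovered from deltaparent() above by following delta
# parents until a full snapshot (delta parent == nullrev, i.e. -1) is
# reached, then applying the deltas oldest-first on top of the base text.
def deltachain_sketch(deltaparent, rev):
    chain = []
    while rev != -1:
        chain.append(rev)
        rev = deltaparent(rev)
    chain.reverse()  # base snapshot first
    return chain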
1498 1498 def issnapshot(self, rev):
1499 1499 """tells whether rev is a snapshot
1500 1500 """
1501 1501 if rev == nullrev:
1502 1502 return True
1503 1503 deltap = self.deltaparent(rev)
1504 1504 if deltap == nullrev:
1505 1505 return True
1506 1506 p1, p2 = self.parentrevs(rev)
1507 1507 if deltap in (p1, p2):
1508 1508 return False
1509 1509 return self.issnapshot(deltap)
1510 1510
1511 1511 def snapshotdepth(self, rev):
1512 1512 """number of snapshots in the chain before this one"""
1513 1513 if not self.issnapshot(rev):
1514 1514 raise error.ProgrammingError('revision %d not a snapshot' % rev)
1515 1515 return len(self._deltachain(rev)[0]) - 1
1516 1516
1517 1517 def revdiff(self, rev1, rev2):
1518 1518 """return or calculate a delta between two revisions
1519 1519
1520 1520 The delta calculated is in binary form and is intended to be written to
1521 1521 revlog data directly. So this function needs raw revision data.
1522 1522 """
1523 1523 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1524 1524 return bytes(self._chunk(rev2))
1525 1525
1526 1526 return mdiff.textdiff(self.revision(rev1, raw=True),
1527 1527 self.revision(rev2, raw=True))
1528 1528
1529 1529 def revision(self, nodeorrev, _df=None, raw=False):
1530 1530 """return an uncompressed revision of a given node or revision
1531 1531 number.
1532 1532
1533 1533 _df - an existing file handle to read from. (internal-only)
1534 1534 raw - an optional argument specifying if the revision data is to be
1535 1535 treated as raw data when applying flag transforms. 'raw' should be set
1536 1536 to True when generating changegroups or in debug commands.
1537 1537 """
1538 1538 if isinstance(nodeorrev, int):
1539 1539 rev = nodeorrev
1540 1540 node = self.node(rev)
1541 1541 else:
1542 1542 node = nodeorrev
1543 1543 rev = None
1544 1544
1545 1545 cachedrev = None
1546 1546 flags = None
1547 1547 rawtext = None
1548 1548 if node == nullid:
1549 1549 return ""
1550 1550 if self._revisioncache:
1551 1551 if self._revisioncache[0] == node:
1552 1552 # _cache only stores rawtext
1553 1553 if raw:
1554 1554 return self._revisioncache[2]
1555 1555 # duplicated, but good for perf
1556 1556 if rev is None:
1557 1557 rev = self.rev(node)
1558 1558 if flags is None:
1559 1559 flags = self.flags(rev)
1560 1560 # no extra flags set, no flag processor runs, text = rawtext
1561 1561 if flags == REVIDX_DEFAULT_FLAGS:
1562 1562 return self._revisioncache[2]
1563 1563 # rawtext is reusable. need to run flag processor
1564 1564 rawtext = self._revisioncache[2]
1565 1565
1566 1566 cachedrev = self._revisioncache[1]
1567 1567
1568 1568 # look up what we need to read
1569 1569 if rawtext is None:
1570 1570 if rev is None:
1571 1571 rev = self.rev(node)
1572 1572
1573 1573 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1574 1574 if stopped:
1575 1575 rawtext = self._revisioncache[2]
1576 1576
1577 1577 # drop cache to save memory
1578 1578 self._revisioncache = None
1579 1579
1580 1580 targetsize = None
1581 1581 rawsize = self.index[rev][2]
1582 1582 if 0 <= rawsize:
1583 1583 targetsize = 4 * rawsize
1584 1584
1585 1585 bins = self._chunks(chain, df=_df, targetsize=targetsize)
1586 1586 if rawtext is None:
1587 1587 rawtext = bytes(bins[0])
1588 1588 bins = bins[1:]
1589 1589
1590 1590 rawtext = mdiff.patches(rawtext, bins)
1591 1591 self._revisioncache = (node, rev, rawtext)
1592 1592
1593 1593 if flags is None:
1594 1594 if rev is None:
1595 1595 rev = self.rev(node)
1596 1596 flags = self.flags(rev)
1597 1597
1598 1598 text, validatehash = self._processflags(rawtext, flags, 'read', raw=raw)
1599 1599 if validatehash:
1600 1600 self.checkhash(text, node, rev=rev)
1601 1601
1602 1602 return text
1603 1603
1604 1604 def hash(self, text, p1, p2):
1605 1605 """Compute a node hash.
1606 1606
1607 1607 Available as a function so that subclasses can replace the hash
1608 1608 as needed.
1609 1609 """
1610 1610 return storageutil.hashrevisionsha1(text, p1, p2)
1611 1611
1612 1612 def _processflags(self, text, flags, operation, raw=False):
1613 1613 """Inspect revision data flags and applies transforms defined by
1614 1614 registered flag processors.
1615 1615
1616 1616 ``text`` - the revision data to process
1617 1617 ``flags`` - the revision flags
1618 1618 ``operation`` - the operation being performed (read or write)
1619 1619 ``raw`` - an optional argument describing if the raw transform should be
1620 1620 applied.
1621 1621
1622 1622 This method processes the flags in the order (or reverse order if
1623 1623 ``operation`` is 'write') defined by REVIDX_FLAGS_ORDER, applying the
1624 1624 flag processors registered for present flags. The order of flags defined
1625 1625 in REVIDX_FLAGS_ORDER needs to be stable to allow non-commutativity.
1626 1626
1627 1627 Returns a 2-tuple of ``(text, validatehash)`` where ``text`` is the
1628 1628 processed text and ``validatehash`` is a bool indicating whether the
1629 1629 returned text should be checked for hash integrity.
1630 1630
1631 1631 Note: If the ``raw`` argument is set, it has precedence over the
1632 1632 operation and will only update the value of ``validatehash``.
1633 1633 """
1634 1634 # fast path: no flag processors will run
1635 1635 if flags == 0:
1636 1636 return text, True
1637 1637 if operation not in ('read', 'write'):
1638 1638 raise error.ProgrammingError(_("invalid '%s' operation") %
1639 1639 operation)
1640 1640 # Check all flags are known.
1641 1641 if flags & ~REVIDX_KNOWN_FLAGS:
1642 1642 raise error.RevlogError(_("incompatible revision flag '%#x'") %
1643 1643 (flags & ~REVIDX_KNOWN_FLAGS))
1644 1644 validatehash = True
1645 1645 # Depending on the operation (read or write), the order might be
1646 1646 # reversed due to non-commutative transforms.
1647 1647 orderedflags = REVIDX_FLAGS_ORDER
1648 1648 if operation == 'write':
1649 1649 orderedflags = reversed(orderedflags)
1650 1650
1651 1651 for flag in orderedflags:
1652 1652 # If a flagprocessor has been registered for a known flag, apply the
1653 1653 # related operation transform and update result tuple.
1654 1654 if flag & flags:
1655 1655 vhash = True
1656 1656
1657 1657 if flag not in self._flagprocessors:
1658 1658 message = _("missing processor for flag '%#x'") % (flag)
1659 1659 raise error.RevlogError(message)
1660 1660
1661 1661 processor = self._flagprocessors[flag]
1662 1662 if processor is not None:
1663 1663 readtransform, writetransform, rawtransform = processor
1664 1664
1665 1665 if raw:
1666 1666 vhash = rawtransform(self, text)
1667 1667 elif operation == 'read':
1668 1668 text, vhash = readtransform(self, text)
1669 1669 else: # write operation
1670 1670 text, vhash = writetransform(self, text)
1671 1671 validatehash = validatehash and vhash
1672 1672
1673 1673 return text, validatehash
1674 1674
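# Illustrative sketch of the transform triple _processflags unpacks for each
# registered flag (names hypothetical): the read and write transforms return
# ``(newtext, validatehash)``, while the raw transform returns only the
# validatehash bool, since raw text is passed through unmodified.
def readxform(rl, text):
    return text, True

def writexform(rl, text):
    return text, True

def rawxform(rl, text):
    return False  # raw text alone cannot be hash-checked here

example_flagprocessor = (readxform, writexform, rawxform)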
1675 1675 def checkhash(self, text, node, p1=None, p2=None, rev=None):
1676 1676 """Check node hash integrity.
1677 1677
1678 1678 Available as a function so that subclasses can extend hash mismatch
1679 1679 behaviors as needed.
1680 1680 """
1681 1681 try:
1682 1682 if p1 is None and p2 is None:
1683 1683 p1, p2 = self.parents(node)
1684 1684 if node != self.hash(text, p1, p2):
1685 1685 # Clear the revision cache on hash failure. The revision cache
1686 1686 # only stores the raw revision and clearing the cache does have
1687 1687 # the side-effect that we won't have a cache hit when the raw
1688 1688 # revision data is accessed. But this case should be rare and
1689 1689 # it is extra work to teach the cache about the hash
1690 1690 # verification state.
1691 1691 if self._revisioncache and self._revisioncache[0] == node:
1692 1692 self._revisioncache = None
1693 1693
1694 1694 revornode = rev
1695 1695 if revornode is None:
1696 1696 revornode = templatefilters.short(hex(node))
1697 1697 raise error.RevlogError(_("integrity check failed on %s:%s")
1698 1698 % (self.indexfile, pycompat.bytestr(revornode)))
1699 1699 except error.RevlogError:
1700 1700 if self._censorable and storageutil.iscensoredtext(text):
1701 1701 raise error.CensoredNodeError(self.indexfile, node, text)
1702 1702 raise
1703 1703
1704 1704 def _enforceinlinesize(self, tr, fp=None):
1705 1705 """Check if the revlog is too big for inline and convert if so.
1706 1706
1707 1707 This should be called after revisions are added to the revlog. If the
1708 1708 revlog has grown too large to be an inline revlog, it will convert it
1709 1709 to use multiple index and data files.
1710 1710 """
1711 1711 tiprev = len(self) - 1
1712 1712 if (not self._inline or
1713 1713 (self.start(tiprev) + self.length(tiprev)) < _maxinline):
1714 1714 return
1715 1715
1716 1716 trinfo = tr.find(self.indexfile)
1717 1717 if trinfo is None:
1718 1718 raise error.RevlogError(_("%s not found in the transaction")
1719 1719 % self.indexfile)
1720 1720
1721 1721 trindex = trinfo[2]
1722 1722 if trindex is not None:
1723 1723 dataoff = self.start(trindex)
1724 1724 else:
1725 1725 # revlog was stripped at start of transaction, use all leftover data
1726 1726 trindex = len(self) - 1
1727 1727 dataoff = self.end(tiprev)
1728 1728
1729 1729 tr.add(self.datafile, dataoff)
1730 1730
1731 1731 if fp:
1732 1732 fp.flush()
1733 1733 fp.close()
1734 1734
1735 1735 with self._datafp('w') as df:
1736 1736 for r in self:
1737 1737 df.write(self._getsegmentforrevs(r, r)[1])
1738 1738
1739 1739 with self._indexfp('w') as fp:
1740 1740 self.version &= ~FLAG_INLINE_DATA
1741 1741 self._inline = False
1742 1742 io = self._io
1743 1743 for i in self:
1744 1744 e = io.packentry(self.index[i], self.node, self.version, i)
1745 1745 fp.write(e)
1746 1746
1747 1747 # the temp file replaces the real index when we exit the context
1748 1748 # manager
1749 1749
1750 1750 tr.replace(self.indexfile, trindex * self._io.size)
1751 1751 self._chunkclear()
1752 1752
1753 1753 def _nodeduplicatecallback(self, transaction, node):
1754 1754 """called when trying to add a node already stored.
1755 1755 """
1756 1756
1757 1757 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None,
1758 1758 node=None, flags=REVIDX_DEFAULT_FLAGS, deltacomputer=None):
1759 1759 """add a revision to the log
1760 1760
1761 1761 text - the revision data to add
1762 1762 transaction - the transaction object used for rollback
1763 1763 link - the linkrev data to add
1764 1764 p1, p2 - the parent nodeids of the revision
1765 1765 cachedelta - an optional precomputed delta
1766 1766 node - nodeid of revision; typically node is not specified, and it is
1767 1767 computed by default as hash(text, p1, p2), however subclasses might
1768 1768 use different hashing method (and override checkhash() in such case)
1769 1769 flags - the known flags to set on the revision
1770 1770 deltacomputer - an optional deltacomputer instance shared between
1771 1771 multiple calls
1772 1772 """
1773 1773 if link == nullrev:
1774 1774 raise error.RevlogError(_("attempted to add linkrev -1 to %s")
1775 1775 % self.indexfile)
1776 1776
1777 1777 if flags:
1778 1778 node = node or self.hash(text, p1, p2)
1779 1779
1780 1780 rawtext, validatehash = self._processflags(text, flags, 'write')
1781 1781
1782 1782 # If the flag processor modifies the revision data, ignore any provided
1783 1783 # cachedelta.
1784 1784 if rawtext != text:
1785 1785 cachedelta = None
1786 1786
1787 1787 if len(rawtext) > _maxentrysize:
1788 1788 raise error.RevlogError(
1789 1789 _("%s: size of %d bytes exceeds maximum revlog storage of 2GiB")
1790 1790 % (self.indexfile, len(rawtext)))
1791 1791
1792 1792 node = node or self.hash(rawtext, p1, p2)
1793 1793 if node in self.nodemap:
1794 1794 return node
1795 1795
1796 1796 if validatehash:
1797 1797 self.checkhash(rawtext, node, p1=p1, p2=p2)
1798 1798
1799 1799 return self.addrawrevision(rawtext, transaction, link, p1, p2, node,
1800 1800 flags, cachedelta=cachedelta,
1801 1801 deltacomputer=deltacomputer)
1802 1802
1803 1803 def addrawrevision(self, rawtext, transaction, link, p1, p2, node, flags,
1804 1804 cachedelta=None, deltacomputer=None):
1805 1805 """add a raw revision with known flags, node and parents
1806 1806 useful when reusing a revision not stored in this revlog (ex: received
1807 1807 over wire, or read from an external bundle).
1808 1808 """
1809 1809 dfh = None
1810 1810 if not self._inline:
1811 1811 dfh = self._datafp("a+")
1812 1812 ifh = self._indexfp("a+")
1813 1813 try:
1814 1814 return self._addrevision(node, rawtext, transaction, link, p1, p2,
1815 1815 flags, cachedelta, ifh, dfh,
1816 1816 deltacomputer=deltacomputer)
1817 1817 finally:
1818 1818 if dfh:
1819 1819 dfh.close()
1820 1820 ifh.close()
1821 1821
1822 1822 def compress(self, data):
1823 1823 """Generate a possibly-compressed representation of data."""
1824 1824 if not data:
1825 1825 return '', data
1826 1826
1827 1827 compressed = self._compressor.compress(data)
1828 1828
1829 1829 if compressed:
1830 1830 # The revlog compressor added the header in the returned data.
1831 1831 return '', compressed
1832 1832
1833 1833 if data[0:1] == '\0':
1834 1834 return '', data
1835 1835 return 'u', data
1836 1836
1837 1837 def decompress(self, data):
1838 1838 """Decompress a revlog chunk.
1839 1839
1840 1840 The chunk is expected to begin with a header identifying the
1841 1841 format type so it can be routed to an appropriate decompressor.
1842 1842 """
1843 1843 if not data:
1844 1844 return data
1845 1845
1846 1846 # Revlogs are read much more frequently than they are written and many
1847 1847 # chunks only take microseconds to decompress, so performance is
1848 1848 # important here.
1849 1849 #
1850 1850 # We can make a few assumptions about revlogs:
1851 1851 #
1852 1852 # 1) the majority of chunks will be compressed (as opposed to inline
1853 1853 # raw data).
1854 1854 # 2) decompressing *any* data will likely be at least 10x slower than
1855 1855 # returning raw inline data.
1856 1856 # 3) we want to prioritize common and officially supported compression
1857 1857 # engines
1858 1858 #
1859 1859 # It follows that we want to optimize for "decompress compressed data
1860 1860 # when encoded with common and officially supported compression engines"
1861 1861 # case over "raw data" and "data encoded by less common or non-official
1862 1862 # compression engines." That is why we have the inline lookup first
1863 1863 # followed by the compengines lookup.
1864 1864 #
1865 1865 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
1866 1866 # compressed chunks. And this matters for changelog and manifest reads.
1867 1867 t = data[0:1]
1868 1868
1869 1869 if t == 'x':
1870 1870 try:
1871 1871 return _zlibdecompress(data)
1872 1872 except zlib.error as e:
1873 1873 raise error.RevlogError(_('revlog decompress error: %s') %
1874 1874 stringutil.forcebytestr(e))
1875 1875 # '\0' is more common than 'u' so it goes first.
1876 1876 elif t == '\0':
1877 1877 return data
1878 1878 elif t == 'u':
1879 1879 return util.buffer(data, 1)
1880 1880
1881 1881 try:
1882 1882 compressor = self._decompressors[t]
1883 1883 except KeyError:
1884 1884 try:
1885 1885 engine = util.compengines.forrevlogheader(t)
1886 1886 compressor = engine.revlogcompressor()
1887 1887 self._decompressors[t] = compressor
1888 1888 except KeyError:
1889 1889 raise error.RevlogError(_('unknown compression type %r') % t)
1890 1890
1891 1891 return compressor.decompress(data)
1892 1892
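# Self-contained illustration of the chunk header convention handled above:
# 'u' marks data stored verbatim, a leading '\0' (or an empty chunk) is
# literal data needing no marker, and 'x' is the first byte of a zlib
# stream. This sketch mirrors only the built-in paths; the real method
# dispatches any other header byte to a registered compression engine.
import zlib

def decompress_sketch(chunk):
    if not chunk:
        return chunk
    t = chunk[0:1]
    if t == b'x':
        return zlib.decompress(chunk)
    if t == b'\0':
        return chunk
    if t == b'u':
        return chunk[1:]
    raise ValueError('unknown compression header %r' % t)

assert decompress_sketch(b'u' + b'raw text') == b'raw text'
assert decompress_sketch(zlib.compress(b'raw text')) == b'raw text'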
1893 1893 def _addrevision(self, node, rawtext, transaction, link, p1, p2, flags,
1894 1894 cachedelta, ifh, dfh, alwayscache=False,
1895 1895 deltacomputer=None):
1896 1896 """internal function to add revisions to the log
1897 1897
1898 1898 see addrevision for argument descriptions.
1899 1899
1900 1900 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
1901 1901
1902 1902 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
1903 1903 be used.
1904 1904
1905 1905 invariants:
1906 1906 - rawtext is optional (can be None); if not set, cachedelta must be set.
1907 1907 if both are set, they must correspond to each other.
1908 1908 """
1909 1909 if node == nullid:
1910 1910 raise error.RevlogError(_("%s: attempt to add null revision") %
1911 1911 self.indexfile)
1912 1912 if node == wdirid or node in wdirfilenodeids:
1913 1913 raise error.RevlogError(_("%s: attempt to add wdir revision") %
1914 1914 self.indexfile)
1915 1915
1916 1916 if self._inline:
1917 1917 fh = ifh
1918 1918 else:
1919 1919 fh = dfh
1920 1920
1921 1921 btext = [rawtext]
1922 1922
1923 1923 curr = len(self)
1924 1924 prev = curr - 1
1925 1925 offset = self.end(prev)
1926 1926 p1r, p2r = self.rev(p1), self.rev(p2)
1927 1927
1928 1928 # full versions are inserted when the needed deltas
1929 1929 # become comparable to the uncompressed text
1930 1930 if rawtext is None:
1931 1931 # need the rawtext size, before it is changed by flag processors,
1932 1932 # which is the non-raw size. use revlog explicitly to avoid filelog's
1933 1933 # extra logic that might remove metadata size.
1934 1934 textlen = mdiff.patchedsize(revlog.size(self, cachedelta[0]),
1935 1935 cachedelta[1])
1936 1936 else:
1937 1937 textlen = len(rawtext)
1938 1938
1939 1939 if deltacomputer is None:
1940 1940 deltacomputer = deltautil.deltacomputer(self)
1941 1941
1942 1942 revinfo = _revisioninfo(node, p1, p2, btext, textlen, cachedelta, flags)
1943 1943
1944 1944 deltainfo = deltacomputer.finddeltainfo(revinfo, fh)
1945 1945
1946 1946 e = (offset_type(offset, flags), deltainfo.deltalen, textlen,
1947 1947 deltainfo.base, link, p1r, p2r, node)
1948 1948 self.index.append(e)
1949 1949 self.nodemap[node] = curr
1950 1950
1951 1951 # Reset the pure node cache start lookup offset to account for new
1952 1952 # revision.
1953 1953 if self._nodepos is not None:
1954 1954 self._nodepos = curr
1955 1955
1956 1956 entry = self._io.packentry(e, self.node, self.version, curr)
1957 1957 self._writeentry(transaction, ifh, dfh, entry, deltainfo.data,
1958 1958 link, offset)
1959 1959
1960 1960 rawtext = btext[0]
1961 1961
1962 1962 if alwayscache and rawtext is None:
1963 1963 rawtext = deltacomputer.buildtext(revinfo, fh)
1964 1964
1965 1965 if type(rawtext) == bytes: # only accept immutable objects
1966 1966 self._revisioncache = (node, curr, rawtext)
1967 1967 self._chainbasecache[curr] = deltainfo.chainbase
1968 1968 return node
1969 1969
1970 1970 def _writeentry(self, transaction, ifh, dfh, entry, data, link, offset):
1971 1971 # Files opened in a+ mode have inconsistent behavior on various
1972 1972 # platforms. Windows requires that a file positioning call be made
1973 1973 # when the file handle transitions between reads and writes. See
1974 1974 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1975 1975 # platforms, Python or the platform itself can be buggy. Some versions
1976 1976 # of Solaris have been observed to not append at the end of the file
1977 1977 # if the file was seeked to before the end. See issue4943 for more.
1978 1978 #
1979 1979 # We work around this issue by inserting a seek() before writing.
1980 1980 # Note: This is likely not necessary on Python 3.
1981 1981 ifh.seek(0, os.SEEK_END)
1982 1982 if dfh:
1983 1983 dfh.seek(0, os.SEEK_END)
1984 1984
1985 1985 curr = len(self) - 1
1986 1986 if not self._inline:
1987 1987 transaction.add(self.datafile, offset)
1988 1988 transaction.add(self.indexfile, curr * len(entry))
1989 1989 if data[0]:
1990 1990 dfh.write(data[0])
1991 1991 dfh.write(data[1])
1992 1992 ifh.write(entry)
1993 1993 else:
1994 1994 offset += curr * self._io.size
1995 1995 transaction.add(self.indexfile, offset, curr)
1996 1996 ifh.write(entry)
1997 1997 ifh.write(data[0])
1998 1998 ifh.write(data[1])
1999 1999 self._enforceinlinesize(transaction, ifh)
2000 2000
2001 2001 def addgroup(self, deltas, linkmapper, transaction, addrevisioncb=None):
2002 2002 """
2003 2003 add a delta group
2004 2004
2005 2005 given a set of deltas, add them to the revision log. the
2006 2006 first delta is against its parent, which should be in our
2007 2007 log, the rest are against the previous delta.
2008 2008
2009 2009 If ``addrevisioncb`` is defined, it will be called with arguments of
2010 2010 this revlog and the node that was added.
2011 2011 """
2012 2012
2013 2013 nodes = []
2014 2014
2015 2015 r = len(self)
2016 2016 end = 0
2017 2017 if r:
2018 2018 end = self.end(r - 1)
2019 2019 ifh = self._indexfp("a+")
2020 2020 isize = r * self._io.size
2021 2021 if self._inline:
2022 2022 transaction.add(self.indexfile, end + isize, r)
2023 2023 dfh = None
2024 2024 else:
2025 2025 transaction.add(self.indexfile, isize, r)
2026 2026 transaction.add(self.datafile, end)
2027 2027 dfh = self._datafp("a+")
2028 2028 def flush():
2029 2029 if dfh:
2030 2030 dfh.flush()
2031 2031 ifh.flush()
2032 2032 try:
2033 2033 deltacomputer = deltautil.deltacomputer(self)
2034 2034 # loop through our set of deltas
2035 2035 for data in deltas:
2036 2036 node, p1, p2, linknode, deltabase, delta, flags = data
2037 2037 link = linkmapper(linknode)
2038 2038 flags = flags or REVIDX_DEFAULT_FLAGS
2039 2039
2040 2040 nodes.append(node)
2041 2041
2042 2042 if node in self.nodemap:
2043 2043 self._nodeduplicatecallback(transaction, node)
2044 2044 # this can happen if two branches make the same change
2045 2045 continue
2046 2046
2047 2047 for p in (p1, p2):
2048 2048 if p not in self.nodemap:
2049 2049 raise error.LookupError(p, self.indexfile,
2050 2050 _('unknown parent'))
2051 2051
2052 2052 if deltabase not in self.nodemap:
2053 2053 raise error.LookupError(deltabase, self.indexfile,
2054 2054 _('unknown delta base'))
2055 2055
2056 2056 baserev = self.rev(deltabase)
2057 2057
2058 2058 if baserev != nullrev and self.iscensored(baserev):
2059 2059 # if base is censored, delta must be full replacement in a
2060 2060 # single patch operation
2061 2061 hlen = struct.calcsize(">lll")
2062 2062 oldlen = self.rawsize(baserev)
2063 2063 newlen = len(delta) - hlen
2064 2064 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
2065 2065 raise error.CensoredBaseError(self.indexfile,
2066 2066 self.node(baserev))
2067 2067
2068 2068 if not flags and self._peek_iscensored(baserev, delta, flush):
2069 2069 flags |= REVIDX_ISCENSORED
2070 2070
2071 2071 # We assume consumers of addrevisioncb will want to retrieve
2072 2072 # the added revision, which will require a call to
2073 2073 # revision(). revision() will fast path if there is a cache
2074 2074 # hit. So, we tell _addrevision() to always cache in this case.
2075 2075 # We're only using addgroup() in the context of changegroup
2076 2076 # generation so the revision data can always be handled as raw
2077 2077 # by the flagprocessor.
2078 2078 self._addrevision(node, None, transaction, link,
2079 2079 p1, p2, flags, (baserev, delta),
2080 2080 ifh, dfh,
2081 2081 alwayscache=bool(addrevisioncb),
2082 2082 deltacomputer=deltacomputer)
2083 2083
2084 2084 if addrevisioncb:
2085 2085 addrevisioncb(self, node)
2086 2086
2087 2087 if not dfh and not self._inline:
2088 2088 # addrevision switched from inline to conventional
2089 2089 # reopen the index
2090 2090 ifh.close()
2091 2091 dfh = self._datafp("a+")
2092 2092 ifh = self._indexfp("a+")
2093 2093 finally:
2094 2094 if dfh:
2095 2095 dfh.close()
2096 2096 ifh.close()
2097 2097
2098 2098 return nodes
2099 2099
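# Illustrative sketch (dummy values) of the 7-tuple each ``deltas`` entry
# unpacks to in addgroup() above. A full-replacement delta -- required when
# the base is censored -- is a single patch hunk: a ">lll" header of
# (start, end, newlength) followed by the replacement text, assuming
# mdiff.replacediffheader() packs (0, oldlen, newlen) as its use above
# suggests.
import struct

basetextlen = 11
newtext = b'new fulltext'
fulldelta = struct.pack(">lll", 0, basetextlen, len(newtext)) + newtext
entry = (b'\x01' * 20,   # node to add
         b'\x00' * 20,   # p1 (nullid here)
         b'\x00' * 20,   # p2
         b'\x02' * 20,   # linknode in the changelog
         b'\x03' * 20,   # deltabase node the delta applies against
         fulldelta,      # binary delta data
         0)              # revision flags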
2100 2100 def iscensored(self, rev):
2101 2101 """Check if a file revision is censored."""
2102 2102 if not self._censorable:
2103 2103 return False
2104 2104
2105 2105 return self.flags(rev) & REVIDX_ISCENSORED
2106 2106
2107 2107 def _peek_iscensored(self, baserev, delta, flush):
2108 2108 """Quickly check if a delta produces a censored revision."""
2109 2109 if not self._censorable:
2110 2110 return False
2111 2111
2112 2112 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
2113 2113
2114 2114 def getstrippoint(self, minlink):
2115 2115 """find the minimum rev that must be stripped to strip the linkrev
2116 2116
2117 2117 Returns a tuple containing the minimum rev and a set of all revs that
2118 2118 have linkrevs that will be broken by this strip.
2119 2119 """
2120 2120 return storageutil.resolvestripinfo(minlink, len(self) - 1,
2121 2121 self.headrevs(),
2122 2122 self.linkrev, self.parentrevs)
2123 2123
2124 2124 def strip(self, minlink, transaction):
2125 2125 """truncate the revlog on the first revision with a linkrev >= minlink
2126 2126
2127 2127 This function is called when we're stripping revision minlink and
2128 2128 its descendants from the repository.
2129 2129
2130 2130 We have to remove all revisions with linkrev >= minlink, because
2131 2131 the equivalent changelog revisions will be renumbered after the
2132 2132 strip.
2133 2133
2134 2134 So we truncate the revlog on the first of these revisions, and
2135 2135 trust that the caller has saved the revisions that shouldn't be
2136 2136 removed and that it'll re-add them after this truncation.
2137 2137 """
2138 2138 if len(self) == 0:
2139 2139 return
2140 2140
2141 2141 rev, _ = self.getstrippoint(minlink)
2142 2142 if rev == len(self):
2143 2143 return
2144 2144
2145 2145 # first truncate the files on disk
2146 2146 end = self.start(rev)
2147 2147 if not self._inline:
2148 2148 transaction.add(self.datafile, end)
2149 2149 end = rev * self._io.size
2150 2150 else:
2151 2151 end += rev * self._io.size
2152 2152
2153 2153 transaction.add(self.indexfile, end)
2154 2154
2155 2155 # then reset internal state in memory to forget those revisions
2156 2156 self._revisioncache = None
2157 2157 self._chaininfocache = {}
2158 2158 self._chunkclear()
2159 2159 for x in pycompat.xrange(rev, len(self)):
2160 2160 del self.nodemap[self.node(x)]
2161 2161
2162 2162 del self.index[rev:-1]
2163 2163 self._nodepos = None
2164 2164
2165 2165 def checksize(self):
2166 2166 expected = 0
2167 2167 if len(self):
2168 2168 expected = max(0, self.end(len(self) - 1))
2169 2169
2170 2170 try:
2171 2171 with self._datafp() as f:
2172 2172 f.seek(0, 2)
2173 2173 actual = f.tell()
2174 2174 dd = actual - expected
2175 2175 except IOError as inst:
2176 2176 if inst.errno != errno.ENOENT:
2177 2177 raise
2178 2178 dd = 0
2179 2179
2180 2180 try:
2181 2181 f = self.opener(self.indexfile)
2182 2182 f.seek(0, 2)
2183 2183 actual = f.tell()
2184 2184 f.close()
2185 2185 s = self._io.size
2186 2186 i = max(0, actual // s)
2187 2187 di = actual - (i * s)
2188 2188 if self._inline:
2189 2189 databytes = 0
2190 2190 for r in self:
2191 2191 databytes += max(0, self.length(r))
2192 2192 dd = 0
2193 2193 di = actual - len(self) * s - databytes
2194 2194 except IOError as inst:
2195 2195 if inst.errno != errno.ENOENT:
2196 2196 raise
2197 2197 di = 0
2198 2198
2199 2199 return (dd, di)
2200 2200
2201 2201 def files(self):
2202 2202 res = [self.indexfile]
2203 2203 if not self._inline:
2204 2204 res.append(self.datafile)
2205 2205 return res
2206 2206
2207 2207 def emitrevisions(self, nodes, nodesorder=None, revisiondata=False,
2208 2208 assumehaveparentrevisions=False, deltaprevious=False):
2209 2209 if nodesorder not in ('nodes', 'storage', None):
2210 2210 raise error.ProgrammingError('unhandled value for nodesorder: %s' %
2211 2211 nodesorder)
2212 2212
2213 2213 if nodesorder is None and not self._generaldelta:
2214 2214 nodesorder = 'storage'
2215 2215
2216 deltamode = repository.CG_DELTAMODE_STD
2217 if deltaprevious:
2218 deltamode = repository.CG_DELTAMODE_PREV
2219 elif not self._storedeltachains:
2220 deltamode = repository.CG_DELTAMODE_FULL
2221
2216 2222 return storageutil.emitrevisions(
2217 2223 self, nodes, nodesorder, revlogrevisiondelta,
2218 2224 deltaparentfn=self.deltaparent,
2219 2225 candeltafn=self.candelta,
2220 2226 rawsizefn=self.rawsize,
2221 2227 revdifffn=self.revdiff,
2222 2228 flagsfn=self.flags,
2223 sendfulltext=not self._storedeltachains,
2229 deltamode=deltamode,
2224 2230 revisiondata=revisiondata,
2225 assumehaveparentrevisions=assumehaveparentrevisions,
2226 deltaprevious=deltaprevious)
2231 assumehaveparentrevisions=assumehaveparentrevisions)
2227 2232
2228 2233 DELTAREUSEALWAYS = 'always'
2229 2234 DELTAREUSESAMEREVS = 'samerevs'
2230 2235 DELTAREUSENEVER = 'never'
2231 2236
2232 2237 DELTAREUSEFULLADD = 'fulladd'
2233 2238
2234 2239 DELTAREUSEALL = {'always', 'samerevs', 'never', 'fulladd'}
2235 2240
2236 2241 def clone(self, tr, destrevlog, addrevisioncb=None,
2237 2242 deltareuse=DELTAREUSESAMEREVS, deltabothparents=None):
2238 2243 """Copy this revlog to another, possibly with format changes.
2239 2244
2240 2245 The destination revlog will contain the same revisions and nodes.
2241 2246 However, it may not be bit-for-bit identical due to e.g. delta encoding
2242 2247 differences.
2243 2248
2244 2249 The ``deltareuse`` argument controls how deltas from the existing revlog
2245 2250 are preserved in the destination revlog. The argument can have the
2246 2251 following values:
2247 2252
2248 2253 DELTAREUSEALWAYS
2249 2254 Deltas will always be reused (if possible), even if the destination
2250 2255 revlog would not select the same revisions for the delta. This is the
2251 2256 fastest mode of operation.
2252 2257 DELTAREUSESAMEREVS
2253 2258 Deltas will be reused if the destination revlog would pick the same
2254 2259 revisions for the delta. This mode strikes a balance between speed
2255 2260 and optimization.
2256 2261 DELTAREUSENEVER
2257 2262 Deltas will never be reused. This is the slowest mode of execution.
2258 2263 This mode can be used to recompute deltas (e.g. if the diff/delta
2259 2264 algorithm changes).
2260 2265
2261 2266 Delta computation can be slow, so the choice of delta reuse policy can
2262 2267 significantly affect run time.
2263 2268
2264 2269 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
2265 2270 two extremes. Deltas will be reused if they are appropriate. But if a
2266 2271 better delta base revision is available, it will be chosen. This means if you
2267 2272 are converting a non-generaldelta revlog to a generaldelta revlog,
2268 2273 deltas will be recomputed if the delta's parent isn't a parent of the
2269 2274 revision.
2270 2275
2271 2276 In addition to the delta policy, the ``deltabothparents`` argument
2272 2277 controls whether to compute deltas against both parents for merges.
2273 2278 If ``None``, the destination revlog's existing setting is left in place.
2274 2279 """
2275 2280 if deltareuse not in self.DELTAREUSEALL:
2276 2281 raise ValueError(_('value for deltareuse invalid: %s') % deltareuse)
2277 2282
2278 2283 if len(destrevlog):
2279 2284 raise ValueError(_('destination revlog is not empty'))
2280 2285
2281 2286 if getattr(self, 'filteredrevs', None):
2282 2287 raise ValueError(_('source revlog has filtered revisions'))
2283 2288 if getattr(destrevlog, 'filteredrevs', None):
2284 2289 raise ValueError(_('destination revlog has filtered revisions'))
2285 2290
2286 2291 # lazydeltabase controls whether to reuse a cached delta, if possible.
2287 2292 oldlazydeltabase = destrevlog._lazydeltabase
2288 2293 oldamd = destrevlog._deltabothparents
2289 2294
2290 2295 try:
2291 2296 if deltareuse == self.DELTAREUSEALWAYS:
2292 2297 destrevlog._lazydeltabase = True
2293 2298 elif deltareuse == self.DELTAREUSESAMEREVS:
2294 2299 destrevlog._lazydeltabase = False
2295 2300
2296 2301 destrevlog._deltabothparents = deltabothparents or oldamd
2297 2302
2298 2303 populatecachedelta = deltareuse in (self.DELTAREUSEALWAYS,
2299 2304 self.DELTAREUSESAMEREVS)
2300 2305
2301 2306 deltacomputer = deltautil.deltacomputer(destrevlog)
2302 2307 index = self.index
2303 2308 for rev in self:
2304 2309 entry = index[rev]
2305 2310
2306 2311 # Some classes override linkrev to take filtered revs into
2307 2312 # account. Use raw entry from index.
2308 2313 flags = entry[0] & 0xffff
2309 2314 linkrev = entry[4]
2310 2315 p1 = index[entry[5]][7]
2311 2316 p2 = index[entry[6]][7]
2312 2317 node = entry[7]
2313 2318
2314 2319 # (Possibly) reuse the delta from the revlog if allowed and
2315 2320 # the revlog chunk is a delta.
2316 2321 cachedelta = None
2317 2322 rawtext = None
2318 2323 if populatecachedelta:
2319 2324 dp = self.deltaparent(rev)
2320 2325 if dp != nullrev:
2321 2326 cachedelta = (dp, bytes(self._chunk(rev)))
2322 2327
2323 2328 if not cachedelta:
2324 2329 rawtext = self.revision(rev, raw=True)
2325 2330
2326 2331
2327 2332 if deltareuse == self.DELTAREUSEFULLADD:
2328 2333 destrevlog.addrevision(rawtext, tr, linkrev, p1, p2,
2329 2334 cachedelta=cachedelta,
2330 2335 node=node, flags=flags,
2331 2336 deltacomputer=deltacomputer)
2332 2337 else:
2333 2338 ifh = destrevlog.opener(destrevlog.indexfile, 'a+',
2334 2339 checkambig=False)
2335 2340 dfh = None
2336 2341 if not destrevlog._inline:
2337 2342 dfh = destrevlog.opener(destrevlog.datafile, 'a+')
2338 2343 try:
2339 2344 destrevlog._addrevision(node, rawtext, tr, linkrev, p1,
2340 2345 p2, flags, cachedelta, ifh, dfh,
2341 2346 deltacomputer=deltacomputer)
2342 2347 finally:
2343 2348 if dfh:
2344 2349 dfh.close()
2345 2350 ifh.close()
2346 2351
2347 2352 if addrevisioncb:
2348 2353 addrevisioncb(self, rev, node)
2349 2354 finally:
2350 2355 destrevlog._lazydeltabase = oldlazydeltabase
2351 2356 destrevlog._deltabothparents = oldamd
2352 2357
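# Illustrative usage sketch of clone() with the slowest reuse policy, e.g.
# to recompute every delta during a format upgrade; ``srcrevlog``,
# ``destrevlog`` and ``tr`` stand in for a real source revlog, an empty
# destination and an open transaction.
def recompute_all_deltas_sketch(srcrevlog, destrevlog, tr):
    srcrevlog.clone(tr, destrevlog,
                    deltareuse=srcrevlog.DELTAREUSENEVER)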
2353 2358 def censorrevision(self, tr, censornode, tombstone=b''):
2354 2359 if (self.version & 0xFFFF) == REVLOGV0:
2355 2360 raise error.RevlogError(_('cannot censor with version %d revlogs') %
2356 2361 self.version)
2357 2362
2358 2363 censorrev = self.rev(censornode)
2359 2364 tombstone = storageutil.packmeta({b'censored': tombstone}, b'')
2360 2365
2361 2366 if len(tombstone) > self.rawsize(censorrev):
2362 2367 raise error.Abort(_('censor tombstone must be no longer than '
2363 2368 'censored data'))
2364 2369
2365 2370 # Rewriting the revlog in place is hard. Our strategy for censoring is
2366 2371 # to create a new revlog, copy all revisions to it, then replace the
2367 2372 # revlogs on transaction close.
2368 2373
2369 2374 newindexfile = self.indexfile + b'.tmpcensored'
2370 2375 newdatafile = self.datafile + b'.tmpcensored'
2371 2376
2372 2377 # This is a bit dangerous. We could easily have a mismatch of state.
2373 2378 newrl = revlog(self.opener, newindexfile, newdatafile,
2374 2379 censorable=True)
2375 2380 newrl.version = self.version
2376 2381 newrl._generaldelta = self._generaldelta
2377 2382 newrl._io = self._io
2378 2383
2379 2384 for rev in self.revs():
2380 2385 node = self.node(rev)
2381 2386 p1, p2 = self.parents(node)
2382 2387
2383 2388 if rev == censorrev:
2384 2389 newrl.addrawrevision(tombstone, tr, self.linkrev(censorrev),
2385 2390 p1, p2, censornode, REVIDX_ISCENSORED)
2386 2391
2387 2392 if newrl.deltaparent(rev) != nullrev:
2388 2393 raise error.Abort(_('censored revision stored as delta; '
2389 2394 'cannot censor'),
2390 2395 hint=_('censoring of revlogs is not '
2391 2396 'fully implemented; please report '
2392 2397 'this bug'))
2393 2398 continue
2394 2399
2395 2400 if self.iscensored(rev):
2396 2401 if self.deltaparent(rev) != nullrev:
2397 2402 raise error.Abort(_('cannot censor due to censored '
2398 2403 'revision having delta stored'))
2399 2404 rawtext = self._chunk(rev)
2400 2405 else:
2401 2406 rawtext = self.revision(rev, raw=True)
2402 2407
2403 2408 newrl.addrawrevision(rawtext, tr, self.linkrev(rev), p1, p2, node,
2404 2409 self.flags(rev))
2405 2410
2406 2411 tr.addbackup(self.indexfile, location='store')
2407 2412 if not self._inline:
2408 2413 tr.addbackup(self.datafile, location='store')
2409 2414
2410 2415 self.opener.rename(newrl.indexfile, self.indexfile)
2411 2416 if not self._inline:
2412 2417 self.opener.rename(newrl.datafile, self.datafile)
2413 2418
2414 2419 self.clearcaches()
2415 2420 self._loadindex(self.version, None)
2416 2421
2417 2422 def verifyintegrity(self, state):
2418 2423 """Verifies the integrity of the revlog.
2419 2424
2420 2425 Yields ``revlogproblem`` instances describing problems that are
2421 2426 found.
2422 2427 """
2423 2428 dd, di = self.checksize()
2424 2429 if dd:
2425 2430 yield revlogproblem(error=_('data length off by %d bytes') % dd)
2426 2431 if di:
2427 2432 yield revlogproblem(error=_('index contains %d extra bytes') % di)
2428 2433
2429 2434 version = self.version & 0xFFFF
2430 2435
2431 2436 # The verifier tells us what version revlog we should be.
2432 2437 if version != state['expectedversion']:
2433 2438 yield revlogproblem(
2434 2439 warning=_("warning: '%s' uses revlog format %d; expected %d") %
2435 2440 (self.indexfile, version, state['expectedversion']))
2436 2441
2437 2442 state['skipread'] = set()
2438 2443
2439 2444 for rev in self:
2440 2445 node = self.node(rev)
2441 2446
2442 2447 # Verify contents. 4 cases to care about:
2443 2448 #
2444 2449 # common: the most common case
2445 2450 # rename: with a rename
2446 2451 # meta: file content starts with b'\1\n', the metadata
2447 2452 # header defined in filelog.py, but without a rename
2448 2453 # ext: content stored externally
2449 2454 #
2450 2455 # More formally, their differences are shown below:
2451 2456 #
2452 2457 # | common | rename | meta | ext
2453 2458 # -------------------------------------------------------
2454 2459 # flags() | 0 | 0 | 0 | not 0
2455 2460 # renamed() | False | True | False | ?
2456 2461 # rawtext[0:2]=='\1\n'| False | True | True | ?
2457 2462 #
2458 2463 # "rawtext" means the raw text stored in revlog data, which
2459 2464 # could be retrieved by "revision(rev, raw=True)". "text"
2460 2465 # mentioned below is "revision(rev, raw=False)".
2461 2466 #
2462 2467 # There are 3 different lengths stored physically:
2463 2468 # 1. L1: rawsize, stored in revlog index
2464 2469 # 2. L2: len(rawtext), stored in revlog data
2465 2470 # 3. L3: len(text), stored in revlog data if flags==0, or
2466 2471 # possibly somewhere else if flags!=0
2467 2472 #
2468 2473 # L1 should be equal to L2. L3 could be different from them.
2469 2474 # "text" may or may not affect commit hash depending on flag
2470 2475 # processors (see revlog.addflagprocessor).
2471 2476 #
2472 2477 # | common | rename | meta | ext
2473 2478 # -------------------------------------------------
2474 2479 # rawsize() | L1 | L1 | L1 | L1
2475 2480 # size() | L1 | L2-LM | L1(*) | L1 (?)
2476 2481 # len(rawtext) | L2 | L2 | L2 | L2
2477 2482 # len(text) | L2 | L2 | L2 | L3
2478 2483 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
2479 2484 #
2480 2485 # LM: length of metadata, depending on rawtext
2481 2486 # (*): not ideal, see comment in filelog.size
2482 2487 # (?): could be "- len(meta)" if the resolved content has
2483 2488 # rename metadata
2484 2489 #
2485 2490 # Checks needed to be done:
2486 2491 # 1. length check: L1 == L2, in all cases.
2487 2492 # 2. hash check: depending on flag processor, we may need to
2488 2493 # use either "text" (external), or "rawtext" (in revlog).
2489 2494
2490 2495 try:
2491 2496 skipflags = state.get('skipflags', 0)
2492 2497 if skipflags:
2493 2498 skipflags &= self.flags(rev)
2494 2499
2495 2500 if skipflags:
2496 2501 state['skipread'].add(node)
2497 2502 else:
2498 2503 # Side-effect: read content and verify hash.
2499 2504 self.revision(node)
2500 2505
2501 2506 l1 = self.rawsize(rev)
2502 2507 l2 = len(self.revision(node, raw=True))
2503 2508
2504 2509 if l1 != l2:
2505 2510 yield revlogproblem(
2506 2511 error=_('unpacked size is %d, %d expected') % (l2, l1),
2507 2512 node=node)
2508 2513
2509 2514 except error.CensoredNodeError:
2510 2515 if state['erroroncensored']:
2511 2516 yield revlogproblem(error=_('censored file data'),
2512 2517 node=node)
2513 2518 state['skipread'].add(node)
2514 2519 except Exception as e:
2515 2520 yield revlogproblem(
2516 2521 error=_('unpacking %s: %s') % (short(node),
2517 2522 stringutil.forcebytestr(e)),
2518 2523 node=node)
2519 2524 state['skipread'].add(node)
2520 2525
2521 2526 def storageinfo(self, exclusivefiles=False, sharedfiles=False,
2522 2527 revisionscount=False, trackedsize=False,
2523 2528 storedsize=False):
2524 2529 d = {}
2525 2530
2526 2531 if exclusivefiles:
2527 2532 d['exclusivefiles'] = [(self.opener, self.indexfile)]
2528 2533 if not self._inline:
2529 2534 d['exclusivefiles'].append((self.opener, self.datafile))
2530 2535
2531 2536 if sharedfiles:
2532 2537 d['sharedfiles'] = []
2533 2538
2534 2539 if revisionscount:
2535 2540 d['revisionscount'] = len(self)
2536 2541
2537 2542 if trackedsize:
2538 2543 d['trackedsize'] = sum(map(self.rawsize, iter(self)))
2539 2544
2540 2545 if storedsize:
2541 2546 d['storedsize'] = sum(self.opener.stat(path).st_size
2542 2547 for path in self.files())
2543 2548
2544 2549 return d
@@ -1,480 +1,484
1 1 # storageutil.py - Storage functionality agnostic of backend implementation.
2 2 #
3 3 # Copyright 2018 Gregory Szorc <gregory.szorc@gmail.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import hashlib
11 11 import re
12 12 import struct
13 13
14 14 from ..i18n import _
15 15 from ..node import (
16 16 bin,
17 17 nullid,
18 18 nullrev,
19 19 )
20 20 from .. import (
21 21 dagop,
22 22 error,
23 23 mdiff,
24 24 pycompat,
25 repository,
25 26 )
26 27
27 28 _nullhash = hashlib.sha1(nullid)
28 29
29 30 def hashrevisionsha1(text, p1, p2):
30 31 """Compute the SHA-1 for revision data and its parents.
31 32
32 33 This hash combines both the current file contents and its history
33 34 in a manner that makes it easy to distinguish nodes with the same
34 35 content in the revision graph.
35 36 """
36 37 # As of now, if one of the parent nodes is null, p2 is null
37 38 if p2 == nullid:
38 39 # deep copy of a hash is faster than creating one
39 40 s = _nullhash.copy()
40 41 s.update(p1)
41 42 else:
42 43 # none of the parent nodes are nullid
43 44 if p1 < p2:
44 45 a = p1
45 46 b = p2
46 47 else:
47 48 a = p2
48 49 b = p1
49 50 s = hashlib.sha1(a)
50 51 s.update(b)
51 52 s.update(text)
52 53 return s.digest()
53 54
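# Equivalent, self-contained restatement of the node hash above, for
# illustration: SHA-1 over the two parent nodes in sorted order followed by
# the revision text. nullid (all zeros) sorts first, which is why the
# p2 == nullid fast path agrees with this form.
import hashlib

def hashrevisionsha1_sketch(text, p1, p2):
    s = hashlib.sha1(min(p1, p2))
    s.update(max(p1, p2))
    s.update(text)
    return s.digest()

_null = b'\x00' * 20
assert (hashrevisionsha1_sketch(b'data', _null, _null)
        == hashlib.sha1(_null + _null + b'data').digest())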
54 55 METADATA_RE = re.compile(b'\x01\n')
55 56
56 57 def parsemeta(text):
57 58 """Parse metadata header from revision data.
58 59
59 60 Returns a 2-tuple of (metadata, offset), where both can be None if there
60 61 is no metadata.
61 62 """
62 63 # text can be buffer, so we can't use .startswith or .index
63 64 if text[:2] != b'\x01\n':
64 65 return None, None
65 66 s = METADATA_RE.search(text, 2).start()
66 67 mtext = text[2:s]
67 68 meta = {}
68 69 for l in mtext.splitlines():
69 70 k, v = l.split(b': ', 1)
70 71 meta[k] = v
71 72 return meta, s + 2
72 73
73 74 def packmeta(meta, text):
74 75 """Add metadata to fulltext to produce revision text."""
75 76 keys = sorted(meta)
76 77 metatext = b''.join(b'%s: %s\n' % (k, meta[k]) for k in keys)
77 78 return b'\x01\n%s\x01\n%s' % (metatext, text)
78 79
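# Quick round trip of the framing used by parsemeta() and packmeta() above:
# "key: value" lines between two '\x01\n' markers, prepended to the
# revision text (values here are dummies).
packed = b'\x01\ncopy: foo\ncopyrev: 0123\n\x01\nfile body'
end = packed.index(b'\x01\n', 2)
meta = dict(l.split(b': ', 1) for l in packed[2:end].splitlines())
assert meta == {b'copy': b'foo', b'copyrev': b'0123'}
assert packed[end + 2:] == b'file body'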
79 80 def iscensoredtext(text):
80 81 meta = parsemeta(text)[0]
81 82 return meta and b'censored' in meta
82 83
83 84 def filtermetadata(text):
84 85 """Extract just the revision data from source text.
85 86
86 87 Returns ``text`` unless it has a metadata header, in which case we return
87 88 a new buffer without the metadata.
88 89 """
89 90 if not text.startswith(b'\x01\n'):
90 91 return text
91 92
92 93 offset = text.index(b'\x01\n', 2)
93 94 return text[offset + 2:]
94 95
95 96 def filerevisioncopied(store, node):
96 97 """Resolve file revision copy metadata.
97 98
98 99 Returns ``False`` if the file has no copy metadata. Otherwise a
99 100 2-tuple of the source filename and node.
100 101 """
101 102 if store.parents(node)[0] != nullid:
102 103 return False
103 104
104 105 meta = parsemeta(store.revision(node))[0]
105 106
106 107 # copy and copyrev occur in pairs. In rare cases due to old bugs,
107 108 # one can occur without the other. So ensure both are present to flag
108 109 # as a copy.
109 110 if meta and b'copy' in meta and b'copyrev' in meta:
110 111 return meta[b'copy'], bin(meta[b'copyrev'])
111 112
112 113 return False
113 114
114 115 def filedataequivalent(store, node, filedata):
115 116 """Determines whether file data is equivalent to a stored node.
116 117
117 118 Returns True if the passed file data would hash to the same value
118 119 as a stored revision and False otherwise.
119 120
120 121 When a stored revision is censored, filedata must be empty to have
121 122 equivalence.
122 123
123 124 When a stored revision has copy metadata, it is ignored as part
124 125 of the compare.
125 126 """
126 127
127 128 if filedata.startswith(b'\x01\n'):
128 129 revisiontext = b'\x01\n\x01\n' + filedata
129 130 else:
130 131 revisiontext = filedata
131 132
132 133 p1, p2 = store.parents(node)
133 134
134 135 computednode = hashrevisionsha1(revisiontext, p1, p2)
135 136
136 137 if computednode == node:
137 138 return True
138 139
139 140 # Censored files compare against the empty file.
140 141 if store.iscensored(store.rev(node)):
141 142 return filedata == b''
142 143
143 144 # Renaming a file produces a different hash, even if the data
144 145 # remains unchanged. Check if that's the case.
145 146 if store.renamed(node):
146 147 return store.read(node) == filedata
147 148
148 149 return False
149 150
150 151 def iterrevs(storelen, start=0, stop=None):
151 152 """Iterate over revision numbers in a store."""
152 153 step = 1
153 154
154 155 if stop is not None:
155 156 if start > stop:
156 157 step = -1
157 158 stop += step
158 159 if stop > storelen:
159 160 stop = storelen
160 161 else:
161 162 stop = storelen
162 163
163 164 return pycompat.xrange(start, stop, step)
164 165
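# iterrevs() behavior at a glance (assuming the function above is in
# scope): forward by default, reversed when start > stop, and clamped to
# the store length.
assert list(iterrevs(5)) == [0, 1, 2, 3, 4]
assert list(iterrevs(5, start=3, stop=1)) == [3, 2, 1]
assert list(iterrevs(5, start=2, stop=99)) == [2, 3, 4]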
165 166 def fileidlookup(store, fileid, identifier):
166 167 """Resolve the file node for a value.
167 168
168 169 ``store`` is an object implementing the ``ifileindex`` interface.
169 170
170 171 ``fileid`` can be:
171 172
172 173 * A 20 byte binary node.
173 174 * An integer revision number
174 175 * A 40 byte hex node.
175 176 * A bytes that can be parsed as an integer representing a revision number.
176 177
177 178 ``identifier`` is used to populate ``error.LookupError`` with an identifier
178 179 for the store.
179 180
180 181 Raises ``error.LookupError`` on failure.
181 182 """
182 183 if isinstance(fileid, int):
183 184 try:
184 185 return store.node(fileid)
185 186 except IndexError:
186 187 raise error.LookupError('%d' % fileid, identifier,
187 188 _('no match found'))
188 189
189 190 if len(fileid) == 20:
190 191 try:
191 192 store.rev(fileid)
192 193 return fileid
193 194 except error.LookupError:
194 195 pass
195 196
196 197 if len(fileid) == 40:
197 198 try:
198 199 rawnode = bin(fileid)
199 200 store.rev(rawnode)
200 201 return rawnode
201 202 except TypeError:
202 203 pass
203 204
204 205 try:
205 206 rev = int(fileid)
206 207
207 208 if b'%d' % rev != fileid:
208 209 raise ValueError
209 210
210 211 try:
211 212 return store.node(rev)
212 213 except (IndexError, TypeError):
213 214 pass
214 215 except (ValueError, OverflowError):
215 216 pass
216 217
217 218 raise error.LookupError(fileid, identifier, _('no match found'))
218 219
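The lookups are attempted in the order documented above: integer, 20-byte binary node, 40-byte hex node, then integer-like bytes. A hedged usage sketch (``store`` and the path are hypothetical stand-ins):

    from mercurial.node import hex

    node = fileidlookup(store, 0, b'path/to/file')          # revision number
    node = fileidlookup(store, node, b'path/to/file')       # 20-byte binary node
    node = fileidlookup(store, hex(node), b'path/to/file')  # 40-byte hex node
    node = fileidlookup(store, b'0', b'path/to/file')       # bytes revision number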
219 220 def resolvestripinfo(minlinkrev, tiprev, headrevs, linkrevfn, parentrevsfn):
220 221 """Resolve information needed to strip revisions.
221 222
222 223 Finds the minimum revision number that must be stripped in order to
223 224 strip ``minlinkrev``.
224 225
225 226 Returns a 2-tuple of the minimum revision number to do that and a set
226 227 of all revision numbers that have linkrevs that would be broken
227 228 by that strip.
228 229
229 230 ``tiprev`` is the current tip-most revision. It is ``len(store) - 1``.
230 231 ``headrevs`` is an iterable of head revisions.
231 232 ``linkrevfn`` is a callable that receives a revision number and returns its
232 233 linked revision.
233 234 ``parentrevsfn`` is a callable that receives a revision number and returns
234 235 an iterable of its parent revision numbers.
235 236 """
236 237 brokenrevs = set()
237 238 strippoint = tiprev + 1
238 239
239 240 heads = {}
240 241 futurelargelinkrevs = set()
241 242 for head in headrevs:
242 243 headlinkrev = linkrevfn(head)
243 244 heads[head] = headlinkrev
244 245 if headlinkrev >= minlinkrev:
245 246 futurelargelinkrevs.add(headlinkrev)
246 247
247 248 # This algorithm involves walking down the rev graph, starting at the
248 249 # heads. Since the revs are topologically sorted according to linkrev,
249 250 # once all head linkrevs are below the minlink, we know there are
250 251 # no more revs that could have a linkrev greater than minlink.
251 252 # So we can stop walking.
252 253 while futurelargelinkrevs:
253 254 strippoint -= 1
254 255 linkrev = heads.pop(strippoint)
255 256
256 257 if linkrev < minlinkrev:
257 258 brokenrevs.add(strippoint)
258 259 else:
259 260 futurelargelinkrevs.remove(linkrev)
260 261
261 262 for p in parentrevsfn(strippoint):
262 263 if p != nullrev:
263 264 plinkrev = linkrevfn(p)
264 265 heads[p] = plinkrev
265 266 if plinkrev >= minlinkrev:
266 267 futurelargelinkrevs.add(plinkrev)
267 268
268 269 return strippoint, brokenrevs
269 270
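A worked example may help. Consider a hypothetical store of three revisions where rev 0 is the root and revs 1 and 2 are both heads with parent 0; the linkrevs are invented for the example:

    linkrevs = {0: 0, 1: 2, 2: 1}        # rev -> linked changelog rev
    parents = {0: [-1], 1: [0], 2: [0]}  # rev -> parent revs (-1 ~ nullrev)

    strippoint, broken = resolvestripinfo(
        2,                        # minlinkrev: strip changelog rev 2 and up
        2,                        # tiprev
        [1, 2],                   # headrevs
        linkrevs.__getitem__,     # linkrevfn
        parents.__getitem__,      # parentrevsfn
    )
    # strippoint == 1: revs 1 and 2 must both go to remove linkrev 2.
    # broken == {2}: rev 2 links to changeset 1, which survives the strip,
    # so its removal is collateral damage the caller needs to know about.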
270 271 def emitrevisions(store, nodes, nodesorder, resultcls, deltaparentfn=None,
271 272 candeltafn=None, rawsizefn=None, revdifffn=None, flagsfn=None,
272 sendfulltext=False,
273 revisiondata=False, assumehaveparentrevisions=False,
274 deltaprevious=False):
273 deltamode=repository.CG_DELTAMODE_STD,
274 revisiondata=False, assumehaveparentrevisions=False):
275 275 """Generic implementation of ifiledata.emitrevisions().
276 276
277 277 Emitting revision data is subtly complex. This function attempts to
278 278 encapsulate all the logic for doing so in a backend-agnostic way.
279 279
280 280 ``store``
281 281 Object conforming to ``ifilestorage`` interface.
282 282
283 283 ``nodes``
284 284 List of revision nodes whose data to emit.
285 285
286 286 ``resultcls``
287 287 A type implementing the ``irevisiondelta`` interface that will be
288 288 constructed and returned.
289 289
290 290 ``deltaparentfn`` (optional)
291 291 Callable receiving a revision number and returning the revision number
292 292 of a revision that the internal delta is stored against. This delta
293 293 will be preferred over computing a new arbitrary delta.
294 294
295 295 If not defined, a delta will always be computed from raw revision
296 296 data.
297 297
298 298 ``candeltafn`` (optional)
299 299 Callable receiving a pair of revision numbers that returns a bool
300 300 indicating whether a delta between them can be produced.
301 301
302 302 If not defined, it is assumed that any two revisions can delta with
303 303 each other.
304 304
305 305 ``rawsizefn`` (optional)
306 306 Callable receiving a revision number and returning the length of
307 307 ``store.revision(rev, raw=True)``.
308 308
309 309 If not defined, ``len(store.revision(rev, raw=True))`` will be called.
310 310
311 311 ``revdifffn`` (optional)
312 312 Callable receiving a pair of revision numbers that returns a delta
313 313 between them.
314 314
315 315 If not defined, a delta will be computed by invoking mdiff code
316 316 on ``store.revision()`` results.
317 317
318 318 Defining this function allows a precomputed or stored delta to be
319 319 used without having to compute one.
320 320
321 321 ``flagsfn`` (optional)
322 322 Callable receiving a revision number and returning the integer flags
323 323 value for it. If not defined, the flags value will be 0.
324 324
325 ``sendfulltext``
325 ``deltamode``
326 constraint on the delta to be sent:
327 * CG_DELTAMODE_STD - normal mode, try to reuse storage deltas,
328 * CG_DELTAMODE_PREV - only delta against "prev",
329 * CG_DELTAMODE_FULL - only issue full snapshots.
330
328 333 ``nodesorder``
329 334 ``revisiondata``
330 335 ``assumehaveparentrevisions``
331 ``deltaprevious``
332 See ``ifiledata.emitrevisions()`` interface documentation.
333 336 """
334 337
335 338 fnode = store.node
336 339 frev = store.rev
337 340
338 341 if nodesorder == 'nodes':
339 342 revs = [frev(n) for n in nodes]
340 343 elif nodesorder == 'storage':
341 344 revs = sorted(frev(n) for n in nodes)
342 345 else:
343 346 revs = set(frev(n) for n in nodes)
344 347 revs = dagop.linearize(revs, store.parentrevs)
345 348
346 349 prevrev = None
347 350
348 if deltaprevious or assumehaveparentrevisions:
351 if deltamode == repository.CG_DELTAMODE_PREV or assumehaveparentrevisions:
349 352 prevrev = store.parentrevs(revs[0])[0]
350 353
351 354 # Set of revs available to delta against.
352 355 available = set()
353 356
354 357 for rev in revs:
355 358 if rev == nullrev:
356 359 continue
357 360
358 361 node = fnode(rev)
359 362 p1rev, p2rev = store.parentrevs(rev)
360 363
361 364 if deltaparentfn:
362 365 deltaparentrev = deltaparentfn(rev)
363 366 else:
364 367 deltaparentrev = nullrev
365 368
366 369 # Forced delta against previous mode.
367 if deltaprevious:
370 if deltamode == repository.CG_DELTAMODE_PREV:
368 371 baserev = prevrev
369 372
370 373 # We're instructed to send fulltext. Honor that.
371 elif sendfulltext:
374 elif deltamode == repository.CG_DELTAMODE_FULL:
372 375 baserev = nullrev
373 376
374 377 # There is a delta in storage. We try to use that because it
375 378 # amounts to effectively copying data from storage and is
376 379 # therefore the fastest.
377 380 elif deltaparentrev != nullrev:
378 381 # Base revision was already emitted in this group. We can
379 382 # always safely use the delta.
380 383 if deltaparentrev in available:
381 384 baserev = deltaparentrev
382 385
383 386 # Base revision is a parent that hasn't been emitted already.
384 387 # Use it if we can assume the receiver has the parent revision.
385 388 elif (assumehaveparentrevisions
386 389 and deltaparentrev in (p1rev, p2rev)):
387 390 baserev = deltaparentrev
388 391
389 392 # No guarantee the receiver has the delta parent. Send delta
390 393 # against last revision (if possible), which in the common case
391 394 # should be similar enough to this revision that the delta is
392 395 # reasonable.
393 396 elif prevrev is not None:
394 397 baserev = prevrev
395 398 else:
396 399 baserev = nullrev
397 400
398 401 # Storage has a fulltext revision.
399 402
400 403 # Let's use the previous revision, which is as good a guess as any.
401 404 # There is definitely room to improve this logic.
402 405 elif prevrev is not None:
403 406 baserev = prevrev
404 407 else:
405 408 baserev = nullrev
406 409
407 410 # But we can't actually use our chosen delta base for whatever
408 411 # reason. Reset to fulltext.
409 412 if baserev != nullrev and (candeltafn and not candeltafn(baserev, rev)):
410 413 baserev = nullrev
411 414
412 415 revision = None
413 416 delta = None
414 417 baserevisionsize = None
415 418
416 419 if revisiondata:
417 420 if store.iscensored(baserev) or store.iscensored(rev):
418 421 try:
419 422 revision = store.revision(node, raw=True)
420 423 except error.CensoredNodeError as e:
421 424 revision = e.tombstone
422 425
423 426 if baserev != nullrev:
424 427 if rawsizefn:
425 428 baserevisionsize = rawsizefn(baserev)
426 429 else:
427 430 baserevisionsize = len(store.revision(baserev,
428 431 raw=True))
429 432
430 elif baserev == nullrev and not deltaprevious:
433 elif (baserev == nullrev
434 and deltamode != repository.CG_DELTAMODE_PREV):
431 435 revision = store.revision(node, raw=True)
432 436 available.add(rev)
433 437 else:
434 438 if revdifffn:
435 439 delta = revdifffn(baserev, rev)
436 440 else:
437 441 delta = mdiff.textdiff(store.revision(baserev, raw=True),
438 442 store.revision(rev, raw=True))
439 443
440 444 available.add(rev)
441 445
442 446 yield resultcls(
443 447 node=node,
444 448 p1node=fnode(p1rev),
445 449 p2node=fnode(p2rev),
446 450 basenode=fnode(baserev),
447 451 flags=flagsfn(rev) if flagsfn else 0,
448 452 baserevisionsize=baserevisionsize,
449 453 revision=revision,
450 454 delta=delta)
451 455
452 456 prevrev = rev
453 457
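Putting the pieces together, a hedged caller sketch; every name below (``filestore``, ``revisiondelta``, ``somenodes``) is a stand-in for a concrete ``ifilestorage`` object, an ``irevisiondelta`` type and a node list, not real API:

    for rd in emitrevisions(filestore, somenodes,
                            nodesorder='storage',
                            resultcls=revisiondelta,
                            deltaparentfn=filestore.deltaparent,  # revlog-style
                            deltamode=repository.CG_DELTAMODE_STD,
                            revisiondata=True,
                            assumehaveparentrevisions=True):
        if rd.delta is not None:
            pass  # send rd.delta, encoded against rd.basenode
        else:
            pass  # send rd.revision as a fulltext (basenode is nullid)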
454 458 def deltaiscensored(delta, baserev, baselenfn):
455 459 """Determine if a delta represents censored revision data.
456 460
457 461 ``baserev`` is the base revision this delta is encoded against.
458 462 ``baselenfn`` is a callable receiving a revision number that resolves the
459 463 length of the revision fulltext.
460 464
461 465 Returns a bool indicating if the result of the delta represents a censored
462 466 revision.
463 467 """
464 468 # Fragile heuristic: unless new file meta keys are added alphabetically
465 469 # preceding "censored", all censored revisions are prefixed by
466 470 # "\1\ncensored:". A delta producing such a censored revision must be a
467 471 # full-replacement delta, so we inspect the first and only patch in the
468 472 # delta for this prefix.
469 473 hlen = struct.calcsize(">lll")
470 474 if len(delta) <= hlen:
471 475 return False
472 476
473 477 oldlen = baselenfn(baserev)
474 478 newlen = len(delta) - hlen
475 479 if delta[:hlen] != mdiff.replacediffheader(oldlen, newlen):
476 480 return False
477 481
478 482 add = "\1\ncensored:"
479 483 addlen = len(add)
480 484 return newlen >= addlen and delta[hlen:hlen + addlen] == add
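For illustration, the shape this heuristic matches is a single full-replacement hunk whose payload is a censorship tombstone. A sketch, assuming the usual bdiff hunk header of ``(start, end, length)`` packed as ``>lll`` with the replacement covering the whole base text:

    import struct

    oldlen = 1234  # hypothetical fulltext length of baserev
    tombstone = b'\x01\ncensored: enforced by policy\n\x01\n'
    delta = struct.pack(">lll", 0, oldlen, len(tombstone)) + tombstone

    # deltaiscensored(delta, baserev, lambda rev: oldlen) would return True
    # for this delta, and False for any delta that patches only part of the
    # base or whose payload lacks the "\1\ncensored:" prefix.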