branchmap-v3: introduce a "stop_rev" argument to `headsrevs`...
marmoute -
r52870:42a116f1 default
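The hunks below add a `stop_rev` argument so heads can be computed for only the first `stop_rev` revisions, instead of first materializing `cl.revs(stop=...)` (see the `BranchCacheV3._get_topo_heads` and `filteredchangelogmixin.headrevs` changes). A minimal, self-contained sketch of the intended semantics follows; `toy_headrevs` and its `parents` list are illustrative stand-ins, not Mercurial APIs:

def toy_headrevs(parents, filteredrevs=(), stop_rev=None):
    """Revisions with no non-filtered child, ignoring revisions >= stop_rev."""
    n = len(parents)
    filtered = set(filteredrevs)
    if stop_rev is not None and stop_rev < n:
        # mirror filteredchangelogmixin.headrevs below: treat every revision
        # at or above stop_rev as if it were filtered out
        filtered.update(range(stop_rev, n))
    heads = {r for r in range(n) if r not in filtered}
    for r in range(n):
        if r in filtered:
            continue
        for p in parents[r]:
            heads.discard(p)
    return sorted(heads)

# Linear history 0 <- 1 <- 2 <- 3: the only head is 3, but asking with
# stop_rev=2 yields the heads "as of" revision 1, which is what the branch
# cache needs when its tiprev lags behind the changelog.
parents = [(), (0,), (1,), (2,)]
assert toy_headrevs(parents) == [3]
assert toy_headrevs(parents, stop_rev=2) == [1]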
@@ -1,1088 +1,1087
1 1 # branchmap.py - logic to computes, maintain and stores branchmap for local repo
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import annotations
9 9
10 10 from .node import (
11 11 bin,
12 12 hex,
13 13 nullrev,
14 14 )
15 15
16 16 from typing import (
17 17 Any,
18 18 Callable,
19 19 Dict,
20 20 Iterable,
21 21 List,
22 22 Optional,
23 23 Set,
24 24 TYPE_CHECKING,
25 25 Tuple,
26 26 Union,
27 27 cast,
28 28 )
29 29
30 30 from . import (
31 31 encoding,
32 32 error,
33 33 obsolete,
34 34 scmutil,
35 35 util,
36 36 )
37 37
38 38 from .utils import (
39 39 repoviewutil,
40 40 stringutil,
41 41 )
42 42
43 43 if TYPE_CHECKING:
44 44 from . import localrepo
45 45
46 46 assert [localrepo]
47 47
48 48 subsettable = repoviewutil.subsettable
49 49
50 50
51 51 class BranchMapCache:
52 52 """mapping of filtered views of repo with their branchcache"""
53 53
54 54 def __init__(self):
55 55 self._per_filter = {}
56 56
57 57 def __getitem__(self, repo):
58 58 self.updatecache(repo)
59 59 bcache = self._per_filter[repo.filtername]
60 60 bcache._ensure_populated(repo)
61 61 assert bcache._filtername == repo.filtername, (
62 62 bcache._filtername,
63 63 repo.filtername,
64 64 )
65 65 return bcache
66 66
67 67 def update_disk(self, repo, detect_pure_topo=False):
68 68 """ensure and up-to-date cache is (or will be) written on disk
69 69
70 70 The cache for this repository view is updated if needed and written on
71 71 disk.
72 72
73 73 If a transaction is in progress, the writing is scheduled at transaction
74 74 close. See the `BranchMapCache.write_dirty` method.
75 75
76 76 This method exists independently of __getitem__ as it is sometimes useful
77 77 to signal that we have no intent to use the data in memory yet.
78 78 """
79 79 self.updatecache(repo)
80 80 bcache = self._per_filter[repo.filtername]
81 81 assert bcache._filtername == repo.filtername, (
82 82 bcache._filtername,
83 83 repo.filtername,
84 84 )
85 85 if detect_pure_topo:
86 86 bcache._detect_pure_topo(repo)
87 87 tr = repo.currenttransaction()
88 88 if getattr(tr, 'finalized', True):
89 89 bcache.sync_disk(repo)
90 90
91 91 def updatecache(self, repo):
92 92 """Update the cache for the given filtered view on a repository"""
93 93 # This can trigger updates for the caches for subsets of the filtered
94 94 # view, e.g. when there is no cache for this filtered view or the cache
95 95 # is stale.
96 96
97 97 cl = repo.changelog
98 98 filtername = repo.filtername
99 99 bcache = self._per_filter.get(filtername)
100 100 if bcache is None or not bcache.validfor(repo):
101 101 # cache object missing or cache object stale? Read from disk
102 102 bcache = branch_cache_from_file(repo)
103 103
104 104 revs = []
105 105 if bcache is None:
106 106 # no (fresh) cache available anymore, perhaps we can re-use
107 107 # the cache for a subset, then extend that to add info on missing
108 108 # revisions.
109 109 subsetname = subsettable.get(filtername)
110 110 if subsetname is not None:
111 111 subset = repo.filtered(subsetname)
112 112 self.updatecache(subset)
113 113 bcache = self._per_filter[subset.filtername].inherit_for(repo)
114 114 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
115 115 revs.extend(r for r in extrarevs if r <= bcache.tiprev)
116 116 else:
117 117 # nothing to fall back on, start empty.
118 118 bcache = new_branch_cache(repo)
119 119
120 120 revs.extend(cl.revs(start=bcache.tiprev + 1))
121 121 if revs:
122 122 bcache.update(repo, revs)
123 123
124 124 assert bcache.validfor(repo), filtername
125 125 self._per_filter[repo.filtername] = bcache
126 126
127 127 def replace(self, repo, remotebranchmap):
128 128 """Replace the branchmap cache for a repo with a branch mapping.
129 129
130 130 This is likely only called during clone with a branch map from a
131 131 remote.
132 132
133 133 """
134 134 cl = repo.changelog
135 135 clrev = cl.rev
136 136 clbranchinfo = cl.branchinfo
137 137 rbheads = []
138 138 closed = set()
139 139 for bheads in remotebranchmap.values():
140 140 rbheads += bheads
141 141 for h in bheads:
142 142 r = clrev(h)
143 143 b, c = clbranchinfo(r)
144 144 if c:
145 145 closed.add(h)
146 146
147 147 if rbheads:
148 148 rtiprev = max((int(clrev(node)) for node in rbheads))
149 149 cache = new_branch_cache(
150 150 repo,
151 151 remotebranchmap,
152 152 repo[rtiprev].node(),
153 153 rtiprev,
154 154 closednodes=closed,
155 155 )
156 156
157 157 # Try to stick it as low as possible
158 158 # filters above served are unlikely to be fetched from a clone
159 159 for candidate in (b'base', b'immutable', b'served'):
160 160 rview = repo.filtered(candidate)
161 161 if cache.validfor(rview):
162 162 cache._filtername = candidate
163 163 self._per_filter[candidate] = cache
164 164 cache._state = STATE_DIRTY
165 165 cache.write(rview)
166 166 return
167 167
168 168 def clear(self):
169 169 self._per_filter.clear()
170 170
171 171 def write_dirty(self, repo):
172 172 unfi = repo.unfiltered()
173 173 for filtername in repoviewutil.get_ordered_subset():
174 174 cache = self._per_filter.get(filtername)
175 175 if cache is None:
176 176 continue
177 177 if filtername is None:
178 178 repo = unfi
179 179 else:
180 180 repo = unfi.filtered(filtername)
181 181 cache.sync_disk(repo)
182 182
183 183
184 184 def _unknownnode(node):
185 185 """raises ValueError when branchcache found a node which does not exists"""
186 186 raise ValueError('node %s does not exist' % node.hex())
187 187
188 188
189 189 def _branchcachedesc(repo):
190 190 if repo.filtername is not None:
191 191 return b'branch cache (%s)' % repo.filtername
192 192 else:
193 193 return b'branch cache'
194 194
195 195
196 196 class _BaseBranchCache:
197 197 """A dict like object that hold branches heads cache.
198 198
199 199 This cache is used to avoid costly computations to determine all the
200 200 branch heads of a repo.
201 201 """
202 202
203 203 def __init__(
204 204 self,
205 205 repo: "localrepo.localrepository",
206 206 entries: Union[
207 207 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
208 208 ] = (),
209 209 closed_nodes: Optional[Set[bytes]] = None,
210 210 ) -> None:
211 211 """hasnode is a function which can be used to verify whether changelog
212 212 has a given node or not. If it's not provided, we assume that every node
213 213 we have exists in changelog"""
214 214 # closednodes is a set of nodes that close their branch. If the branch
215 215 # cache has been updated, it may contain nodes that are no longer
216 216 # heads.
217 217 if closed_nodes is None:
218 218 closed_nodes = set()
219 219 self._closednodes = set(closed_nodes)
220 220 self._entries = dict(entries)
221 221
222 222 def __iter__(self):
223 223 return iter(self._entries)
224 224
225 225 def __setitem__(self, key, value):
226 226 self._entries[key] = value
227 227
228 228 def __getitem__(self, key):
229 229 return self._entries[key]
230 230
231 231 def __contains__(self, key):
232 232 return key in self._entries
233 233
234 234 def iteritems(self):
235 235 return self._entries.items()
236 236
237 237 items = iteritems
238 238
239 239 def hasbranch(self, label):
240 240 """checks whether a branch of this name exists or not"""
241 241 return label in self._entries
242 242
243 243 def _branchtip(self, heads):
244 244 """Return tuple with last open head in heads and false,
245 245 otherwise return last closed head and true."""
246 246 tip = heads[-1]
247 247 closed = True
248 248 for h in reversed(heads):
249 249 if h not in self._closednodes:
250 250 tip = h
251 251 closed = False
252 252 break
253 253 return tip, closed
254 254
255 255 def branchtip(self, branch):
256 256 """Return the tipmost open head on branch head, otherwise return the
257 257 tipmost closed head on branch.
258 258 Raise KeyError for unknown branch."""
259 259 return self._branchtip(self[branch])[0]
260 260
261 261 def iteropen(self, nodes):
262 262 return (n for n in nodes if n not in self._closednodes)
263 263
264 264 def branchheads(self, branch, closed=False):
265 265 heads = self._entries[branch]
266 266 if not closed:
267 267 heads = list(self.iteropen(heads))
268 268 return heads
269 269
270 270 def iterbranches(self):
271 271 for bn, heads in self.items():
272 272 yield (bn, heads) + self._branchtip(heads)
273 273
274 274 def iterheads(self):
275 275 """returns all the heads"""
276 276 return self._entries.values()
277 277
278 278 def update(self, repo, revgen):
279 279 """Given a branchhead cache, self, that may have extra nodes or be
280 280 missing heads, and a generator of nodes that are strictly a superset of
281 281 the missing heads, this function updates self to be correct.
282 282 """
283 283 starttime = util.timer()
284 284 cl = repo.changelog
285 285 # Faster than using ctx.obsolete()
286 286 obsrevs = obsolete.getrevs(repo, b'obsolete')
287 287 # collect new branch entries
288 288 newbranches = {}
289 289 new_closed = set()
290 290 obs_ignored = set()
291 291 getbranchinfo = repo.revbranchcache().branchinfo
292 292 max_rev = -1
293 293 for r in revgen:
294 294 max_rev = max(max_rev, r)
295 295 if r in obsrevs:
296 296 # We ignore obsolete changesets as they shouldn't be
297 297 # considered heads.
298 298 obs_ignored.add(r)
299 299 continue
300 300 branch, closesbranch = getbranchinfo(r)
301 301 newbranches.setdefault(branch, []).append(r)
302 302 if closesbranch:
303 303 new_closed.add(r)
304 304 if max_rev < 0:
305 305 msg = "running branchcache.update without revision to update"
306 306 raise error.ProgrammingError(msg)
307 307
308 308 self._process_new(
309 309 repo,
310 310 newbranches,
311 311 new_closed,
312 312 obs_ignored,
313 313 max_rev,
314 314 )
315 315
316 316 self._closednodes.update(cl.node(rev) for rev in new_closed)
317 317
318 318 duration = util.timer() - starttime
319 319 repo.ui.log(
320 320 b'branchcache',
321 321 b'updated %s in %.4f seconds\n',
322 322 _branchcachedesc(repo),
323 323 duration,
324 324 )
325 325 return max_rev
326 326
327 327 def _process_new(
328 328 self,
329 329 repo,
330 330 newbranches,
331 331 new_closed,
332 332 obs_ignored,
333 333 max_rev,
334 334 ):
335 335 """update the branchmap from a set of new information"""
336 336 # Delay fetching the topological heads until they are needed.
337 337 # A repository without non-continuous branches can skip this part.
338 338 topoheads = None
339 339
340 340 cl = repo.changelog
341 341 getbranchinfo = repo.revbranchcache().branchinfo
342 342 # Faster than using ctx.obsolete()
343 343 obsrevs = obsolete.getrevs(repo, b'obsolete')
344 344
345 345 # If a changeset is visible, its parents must be visible too, so
346 346 # use the faster unfiltered parent accessor.
347 347 parentrevs = cl._uncheckedparentrevs
348 348
349 349 for branch, newheadrevs in newbranches.items():
350 350 # For every branch, compute the new branchheads.
351 351 # A branchhead is a revision such that no descendant is on
352 352 # the same branch.
353 353 #
354 354 # The branchheads are computed iteratively in revision order.
355 355 # This ensures topological order, i.e. parents are processed
356 356 # before their children. Ancestors are inclusive here, i.e.
357 357 # any revision is an ancestor of itself.
358 358 #
359 359 # Core observations:
360 360 # - The current revision is always a branchhead for the
361 361 # repository up to that point.
362 362 # - It is the first revision of the branch if and only if
363 363 # there was no branchhead before. In that case, it is the
364 364 # only branchhead as there are no possible ancestors on
365 365 # the same branch.
366 366 # - If a parent is on the same branch, a branchhead can
367 367 # only be an ancestor of that parent, if it is the parent
368 368 # itself. Otherwise it would have been removed as ancestor
369 369 # of that parent before.
370 370 # - Therefore, if all parents are on the same branch, they
371 371 # can just be removed from the branchhead set.
372 372 # - If one parent is on the same branch and the other is not
373 373 # and there was exactly one branchhead known, the existing
374 374 # branchhead can only be an ancestor if it is the parent.
375 375 # Otherwise it would have been removed as ancestor of
376 376 # the parent before. The other parent therefore can't have
377 377 # a branchhead as ancestor.
378 378 # - In all other cases, the parents on different branches
379 379 # could have a branchhead as ancestor. Those parents are
380 380 # kept in the "uncertain" set. If all branchheads are also
381 381 # topological heads, they can't have descendants and further
382 382 # checks can be skipped. Otherwise, the ancestors of the
383 383 # "uncertain" set are removed from branchheads.
384 384 # This computation is heavy and avoided if at all possible.
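# For illustration of the observations above: take revisions 0..3 all on
# one branch, where 1 and 2 both have parent 0 and 3 merges 1 and 2.
# Processed in revision order, the branchhead set evolves
# {0} -> {1} -> {1, 2} -> {3}: each same-branch parent is dropped as its
# child arrives, the "uncertain" set stays empty, and no ancestor walk is
# needed.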
385 385 bheads = self._entries.get(branch, [])
386 386 bheadset = {cl.rev(node) for node in bheads}
387 387 uncertain = set()
388 388 for newrev in sorted(newheadrevs):
389 389 if not bheadset:
390 390 bheadset.add(newrev)
391 391 continue
392 392
393 393 parents = [p for p in parentrevs(newrev) if p != nullrev]
394 394 samebranch = set()
395 395 otherbranch = set()
396 396 obsparents = set()
397 397 for p in parents:
398 398 if p in obsrevs:
399 399 # We ignored this obsolete changeset earlier, but now
400 400 # that it has non-ignored children, we need to make
401 401 # sure their ancestors are not considered heads. To
402 402 # achieve that, we will simply treat this obsolete
403 403 # changeset as a parent from another branch.
404 404 obsparents.add(p)
405 405 elif p in bheadset or getbranchinfo(p)[0] == branch:
406 406 samebranch.add(p)
407 407 else:
408 408 otherbranch.add(p)
409 409 if not (len(bheadset) == len(samebranch) == 1):
410 410 uncertain.update(otherbranch)
411 411 uncertain.update(obsparents)
412 412 bheadset.difference_update(samebranch)
413 413 bheadset.add(newrev)
414 414
415 415 if uncertain:
416 416 if topoheads is None:
417 417 topoheads = set(cl.headrevs())
418 418 if bheadset - topoheads:
419 419 floorrev = min(bheadset)
420 420 if floorrev <= max(uncertain):
421 421 ancestors = set(cl.ancestors(uncertain, floorrev))
422 422 bheadset -= ancestors
423 423 if bheadset:
424 424 self[branch] = [cl.node(rev) for rev in sorted(bheadset)]
425 425
426 426
427 427 STATE_CLEAN = 1
428 428 STATE_INHERITED = 2
429 429 STATE_DIRTY = 3
430 430
431 431
432 432 class _LocalBranchCache(_BaseBranchCache):
433 433 """base class of branch-map info for a local repo or repoview"""
434 434
435 435 _base_filename = None
436 436 _default_key_hashes: Tuple[bytes] = cast(Tuple[bytes], ())
437 437
438 438 def __init__(
439 439 self,
440 440 repo: "localrepo.localrepository",
441 441 entries: Union[
442 442 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
443 443 ] = (),
444 444 tipnode: Optional[bytes] = None,
445 445 tiprev: Optional[int] = nullrev,
446 446 key_hashes: Optional[Tuple[bytes]] = None,
447 447 closednodes: Optional[Set[bytes]] = None,
448 448 hasnode: Optional[Callable[[bytes], bool]] = None,
449 449 verify_node: bool = False,
450 450 inherited: bool = False,
451 451 ) -> None:
452 452 """hasnode is a function which can be used to verify whether changelog
453 453 has a given node or not. If it's not provided, we assume that every node
454 454 we have exists in changelog"""
455 455 self._filtername = repo.filtername
456 456 if tipnode is None:
457 457 self.tipnode = repo.nullid
458 458 else:
459 459 self.tipnode = tipnode
460 460 self.tiprev = tiprev
461 461 if key_hashes is None:
462 462 self.key_hashes = self._default_key_hashes
463 463 else:
464 464 self.key_hashes = key_hashes
465 465 self._state = STATE_CLEAN
466 466 if inherited:
467 467 self._state = STATE_INHERITED
468 468
469 469 super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
470 470 # closednodes is a set of nodes that close their branch. If the branch
471 471 # cache has been updated, it may contain nodes that are no longer
472 472 # heads.
473 473
474 474 # Do we need to verify branch at all ?
475 475 self._verify_node = verify_node
476 476 # branches for which nodes are verified
477 477 self._verifiedbranches = set()
478 478 self._hasnode = None
479 479 if self._verify_node:
480 480 self._hasnode = repo.changelog.hasnode
481 481
482 482 def _compute_key_hashes(self, repo) -> Tuple[bytes]:
483 483 raise NotImplementedError
484 484
485 485 def _ensure_populated(self, repo):
486 486 """make sure any lazily loaded values are fully populated"""
487 487
488 488 def _detect_pure_topo(self, repo) -> None:
489 489 pass
490 490
491 491 def validfor(self, repo):
492 492 """check that cache contents are valid for (a subset of) this repo
493 493
494 494 - False when the order of changesets changed or if we detect a strip.
495 495 - True when cache is up-to-date for the current repo or its subset."""
496 496 try:
497 497 node = repo.changelog.node(self.tiprev)
498 498 except IndexError:
499 499 # changesets were stripped and now we don't even have enough to
500 500 # find tiprev
501 501 return False
502 502 if self.tipnode != node:
503 503 # tiprev doesn't correspond to tipnode: repo was stripped, or this
504 504 # repo has a different order of changesets
505 505 return False
506 506 repo_key_hashes = self._compute_key_hashes(repo)
507 507 # hashes don't match if this repo view has a different set of filtered
508 508 # revisions (e.g. due to phase changes) or obsolete revisions (e.g.
509 509 # history was rewritten)
510 510 return self.key_hashes == repo_key_hashes
511 511
512 512 @classmethod
513 513 def fromfile(cls, repo):
514 514 f = None
515 515 try:
516 516 f = repo.cachevfs(cls._filename(repo))
517 517 lineiter = iter(f)
518 518 init_kwargs = cls._load_header(repo, lineiter)
519 519 bcache = cls(
520 520 repo,
521 521 verify_node=True,
522 522 **init_kwargs,
523 523 )
524 524 if not bcache.validfor(repo):
525 525 # invalidate the cache
526 526 raise ValueError('tip differs')
527 527 bcache._load_heads(repo, lineiter)
528 528 except (IOError, OSError):
529 529 return None
530 530
531 531 except Exception as inst:
532 532 if repo.ui.debugflag:
533 533 msg = b'invalid %s: %s\n'
534 534 msg %= (
535 535 _branchcachedesc(repo),
536 536 stringutil.forcebytestr(inst),
537 537 )
538 538 repo.ui.debug(msg)
539 539 bcache = None
540 540
541 541 finally:
542 542 if f:
543 543 f.close()
544 544
545 545 return bcache
546 546
547 547 @classmethod
548 548 def _load_header(cls, repo, lineiter) -> "dict[str, Any]":
549 549 raise NotImplementedError
550 550
551 551 def _load_heads(self, repo, lineiter):
552 552 """fully loads the branchcache by reading from the file using the line
553 553 iterator passed"""
554 554 for line in lineiter:
555 555 line = line.rstrip(b'\n')
556 556 if not line:
557 557 continue
558 558 node, state, label = line.split(b" ", 2)
559 559 if state not in b'oc':
560 560 raise ValueError('invalid branch state')
561 561 label = encoding.tolocal(label.strip())
562 562 node = bin(node)
563 563 self._entries.setdefault(label, []).append(node)
564 564 if state == b'c':
565 565 self._closednodes.add(node)
566 566
567 567 @classmethod
568 568 def _filename(cls, repo):
569 569 """name of a branchcache file for a given repo or repoview"""
570 570 filename = cls._base_filename
571 571 assert filename is not None
572 572 if repo.filtername:
573 573 filename = b'%s-%s' % (filename, repo.filtername)
574 574 return filename
575 575
576 576 def inherit_for(self, repo):
577 577 """return a deep copy of the branchcache object"""
578 578 assert repo.filtername != self._filtername
579 579 other = type(self)(
580 580 repo=repo,
581 581 # we always do a shallow copy of self._entries, and the values are
582 582 # always replaced, so there is no need to deepcopy as long as the
583 583 # above remains true.
584 584 entries=self._entries,
585 585 tipnode=self.tipnode,
586 586 tiprev=self.tiprev,
587 587 key_hashes=self.key_hashes,
588 588 closednodes=set(self._closednodes),
589 589 verify_node=self._verify_node,
590 590 inherited=True,
591 591 )
592 592 # also copy information about the current verification state
593 593 other._verifiedbranches = set(self._verifiedbranches)
594 594 return other
595 595
596 596 def sync_disk(self, repo):
597 597 """synchronise the on disk file with the cache state
598 598
599 599 If new values specific to this filter level need to be written, the file
600 600 will be updated; if the state of the branchcache is inherited from a
601 601 subset, any stale on-disk file will be deleted.
602 602
603 603 That method does nothing if there is nothing to do.
604 604 """
605 605 if self._state == STATE_DIRTY:
606 606 self.write(repo)
607 607 elif self._state == STATE_INHERITED:
608 608 filename = self._filename(repo)
609 609 repo.cachevfs.tryunlink(filename)
610 610
611 611 def write(self, repo):
612 612 assert self._filtername == repo.filtername, (
613 613 self._filtername,
614 614 repo.filtername,
615 615 )
616 616 assert self._state == STATE_DIRTY, self._state
617 617 # This method should not be called during an open transaction
618 618 tr = repo.currenttransaction()
619 619 if not getattr(tr, 'finalized', True):
620 620 msg = "writing branchcache in the middle of a transaction"
621 621 raise error.ProgrammingError(msg)
622 622 try:
623 623 filename = self._filename(repo)
624 624 with repo.cachevfs(filename, b"w", atomictemp=True) as f:
625 625 self._write_header(f)
626 626 nodecount = self._write_heads(repo, f)
627 627 repo.ui.log(
628 628 b'branchcache',
629 629 b'wrote %s with %d labels and %d nodes\n',
630 630 _branchcachedesc(repo),
631 631 len(self._entries),
632 632 nodecount,
633 633 )
634 634 self._state = STATE_CLEAN
635 635 except (IOError, OSError, error.Abort) as inst:
636 636 # Abort may be raised by read only opener, so log and continue
637 637 repo.ui.debug(
638 638 b"couldn't write branch cache: %s\n"
639 639 % stringutil.forcebytestr(inst)
640 640 )
641 641
642 642 def _write_header(self, fp) -> None:
643 643 raise NotImplementedError
644 644
645 645 def _write_heads(self, repo, fp) -> int:
646 646 """write list of heads to a file
647 647
648 648 Return the number of heads written."""
649 649 nodecount = 0
650 650 for label, nodes in sorted(self._entries.items()):
651 651 label = encoding.fromlocal(label)
652 652 for node in nodes:
653 653 nodecount += 1
654 654 if node in self._closednodes:
655 655 state = b'c'
656 656 else:
657 657 state = b'o'
658 658 fp.write(b"%s %s %s\n" % (hex(node), state, label))
659 659 return nodecount
660 660
661 661 def _verifybranch(self, branch):
662 662 """verify head nodes for the given branch."""
663 663 if not self._verify_node:
664 664 return
665 665 if branch not in self._entries or branch in self._verifiedbranches:
666 666 return
667 667 assert self._hasnode is not None
668 668 for n in self._entries[branch]:
669 669 if not self._hasnode(n):
670 670 _unknownnode(n)
671 671
672 672 self._verifiedbranches.add(branch)
673 673
674 674 def _verifyall(self):
675 675 """verifies nodes of all the branches"""
676 676 for b in self._entries.keys():
677 677 if b not in self._verifiedbranches:
678 678 self._verifybranch(b)
679 679
680 680 def __getitem__(self, key):
681 681 self._verifybranch(key)
682 682 return super().__getitem__(key)
683 683
684 684 def __contains__(self, key):
685 685 self._verifybranch(key)
686 686 return super().__contains__(key)
687 687
688 688 def iteritems(self):
689 689 self._verifyall()
690 690 return super().iteritems()
691 691
692 692 items = iteritems
693 693
694 694 def iterheads(self):
695 695 """returns all the heads"""
696 696 self._verifyall()
697 697 return super().iterheads()
698 698
699 699 def hasbranch(self, label):
700 700 """checks whether a branch of this name exists or not"""
701 701 self._verifybranch(label)
702 702 return super().hasbranch(label)
703 703
704 704 def branchheads(self, branch, closed=False):
705 705 self._verifybranch(branch)
706 706 return super().branchheads(branch, closed=closed)
707 707
708 708 def update(self, repo, revgen):
709 709 assert self._filtername == repo.filtername, (
710 710 self._filtername,
711 711 repo.filtername,
712 712 )
713 713 cl = repo.changelog
714 714 max_rev = super().update(repo, revgen)
715 715 # new tip revision which we found after iterating items from new
716 716 # branches
717 717 if max_rev is not None and max_rev > self.tiprev:
718 718 self.tiprev = max_rev
719 719 self.tipnode = cl.node(max_rev)
720 720 else:
721 721 # We should not be here if this is false
722 722 assert cl.node(self.tiprev) == self.tipnode
723 723
724 724 if not self.validfor(repo):
725 725 # the tiprev and tipnode should be aligned, so if the current repo
726 726 # is not seen as valid, this is because the old cache key is now
727 727 # invalid for the repo.
728 728 #
729 729 # However, we've just updated the cache and we assume it's valid,
730 730 # so let's make the cache key valid as well by recomputing it from
731 731 # the cached data
732 732 self.key_hashes = self._compute_key_hashes(repo)
733 733 self.filteredhash = scmutil.combined_filtered_and_obsolete_hash(
734 734 repo,
735 735 self.tiprev,
736 736 )
737 737
738 738 self._state = STATE_DIRTY
739 739 tr = repo.currenttransaction()
740 740 if getattr(tr, 'finalized', True):
741 741 # Avoid premature writing.
742 742 #
743 743 # (The cache warming setup by localrepo will update the file later.)
744 744 self.write(repo)
745 745
746 746
747 747 def branch_cache_from_file(repo) -> Optional[_LocalBranchCache]:
748 748 """Build a branch cache from on-disk data if possible
749 749
750 750 Return a branch cache of the right format depending on the repository.
751 751 """
752 752 if repo.ui.configbool(b"experimental", b"branch-cache-v3"):
753 753 return BranchCacheV3.fromfile(repo)
754 754 else:
755 755 return BranchCacheV2.fromfile(repo)
756 756
757 757
758 758 def new_branch_cache(repo, *args, **kwargs):
759 759 """Build a new branch cache from argument
760 760
761 761 Return a branch cache of the right format depending on the repository.
762 762 """
763 763 if repo.ui.configbool(b"experimental", b"branch-cache-v3"):
764 764 return BranchCacheV3(repo, *args, **kwargs)
765 765 else:
766 766 return BranchCacheV2(repo, *args, **kwargs)
767 767
768 768
769 769 class BranchCacheV2(_LocalBranchCache):
770 770 """a branch cache using version 2 of the format on disk
771 771
772 772 The cache is serialized on disk in the following format:
773 773
774 774 <tip hex node> <tip rev number> [optional filtered repo hex hash]
775 775 <branch head hex node> <open/closed state> <branch name>
776 776 <branch head hex node> <open/closed state> <branch name>
777 777 ...
778 778
779 779 The first line is used to check if the cache is still valid. If the
780 780 branch cache is for a filtered repo view, an optional third hash is
781 781 included that hashes the hashes of all filtered and obsolete revisions.
782 782
783 783 The open/closed state is represented by a single letter 'o' or 'c'.
784 784 This field can be used to avoid changelog reads when determining if a
785 785 branch head closes a branch or not.
786 786 """
787 787
788 788 _base_filename = b"branch2"
789 789
790 790 @classmethod
791 791 def _load_header(cls, repo, lineiter) -> "dict[str, Any]":
792 792 """parse the head of a branchmap file
793 793
794 794 return parameters to pass to a newly created class instance.
795 795 """
796 796 cachekey = next(lineiter).rstrip(b'\n').split(b" ", 2)
797 797 last, lrev = cachekey[:2]
798 798 last, lrev = bin(last), int(lrev)
799 799 filteredhash = ()
800 800 if len(cachekey) > 2:
801 801 filteredhash = (bin(cachekey[2]),)
802 802 return {
803 803 "tipnode": last,
804 804 "tiprev": lrev,
805 805 "key_hashes": filteredhash,
806 806 }
807 807
808 808 def _write_header(self, fp) -> None:
809 809 """write the branch cache header to a file"""
810 810 cachekey = [hex(self.tipnode), b'%d' % self.tiprev]
811 811 if self.key_hashes:
812 812 cachekey.append(hex(self.key_hashes[0]))
813 813 fp.write(b" ".join(cachekey) + b'\n')
814 814
815 815 def _compute_key_hashes(self, repo) -> Tuple[bytes]:
816 816 """return the cache key hashes that match this repoview state"""
817 817 filtered_hash = scmutil.combined_filtered_and_obsolete_hash(
818 818 repo,
819 819 self.tiprev,
820 820 needobsolete=True,
821 821 )
822 822 keys: Tuple[bytes] = cast(Tuple[bytes], ())
823 823 if filtered_hash is not None:
824 824 keys: Tuple[bytes] = (filtered_hash,)
825 825 return keys
826 826
827 827
828 828 class BranchCacheV3(_LocalBranchCache):
829 829 """a branch cache using version 3 of the format on disk
830 830
831 831 This version is still EXPERIMENTAL and the format is subject to change.
832 832
833 833 The cache is serialized on disk in the following format:
834 834
835 835 <cache-key-xxx>=<xxx-value> <cache-key-yyy>=<yyy-value> […]
836 836 <branch head hex node> <open/closed state> <branch name>
837 837 <branch head hex node> <open/closed state> <branch name>
838 838 ...
839 839
840 840 The first line is used to check if the cache is still valid. It is a series
841 841 of key/value pairs. The following keys are recognized:
842 842
843 843 - tip-rev: the rev-num of the tip-most revision seen by this cache
844 844 - tip-node: the node-id of the tip-most revision seen by this cache
845 845 - filtered-hash: the hash of all filtered revisions (before tip-rev)
846 846 ignored by this cache.
847 847 - obsolete-hash: the hash of all non-filtered obsolete revisions (before
848 848 tip-rev) ignored by this cache.
849 849
850 850 The tip-rev is used to know how far behind the values in the file are
851 851 compared to the current repository state.
852 852
853 853 The tip-node, filtered-hash and obsolete-hash are used to detect if this
854 854 cache can be used for this repository state at all.
855 855
856 856 The open/closed state is represented by a single letter 'o' or 'c'.
857 857 This field can be used to avoid changelog reads when determining if a
858 858 branch head closes a branch or not.
859 859
860 860 Topological heads are not included in the listing and should be dispatched
861 861 on the right branch at read time. Obsolete topological heads should be
862 862 ignored.
863 863 """
864 864
865 865 _base_filename = b"branch3-exp"
866 866 _default_key_hashes = (None, None)
867 867
868 868 def __init__(self, *args, pure_topo_branch=None, **kwargs):
869 869 super().__init__(*args, **kwargs)
870 870 self._pure_topo_branch = pure_topo_branch
871 871 self._needs_populate = self._pure_topo_branch is not None
872 872
873 873 def inherit_for(self, repo):
874 874 new = super().inherit_for(repo)
875 875 new._pure_topo_branch = self._pure_topo_branch
876 876 new._needs_populate = self._needs_populate
877 877 return new
878 878
879 879 def _get_topo_heads(self, repo):
880 880 """returns the topological head of a repoview content up to self.tiprev"""
881 881 cl = repo.changelog
882 882 if self.tiprev == nullrev:
883 883 return []
884 884 elif self.tiprev == cl.tiprev():
885 885 return cl.headrevs()
886 886 else:
887 # XXX passing tiprev as ceiling of cl.headrevs could be faster
888 heads = cl.headrevs(cl.revs(stop=self.tiprev))
887 heads = cl.headrevs(stop_rev=self.tiprev + 1)
889 888 return heads
890 889
891 890 def _write_header(self, fp) -> None:
892 891 cache_keys = {
893 892 b"tip-node": hex(self.tipnode),
894 893 b"tip-rev": b'%d' % self.tiprev,
895 894 }
896 895 if self.key_hashes:
897 896 if self.key_hashes[0] is not None:
898 897 cache_keys[b"filtered-hash"] = hex(self.key_hashes[0])
899 898 if self.key_hashes[1] is not None:
900 899 cache_keys[b"obsolete-hash"] = hex(self.key_hashes[1])
901 900 if self._pure_topo_branch is not None:
902 901 cache_keys[b"topo-mode"] = b"pure"
903 902 pieces = (b"%s=%s" % i for i in sorted(cache_keys.items()))
904 903 fp.write(b" ".join(pieces) + b'\n')
905 904 if self._pure_topo_branch is not None:
906 905 label = encoding.fromlocal(self._pure_topo_branch)
907 906 fp.write(label + b'\n')
908 907
909 908 def _write_heads(self, repo, fp) -> int:
910 909 """write list of heads to a file
911 910
912 911 Return the number of heads written."""
913 912 to_node = repo.changelog.node
914 913 nodecount = 0
915 914 topo_heads = None
916 915 if self._pure_topo_branch is None:
917 916 # we match using node because it is faster to build the set of nodes
918 917 # than to resolve node → rev later.
919 918 topo_heads = set(to_node(r) for r in self._get_topo_heads(repo))
920 919 for label, nodes in sorted(self._entries.items()):
921 920 if label == self._pure_topo_branch:
922 921 # no need to write anything, the header took care of that
923 922 continue
924 923 label = encoding.fromlocal(label)
925 924 for node in nodes:
926 925 if topo_heads is not None:
927 926 if node in topo_heads:
928 927 continue
929 928 if node in self._closednodes:
930 929 state = b'c'
931 930 else:
932 931 state = b'o'
933 932 nodecount += 1
934 933 fp.write(b"%s %s %s\n" % (hex(node), state, label))
935 934 return nodecount
936 935
937 936 @classmethod
938 937 def _load_header(cls, repo, lineiter):
939 938 header_line = next(lineiter)
940 939 pieces = header_line.rstrip(b'\n').split(b" ")
941 940 for p in pieces:
942 941 if b'=' not in p:
943 942 msg = b"invalid header_line: %r" % header_line
944 943 raise ValueError(msg)
945 944 cache_keys = dict(p.split(b'=', 1) for p in pieces)
946 945
947 946 args = {}
948 947 filtered_hash = None
949 948 obsolete_hash = None
950 949 has_pure_topo_heads = False
951 950 for k, v in cache_keys.items():
952 951 if k == b"tip-rev":
953 952 args["tiprev"] = int(v)
954 953 elif k == b"tip-node":
955 954 args["tipnode"] = bin(v)
956 955 elif k == b"filtered-hash":
957 956 filtered_hash = bin(v)
958 957 elif k == b"obsolete-hash":
959 958 obsolete_hash = bin(v)
960 959 elif k == b"topo-mode":
961 960 if v == b"pure":
962 961 has_pure_topo_heads = True
963 962 else:
964 963 msg = b"unknown topo-mode: %r" % v
965 964 raise ValueError(msg)
966 965 else:
967 966 msg = b"unknown cache key: %r" % k
968 967 raise ValueError(msg)
969 968 args["key_hashes"] = (filtered_hash, obsolete_hash)
970 969 if has_pure_topo_heads:
971 970 pure_line = next(lineiter).rstrip(b'\n')
972 971 args["pure_topo_branch"] = encoding.tolocal(pure_line)
973 972 return args
974 973
975 974 def _load_heads(self, repo, lineiter):
976 975 """fully loads the branchcache by reading from the file using the line
977 976 iterator passed"""
978 977 super()._load_heads(repo, lineiter)
979 978 if self._pure_topo_branch is not None:
980 979 # no need to read the repository heads, we know their value already.
981 980 return
982 981 cl = repo.changelog
983 982 getbranchinfo = repo.revbranchcache().branchinfo
984 983 obsrevs = obsolete.getrevs(repo, b'obsolete')
985 984 to_node = cl.node
986 985 touched_branch = set()
987 986 for head in self._get_topo_heads(repo):
988 987 if head in obsrevs:
989 988 continue
990 989 node = to_node(head)
991 990 branch, closed = getbranchinfo(head)
992 991 self._entries.setdefault(branch, []).append(node)
993 992 if closed:
994 993 self._closednodes.add(node)
995 994 touched_branch.add(branch)
996 995 to_rev = cl.index.rev
997 996 for branch in touched_branch:
998 997 self._entries[branch].sort(key=to_rev)
999 998
1000 999 def _compute_key_hashes(self, repo) -> Tuple[bytes]:
1001 1000 """return the cache key hashes that match this repoview state"""
1002 1001 return scmutil.filtered_and_obsolete_hash(
1003 1002 repo,
1004 1003 self.tiprev,
1005 1004 )
1006 1005
1007 1006 def _process_new(
1008 1007 self,
1009 1008 repo,
1010 1009 newbranches,
1011 1010 new_closed,
1012 1011 obs_ignored,
1013 1012 max_rev,
1014 1013 ) -> None:
1015 1014 if (
1016 1015 # note: the check about `obs_ignored` is too strict as the
1017 1016 # obsolete revisions could be non-topological, but let's keep
1018 1017 # things simple for now
1019 1018 #
1020 1019 # The same applies to `new_closed`: if the closed changesets are
1021 1020 # not heads, we don't care that they are closed, but let's keep
1022 1021 # things simple here too.
1023 1022 not (obs_ignored or new_closed)
1024 1023 and (
1025 1024 not newbranches
1026 1025 or (
1027 1026 len(newbranches) == 1
1028 1027 and (
1029 1028 self.tiprev == nullrev
1030 1029 or self._pure_topo_branch in newbranches
1031 1030 )
1032 1031 )
1033 1032 )
1034 1033 ):
1035 1034 if newbranches:
1036 1035 assert len(newbranches) == 1
1037 1036 self._pure_topo_branch = list(newbranches.keys())[0]
1038 1037 self._needs_populate = True
1039 1038 self._entries.pop(self._pure_topo_branch, None)
1040 1039 return
1041 1040
1042 1041 self._ensure_populated(repo)
1043 1042 self._pure_topo_branch = None
1044 1043 super()._process_new(
1045 1044 repo,
1046 1045 newbranches,
1047 1046 new_closed,
1048 1047 obs_ignored,
1049 1048 max_rev,
1050 1049 )
1051 1050
1052 1051 def _ensure_populated(self, repo):
1053 1052 """make sure any lazily loaded values are fully populated"""
1054 1053 if self._needs_populate:
1055 1054 assert self._pure_topo_branch is not None
1056 1055 cl = repo.changelog
1057 1056 to_node = cl.node
1058 1057 topo_heads = self._get_topo_heads(repo)
1059 1058 heads = [to_node(r) for r in topo_heads]
1060 1059 self._entries[self._pure_topo_branch] = heads
1061 1060 self._needs_populate = False
1062 1061
1063 1062 def _detect_pure_topo(self, repo) -> None:
1064 1063 if self._pure_topo_branch is not None:
1065 1064 # we are pure topological already
1066 1065 return
1067 1066 to_node = repo.changelog.node
1068 1067 topo_heads = [to_node(r) for r in self._get_topo_heads(repo)]
1069 1068 if any(n in self._closednodes for n in topo_heads):
1070 1069 return
1071 1070 for branch, heads in self._entries.items():
1072 1071 if heads == topo_heads:
1073 1072 self._pure_topo_branch = branch
1074 1073 break
1075 1074
1076 1075
1077 1076 class remotebranchcache(_BaseBranchCache):
1078 1077 """Branchmap info for a remote connection, should not write locally"""
1079 1078
1080 1079 def __init__(
1081 1080 self,
1082 1081 repo: "localrepo.localrepository",
1083 1082 entries: Union[
1084 1083 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
1085 1084 ] = (),
1086 1085 closednodes: Optional[Set[bytes]] = None,
1087 1086 ) -> None:
1088 1087 super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
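For the single-line key=value header described in the BranchCacheV3 docstring above (written by `_write_header`, parsed by `_load_header`), a minimal self-contained sketch of the round trip; `encode_header`/`decode_header` and the placeholder node are illustrative only, not Mercurial APIs:

def encode_header(cache_keys):
    # one line of sorted "key=value" pairs, newline terminated
    return b" ".join(b"%s=%s" % kv for kv in sorted(cache_keys.items())) + b"\n"

def decode_header(line):
    pieces = line.rstrip(b"\n").split(b" ")
    if any(b"=" not in p for p in pieces):
        raise ValueError("invalid header line")
    return dict(p.split(b"=", 1) for p in pieces)

header = encode_header({
    b"tip-rev": b"42",
    b"tip-node": b"00" * 20,  # placeholder hex node, illustrative only
    b"topo-mode": b"pure",
})
assert decode_header(header)[b"tip-rev"] == b"42"
assert decode_header(header)[b"topo-mode"] == b"pure"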
@@ -1,485 +1,490
1 1 # repoview.py - Filtered view of a localrepo object
2 2 #
3 3 # Copyright 2012 Pierre-Yves David <pierre-yves.david@ens-lyon.org>
4 4 # Logilab SA <contact@logilab.fr>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 from __future__ import annotations
10 10
11 11 import copy
12 12 import weakref
13 13
14 14 from .i18n import _
15 15 from .node import (
16 16 hex,
17 17 nullrev,
18 18 )
19 19 from . import (
20 20 error,
21 21 obsolete,
22 22 phases,
23 23 pycompat,
24 24 tags as tagsmod,
25 25 util,
26 26 )
27 27 from .utils import repoviewutil
28 28
29 29
30 30 def hideablerevs(repo):
31 31 """Revision candidates to be hidden
32 32
33 33 This is a standalone function to allow extensions to wrap it.
34 34
35 35 Because we use the set of immutable changesets as a fallback subset in
36 36 branchmap (see mercurial.utils.repoviewutil.subsettable), you cannot set
37 37 "public" changesets as "hideable". Doing so would break multiple code
38 38 assertions and lead to crashes."""
39 39 obsoletes = obsolete.getrevs(repo, b'obsolete')
40 40 internals = repo._phasecache.getrevset(repo, phases.localhiddenphases)
41 41 internals = frozenset(internals)
42 42 return obsoletes | internals
43 43
44 44
45 45 def pinnedrevs(repo):
46 46 """revisions blocking hidden changesets from being filtered"""
47 47
48 48 cl = repo.changelog
49 49 pinned = set()
50 50 pinned.update([par.rev() for par in repo[None].parents()])
51 51 pinned.update([cl.rev(bm) for bm in repo._bookmarks.values()])
52 52
53 53 tags = {}
54 54 tagsmod.readlocaltags(repo.ui, repo, tags, {})
55 55 if tags:
56 56 rev = cl.index.get_rev
57 57 pinned.update(rev(t[0]) for t in tags.values())
58 58 pinned.discard(None)
59 59
60 60 # Avoid cycle: mercurial.filemerge -> mercurial.templater ->
61 61 # mercurial.templatefuncs -> mercurial.revset -> mercurial.repoview ->
62 62 # mercurial.mergestate -> mercurial.filemerge
63 63 from . import mergestate
64 64
65 65 ms = mergestate.mergestate.read(repo)
66 66 if ms.active() and ms.unresolvedcount():
67 67 for node in (ms.local, ms.other):
68 68 rev = cl.index.get_rev(node)
69 69 if rev is not None:
70 70 pinned.add(rev)
71 71
72 72 return pinned
73 73
74 74
75 75 def _revealancestors(pfunc, hidden, revs):
76 76 """reveals contiguous chains of hidden ancestors of 'revs' by removing them
77 77 from 'hidden'
78 78
79 79 - pfunc(r): a function returning the parents of 'r',
80 80 - hidden: the (preliminary) hidden revisions, to be updated
81 81 - revs: iterable of revnum,
82 82
83 83 (Ancestors are revealed exclusively, i.e. the elements in 'revs' are
84 84 *not* revealed)
85 85 """
86 86 stack = list(revs)
87 87 while stack:
88 88 for p in pfunc(stack.pop()):
89 89 if p != nullrev and p in hidden:
90 90 hidden.remove(p)
91 91 stack.append(p)
92 92
93 93
94 94 def computehidden(repo, visibilityexceptions=None):
95 95 """compute the set of hidden revision to filter
96 96
97 97 During most operations hidden revisions should be filtered."""
98 98 assert not repo.changelog.filteredrevs
99 99
100 100 hidden = hideablerevs(repo)
101 101 if hidden:
102 102 hidden = set(hidden - pinnedrevs(repo))
103 103 if visibilityexceptions:
104 104 hidden -= visibilityexceptions
105 105 pfunc = repo.changelog.parentrevs
106 106 mutable = repo._phasecache.getrevset(repo, phases.mutablephases)
107 107
108 108 visible = mutable - hidden
109 109 _revealancestors(pfunc, hidden, visible)
110 110 return frozenset(hidden)
111 111
112 112
113 113 def computesecret(repo, visibilityexceptions=None):
114 114 """compute the set of revision that can never be exposed through hgweb
115 115
116 116 Changesets in the secret phase (or above) should stay inaccessible."""
117 117 assert not repo.changelog.filteredrevs
118 118 secrets = repo._phasecache.getrevset(repo, phases.remotehiddenphases)
119 119 return frozenset(secrets)
120 120
121 121
122 122 def computeunserved(repo, visibilityexceptions=None):
123 123 """compute the set of revision that should be filtered when used a server
124 124
125 125 Secret and hidden changesets should not pretend to be here."""
126 126 assert not repo.changelog.filteredrevs
127 127 # fast path in simple case to avoid impact of non optimised code
128 128 hiddens = filterrevs(repo, b'visible')
129 129 secrets = filterrevs(repo, b'served.hidden')
130 130 if secrets:
131 131 return frozenset(hiddens | secrets)
132 132 else:
133 133 return hiddens
134 134
135 135
136 136 def computemutable(repo, visibilityexceptions=None):
137 137 assert not repo.changelog.filteredrevs
138 138 # fast check to avoid revset call on huge repo
139 139 if repo._phasecache.hasnonpublicphases(repo):
140 140 return frozenset(repo._phasecache.getrevset(repo, phases.mutablephases))
141 141 return frozenset()
142 142
143 143
144 144 def computeimpactable(repo, visibilityexceptions=None):
145 145 """Everything impactable by mutable revision
146 146
147 147 The immutable filter still has some chance to get invalidated. This will
148 148 happen when:
149 149
150 150 - you garbage collect hidden changesets,
151 151 - public phase is moved backward,
152 152 - something is changed in the filtering (this could be fixed)
153 153
154 154 This filters out any mutable changeset and any public changeset that may be
155 155 impacted by something happening to a mutable revision.
156 156
157 157 This is achieved by filtering everything with a revision number equal to or
158 158 higher than the first mutable changeset."""
159 159 assert not repo.changelog.filteredrevs
160 160 cl = repo.changelog
161 161 firstmutable = len(cl)
162 162 roots = repo._phasecache.nonpublicphaseroots(repo)
163 163 if roots:
164 164 firstmutable = min(firstmutable, min(roots))
165 165 # protect from nullrev root
166 166 firstmutable = max(0, firstmutable)
167 167 return frozenset(range(firstmutable, len(cl)))
168 168
169 169
170 170 # function to compute filtered set
171 171 #
172 172 # When adding a new filter you MUST update the table at:
173 173 # mercurial.utils.repoviewutil.subsettable
174 174 # Otherwise your filter will have to recompute all its branch caches
175 175 # from scratch (very slow).
176 176 filtertable = {
177 177 b'visible': computehidden,
178 178 b'visible-hidden': computehidden,
179 179 b'served.hidden': computesecret,
180 180 b'served': computeunserved,
181 181 b'immutable': computemutable,
182 182 b'base': computeimpactable,
183 183 }
184 184
185 185 # set of filter levels that will include the working copy parent no matter what.
186 186 filter_has_wc = {b'visible', b'visible-hidden'}
187 187
188 188 _basefiltername = list(filtertable)
189 189
190 190
191 191 def extrafilter(ui):
192 192 """initialize extra filter and return its id
193 193
194 194 If extra filtering is configured, we make sure the associated filtered views
195 195 are declared and return the associated id.
196 196 """
197 197 frevs = ui.config(b'experimental', b'extra-filter-revs')
198 198 if frevs is None:
199 199 return None
200 200
201 201 fid = pycompat.sysbytes(util.DIGESTS[b'sha1'](frevs).hexdigest())[:12]
202 202
203 203 combine = lambda fname: fname + b'%' + fid
204 204
205 205 subsettable = repoviewutil.subsettable
206 206
207 207 if combine(b'base') not in filtertable:
208 208 for base_name in _basefiltername:
209 209
210 210 def extrafilteredrevs(repo, *args, name=base_name, **kwargs):
211 211 baserevs = filtertable[name](repo, *args, **kwargs)
212 212 extrarevs = frozenset(repo.revs(frevs))
213 213 return baserevs | extrarevs
214 214
215 215 filtertable[combine(base_name)] = extrafilteredrevs
216 216 if base_name in subsettable:
217 217 subsettable[combine(base_name)] = combine(
218 218 subsettable[base_name]
219 219 )
220 220 return fid
221 221
222 222
223 223 def filterrevs(repo, filtername, visibilityexceptions=None):
224 224 """returns set of filtered revision for this filter name
225 225
226 226 visibilityexceptions is a set of revs which are exceptions to the
227 227 hidden state and must be visible. They are dynamic and hence we should not
228 228 cache their result"""
229 229 if filtername not in repo.filteredrevcache:
230 230 if repo.ui.configbool(b'devel', b'debug.repo-filters'):
231 231 msg = b'computing revision filter for "%s"'
232 232 msg %= filtername
233 233 if repo.ui.tracebackflag and repo.ui.debugflag:
234 234 # XXX use ui.write_err
235 235 util.debugstacktrace(
236 236 msg,
237 237 f=repo.ui._fout,
238 238 otherf=repo.ui._ferr,
239 239 prefix=b'debug.filters: ',
240 240 )
241 241 else:
242 242 repo.ui.debug(b'debug.filters: %s\n' % msg)
243 243 func = filtertable[filtername]
244 244 if visibilityexceptions:
245 245 return func(repo.unfiltered, visibilityexceptions)
246 246 repo.filteredrevcache[filtername] = func(repo.unfiltered())
247 247 return repo.filteredrevcache[filtername]
248 248
249 249
250 250 def wrapchangelog(unfichangelog, filteredrevs):
251 251 cl = copy.copy(unfichangelog)
252 252 cl.filteredrevs = filteredrevs
253 253
254 254 class filteredchangelog(filteredchangelogmixin, cl.__class__):
255 255 pass
256 256
257 257 cl.__class__ = filteredchangelog
258 258
259 259 return cl
260 260
261 261
262 262 class filteredchangelogmixin:
263 263 def tiprev(self):
264 264 """filtered version of revlog.tiprev"""
265 265 for i in range(len(self) - 1, -2, -1):
266 266 if i not in self.filteredrevs:
267 267 return i
268 268
269 269 def __contains__(self, rev):
270 270 """filtered version of revlog.__contains__"""
271 271 return 0 <= rev < len(self) and rev not in self.filteredrevs
272 272
273 273 def __iter__(self):
274 274 """filtered version of revlog.__iter__"""
275 275
276 276 def filterediter():
277 277 for i in range(len(self)):
278 278 if i not in self.filteredrevs:
279 279 yield i
280 280
281 281 return filterediter()
282 282
283 283 def revs(self, start=0, stop=None):
284 284 """filtered version of revlog.revs"""
285 285 for i in super(filteredchangelogmixin, self).revs(start, stop):
286 286 if i not in self.filteredrevs:
287 287 yield i
288 288
289 289 def _checknofilteredinrevs(self, revs):
290 290 """raise the appropriate error if 'revs' contains a filtered revision
291 291
292 292 This returns a version of 'revs' to be used thereafter by the caller.
293 293 In particular, if revs is an iterator, it is converted into a set.
294 294 """
295 295 if hasattr(revs, '__next__'):
296 296 # Note that inspect.isgenerator() is not true for iterators,
297 297 revs = set(revs)
298 298
299 299 filteredrevs = self.filteredrevs
300 300 if hasattr(revs, 'first'): # smartset
301 301 offenders = revs & filteredrevs
302 302 else:
303 303 offenders = filteredrevs.intersection(revs)
304 304
305 305 for rev in offenders:
306 306 raise error.FilteredIndexError(rev)
307 307 return revs
308 308
309 309 def _head_node_ids(self):
310 310 # no Rust fast path implemented yet, so just loop in Python
311 311 return [self.node(r) for r in self.headrevs()]
312 312
313 def headrevs(self, revs=None):
313 def headrevs(self, revs=None, stop_rev=None):
314 314 if revs is None:
315 return self.index.headrevs(self.filteredrevs)
315 filtered = self.filteredrevs
316 if stop_rev is not None and stop_rev < len(self.index):
317 filtered = set(self.filteredrevs)
318 filtered.update(range(stop_rev, len(self.index)))
319 return self.index.headrevs(filtered)
320 assert stop_rev is None
316 321
317 322 revs = self._checknofilteredinrevs(revs)
318 323 return super(filteredchangelogmixin, self).headrevs(revs)
319 324
320 325 def strip(self, *args, **kwargs):
321 326 # XXX make something better than assert
322 327 # We can't expect proper strip behavior if we are filtered.
323 328 assert not self.filteredrevs
324 329 super(filteredchangelogmixin, self).strip(*args, **kwargs)
325 330
326 331 def rev(self, node):
327 332 """filtered version of revlog.rev"""
328 333 r = super(filteredchangelogmixin, self).rev(node)
329 334 if r in self.filteredrevs:
330 335 raise error.FilteredLookupError(
331 336 hex(node), self.display_id, _(b'filtered node')
332 337 )
333 338 return r
334 339
335 340 def node(self, rev):
336 341 """filtered version of revlog.node"""
337 342 if rev in self.filteredrevs:
338 343 raise error.FilteredIndexError(rev)
339 344 return super(filteredchangelogmixin, self).node(rev)
340 345
341 346 def linkrev(self, rev):
342 347 """filtered version of revlog.linkrev"""
343 348 if rev in self.filteredrevs:
344 349 raise error.FilteredIndexError(rev)
345 350 return super(filteredchangelogmixin, self).linkrev(rev)
346 351
347 352 def parentrevs(self, rev):
348 353 """filtered version of revlog.parentrevs"""
349 354 if rev in self.filteredrevs:
350 355 raise error.FilteredIndexError(rev)
351 356 return super(filteredchangelogmixin, self).parentrevs(rev)
352 357
353 358 def flags(self, rev):
354 359 """filtered version of revlog.flags"""
355 360 if rev in self.filteredrevs:
356 361 raise error.FilteredIndexError(rev)
357 362 return super(filteredchangelogmixin, self).flags(rev)
358 363
359 364
360 365 class repoview:
361 366 """Provide a read/write view of a repo through a filtered changelog
362 367
363 368 This object is used to access a filtered version of a repository without
364 369 altering the original repository object itself. We can not alter the
365 370 original object for two main reasons:
366 371 - It prevents the use of a repo with multiple filters at the same time. In
367 372 particular when multiple threads are involved.
368 373 - It makes scope of the filtering harder to control.
369 374
370 375 This object behaves very much like the original repository. All attribute
371 376 operations are done on the original repository:
372 377 - An access to `repoview.someattr` actually returns `repo.someattr`,
373 378 - A write to `repoview.someattr` actually sets value of `repo.someattr`,
374 379 - A deletion of `repoview.someattr` actually drops `someattr`
375 380 from `repo.__dict__`.
376 381
377 382 The only exception is the `changelog` property. It is overridden to return
378 383 a (surface) copy of `repo.changelog` with some revisions filtered. The
379 384 `filtername` attribute of the view controls the revisions that need to be
380 385 filtered. (the fact the changelog is copied is an implementation detail).
381 386
382 387 Unlike attributes, this object intercepts all method calls. This means that
383 388 all methods are run on the `repoview` object with the filtered `changelog`
384 389 property. For this purpose the simple `repoview` class must be mixed with
385 390 the actual class of the repository. This ensures that the resulting
386 391 `repoview` object has the very same methods as the repo object. This
387 392 leads to the property below.
388 393
389 394 repoview.method() --> repo.__class__.method(repoview)
390 395
391 396 The inheritance has to be done dynamically because `repo` can be of any
392 397 subclasses of `localrepo`. Eg: `bundlerepo` or `statichttprepo`.
393 398 """
394 399
395 400 def __init__(self, repo, filtername, visibilityexceptions=None):
396 401 if filtername is None:
397 402 msg = "repoview should have a non-None filtername"
398 403 raise error.ProgrammingError(msg)
399 404 object.__setattr__(self, '_unfilteredrepo', repo)
400 405 object.__setattr__(self, 'filtername', filtername)
401 406 object.__setattr__(self, '_clcachekey', None)
402 407 object.__setattr__(self, '_clcache', None)
403 408 # revs which are exceptions and must not be hidden
404 409 object.__setattr__(self, '_visibilityexceptions', visibilityexceptions)
405 410
406 411 # not a propertycache on purpose; we shall implement a proper cache later
407 412 @property
408 413 def changelog(self):
409 414 """return a filtered version of the changeset
410 415
411 416 this changelog must not be used for writing"""
412 417 # some cache may be implemented later
413 418 unfi = self._unfilteredrepo
414 419 unfichangelog = unfi.changelog
415 420 # bypass call to changelog.method
416 421 unfiindex = unfichangelog.index
417 422 unfilen = len(unfiindex)
418 423 unfinode = unfiindex[unfilen - 1][7]
419 424 with util.timedcm('repo filter for %s', self.filtername):
420 425 revs = filterrevs(unfi, self.filtername, self._visibilityexceptions)
421 426 cl = self._clcache
422 427 newkey = (unfilen, unfinode, hash(revs), unfichangelog.is_delaying)
423 428 # if cl.index is not unfiindex, unfi.changelog would be
424 429 # recreated, and our clcache refers to garbage object
425 430 if cl is not None and (
426 431 cl.index is not unfiindex or newkey != self._clcachekey
427 432 ):
428 433 cl = None
429 434 # could have been made None by the previous if
430 435 if cl is None:
431 436 # Only filter if there's something to filter
432 437 cl = wrapchangelog(unfichangelog, revs) if revs else unfichangelog
433 438 object.__setattr__(self, '_clcache', cl)
434 439 object.__setattr__(self, '_clcachekey', newkey)
435 440 return cl
436 441
437 442 def unfiltered(self):
438 443 """Return an unfiltered version of a repo"""
439 444 return self._unfilteredrepo
440 445
441 446 def filtered(self, name, visibilityexceptions=None):
442 447 """Return a filtered version of a repository"""
443 448 if name == self.filtername and not visibilityexceptions:
444 449 return self
445 450 return self.unfiltered().filtered(name, visibilityexceptions)
446 451
447 452 def __repr__(self):
448 453 return '<%s:%s %r>' % (
449 454 self.__class__.__name__,
450 455 pycompat.sysstr(self.filtername),
451 456 self.unfiltered(),
452 457 )
453 458
454 459 # every attribute access is forwarded to the proxied repo
455 460 def __getattr__(self, attr):
456 461 return getattr(self._unfilteredrepo, attr)
457 462
458 463 def __setattr__(self, attr, value):
459 464 return setattr(self._unfilteredrepo, attr, value)
460 465
461 466 def __delattr__(self, attr):
462 467 return delattr(self._unfilteredrepo, attr)
463 468
464 469
465 470 # Dynamically created classes introduce memory cycles via __mro__. See
466 471 # https://bugs.python.org/issue17950.
467 472 # This need of the garbage collector can turn into a memory leak in
468 473 # Python <3.4, which is the first version released with PEP 442.
469 474 _filteredrepotypes = weakref.WeakKeyDictionary()
470 475
471 476
472 477 def newtype(base):
473 478 """Create a new type with the repoview mixin and the given base class"""
474 479 ref = _filteredrepotypes.get(base)
475 480 if ref is not None:
476 481 cls = ref()
477 482 if cls is not None:
478 483 return cls
479 484
480 485 class filteredrepo(repoview, base):
481 486 pass
482 487
483 488 _filteredrepotypes[base] = weakref.ref(filteredrepo)
484 489 # do not reread from weakref to be 100% sure not to return None
485 490 return filteredrepo
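
# A minimal self-contained sketch (illustrative only, not part of this
# module, reusing the module's existing `weakref` import) of the caching
# pattern `newtype` uses: the dynamically created subclass is cached per
# base class in a WeakKeyDictionary, and the cached value is itself a weak
# reference so the cache never keeps the class (and, through its __mro__,
# the base class used as key) alive on its own.  The names `_Mixin`,
# `_view_type_cache` and `make_view_type` are hypothetical.

_view_type_cache = weakref.WeakKeyDictionary()


class _Mixin:
    """stand-in for the repoview mixin in this sketch"""


def make_view_type(base):
    ref = _view_type_cache.get(base)
    if ref is not None:
        cls = ref()
        if cls is not None:
            return cls
    cls = type('filtered' + base.__name__, (_Mixin, base), {})
    _view_type_cache[base] = weakref.ref(cls)
    return cls
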
@@ -1,4112 +1,4118
1 1 # revlog.py - storage back-end for mercurial
2 2 # coding: utf8
3 3 #
4 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
5 5 #
6 6 # This software may be used and distributed according to the terms of the
7 7 # GNU General Public License version 2 or any later version.
8 8
9 9 """Storage back-end for Mercurial.
10 10
11 11 This provides efficient delta storage with O(1) retrieve and append
12 12 and O(changes) merge between branches.
13 13 """
14 14
15 15 from __future__ import annotations
16 16
17 17 import binascii
18 18 import collections
19 19 import contextlib
20 20 import functools
21 21 import io
22 22 import os
23 23 import struct
24 24 import typing
25 25 import weakref
26 26 import zlib
27 27
28 28 from typing import (
29 29 Iterable,
30 30 Iterator,
31 31 Optional,
32 32 Tuple,
33 33 )
34 34
35 35 # import stuff from node for others to import from revlog
36 36 from .node import (
37 37 bin,
38 38 hex,
39 39 nullrev,
40 40 sha1nodeconstants,
41 41 short,
42 42 wdirrev,
43 43 )
44 44 from .i18n import _
45 45 from .revlogutils.constants import (
46 46 ALL_KINDS,
47 47 CHANGELOGV2,
48 48 COMP_MODE_DEFAULT,
49 49 COMP_MODE_INLINE,
50 50 COMP_MODE_PLAIN,
51 51 DELTA_BASE_REUSE_NO,
52 52 DELTA_BASE_REUSE_TRY,
53 53 ENTRY_RANK,
54 54 FEATURES_BY_VERSION,
55 55 FLAG_GENERALDELTA,
56 56 FLAG_INLINE_DATA,
57 57 INDEX_HEADER,
58 58 KIND_CHANGELOG,
59 59 KIND_FILELOG,
60 60 RANK_UNKNOWN,
61 61 REVLOGV0,
62 62 REVLOGV1,
63 63 REVLOGV1_FLAGS,
64 64 REVLOGV2,
65 65 REVLOGV2_FLAGS,
66 66 REVLOG_DEFAULT_FLAGS,
67 67 REVLOG_DEFAULT_FORMAT,
68 68 REVLOG_DEFAULT_VERSION,
69 69 SUPPORTED_FLAGS,
70 70 )
71 71 from .revlogutils.flagutil import (
72 72 REVIDX_DEFAULT_FLAGS,
73 73 REVIDX_ELLIPSIS,
74 74 REVIDX_EXTSTORED,
75 75 REVIDX_FLAGS_ORDER,
76 76 REVIDX_HASCOPIESINFO,
77 77 REVIDX_ISCENSORED,
78 78 REVIDX_RAWTEXT_CHANGING_FLAGS,
79 79 )
80 80 from .thirdparty import attr
81 81
82 82 # Force pytype to use the non-vendored package
83 83 if typing.TYPE_CHECKING:
84 84 # noinspection PyPackageRequirements
85 85 import attr
86 86
87 87 from . import (
88 88 ancestor,
89 89 dagop,
90 90 error,
91 91 mdiff,
92 92 policy,
93 93 pycompat,
94 94 revlogutils,
95 95 templatefilters,
96 96 util,
97 97 vfs as vfsmod,
98 98 )
99 99 from .interfaces import (
100 100 repository,
101 101 util as interfaceutil,
102 102 )
103 103 from .revlogutils import (
104 104 deltas as deltautil,
105 105 docket as docketutil,
106 106 flagutil,
107 107 nodemap as nodemaputil,
108 108 randomaccessfile,
109 109 revlogv0,
110 110 rewrite,
111 111 sidedata as sidedatautil,
112 112 )
113 113 from .utils import (
114 114 storageutil,
115 115 stringutil,
116 116 )
117 117
118 118 # blank usage of all the names to prevent pyflakes complaints
119 119 # We need these names available in the module for extensions.
120 120
121 121 REVLOGV0
122 122 REVLOGV1
123 123 REVLOGV2
124 124 CHANGELOGV2
125 125 FLAG_INLINE_DATA
126 126 FLAG_GENERALDELTA
127 127 REVLOG_DEFAULT_FLAGS
128 128 REVLOG_DEFAULT_FORMAT
129 129 REVLOG_DEFAULT_VERSION
130 130 REVLOGV1_FLAGS
131 131 REVLOGV2_FLAGS
132 132 REVIDX_ISCENSORED
133 133 REVIDX_ELLIPSIS
134 134 REVIDX_HASCOPIESINFO
135 135 REVIDX_EXTSTORED
136 136 REVIDX_DEFAULT_FLAGS
137 137 REVIDX_FLAGS_ORDER
138 138 REVIDX_RAWTEXT_CHANGING_FLAGS
139 139
140 140 parsers = policy.importmod('parsers')
141 141 rustancestor = policy.importrust('ancestor')
142 142 rustdagop = policy.importrust('dagop')
143 143 rustrevlog = policy.importrust('revlog')
144 144
145 145 # Aliased for performance.
146 146 _zlibdecompress = zlib.decompress
147 147
148 148 # max size of inline data embedded into a revlog
149 149 _maxinline = 131072
150 150
151 151
152 152 # Flag processors for REVIDX_ELLIPSIS.
153 153 def ellipsisreadprocessor(rl, text):
154 154 return text, False
155 155
156 156
157 157 def ellipsiswriteprocessor(rl, text):
158 158 return text, False
159 159
160 160
161 161 def ellipsisrawprocessor(rl, text):
162 162 return False
163 163
164 164
165 165 ellipsisprocessor = (
166 166 ellipsisreadprocessor,
167 167 ellipsiswriteprocessor,
168 168 ellipsisrawprocessor,
169 169 )
170 170
171 171
172 172 def _verify_revision(rl, skipflags, state, node):
173 173 """Verify the integrity of the given revlog ``node`` while providing a hook
174 174 point for extensions to influence the operation."""
175 175 if skipflags:
176 176 state[b'skipread'].add(node)
177 177 else:
178 178 # Side-effect: read content and verify hash.
179 179 rl.revision(node)
180 180
181 181
182 182 # True if a fast implementation for persistent-nodemap is available
183 183 #
184 184 # We also consider we have a "fast" implementation in "pure" python because
185 185 # people using pure don't really have performance considerations (and a
186 186 # wheelbarrow of other slowness sources)
187 187 HAS_FAST_PERSISTENT_NODEMAP = rustrevlog is not None or hasattr(
188 188 parsers, 'BaseIndexObject'
189 189 )
190 190
191 191
192 192 @attr.s(slots=True)
193 193 class RevLogRevisionDelta:
194 194 node = attr.ib()
195 195 p1node = attr.ib()
196 196 p2node = attr.ib()
197 197 basenode = attr.ib()
198 198 flags = attr.ib()
199 199 baserevisionsize = attr.ib()
200 200 revision = attr.ib()
201 201 delta = attr.ib()
202 202 sidedata = attr.ib()
203 203 protocol_flags = attr.ib()
204 204 linknode = attr.ib(default=None)
205 205
206 206
207 207 revlogrevisiondelta = interfaceutil.implementer(repository.irevisiondelta)(
208 208 RevLogRevisionDelta
209 209 )
210 210
211 211 if typing.TYPE_CHECKING:
212 212 revlogrevisiondelta = RevLogRevisionDelta
213 213
214 214
215 215 @attr.s(frozen=True)
216 216 class RevLogProblem:
217 217 warning = attr.ib(default=None, type=Optional[bytes])
218 218 error = attr.ib(default=None, type=Optional[bytes])
219 219 node = attr.ib(default=None, type=Optional[bytes])
220 220
221 221
222 222 revlogproblem = interfaceutil.implementer(repository.iverifyproblem)(
223 223 RevLogProblem
224 224 )
225 225
226 226 if typing.TYPE_CHECKING:
227 227 revlogproblem = RevLogProblem
228 228
229 229
230 230 def parse_index_v1(data, inline):
231 231 # call the C implementation to parse the index data
232 232 index, cache = parsers.parse_index2(data, inline)
233 233 return index, cache
234 234
235 235
236 236 def parse_index_v2(data, inline):
237 237 # call the C implementation to parse the index data
238 238 index, cache = parsers.parse_index2(data, inline, format=REVLOGV2)
239 239 return index, cache
240 240
241 241
242 242 def parse_index_cl_v2(data, inline):
243 243 # call the C implementation to parse the index data
244 244 index, cache = parsers.parse_index2(data, inline, format=CHANGELOGV2)
245 245 return index, cache
246 246
247 247
248 248 if hasattr(parsers, 'parse_index_devel_nodemap'):
249 249
250 250 def parse_index_v1_nodemap(data, inline):
251 251 index, cache = parsers.parse_index_devel_nodemap(data, inline)
252 252 return index, cache
253 253
254 254 else:
255 255 parse_index_v1_nodemap = None
256 256
257 257
258 258 def parse_index_v1_rust(data, inline, default_header):
259 259 cache = (0, data) if inline else None
260 260 return rustrevlog.Index(data, default_header), cache
261 261
262 262
263 263 # corresponds to uncompressed length of indexformatng (2 gigs, 4-byte
264 264 # signed integer)
265 265 _maxentrysize = 0x7FFFFFFF
266 266
267 267 FILE_TOO_SHORT_MSG = _(
268 268 b'cannot read from revlog %s;'
269 269 b' expected %d bytes from offset %d, data size is %d'
270 270 )
271 271
272 272 hexdigits = b'0123456789abcdefABCDEF'
273 273
274 274
275 275 class _Config:
276 276 def copy(self):
277 277 return self.__class__(**self.__dict__)
278 278
279 279
280 280 @attr.s()
281 281 class FeatureConfig(_Config):
282 282 """Hold configuration values about the available revlog features"""
283 283
284 284 # the default compression engine
285 285 compression_engine = attr.ib(default=b'zlib')
286 286 # compression engines options
287 287 compression_engine_options = attr.ib(default=attr.Factory(dict))
288 288
289 289 # can we use censor on this revlog
290 290 censorable = attr.ib(default=False)
291 291 # does this revlog use the "side data" feature
292 292 has_side_data = attr.ib(default=False)
293 293 # might remove rank configuration once the computation has no impact
294 294 compute_rank = attr.ib(default=False)
295 295 # parent order is supposed to be semantically irrelevant, so we
296 296 # normally re-sort parents to ensure that the first parent is non-null,
297 297 # if there is a non-null parent at all.
298 298 # filelog abuses the parent order as a flag to mark some instances of
299 299 # meta-encoded files, so allow it to disable this behavior.
300 300 canonical_parent_order = attr.ib(default=False)
301 301 # can ellipsis commit be used
302 302 enable_ellipsis = attr.ib(default=False)
303 303
304 304 def copy(self):
305 305 new = super().copy()
306 306 new.compression_engine_options = self.compression_engine_options.copy()
307 307 return new
308 308
309 309
310 310 @attr.s()
311 311 class DataConfig(_Config):
312 312 """Hold configuration value about how the revlog data are read"""
313 313
314 314 # should we try to open the "pending" version of the revlog
315 315 try_pending = attr.ib(default=False)
316 316 # should we try to open the "split" version of the revlog
317 317 try_split = attr.ib(default=False)
318 318 # When True, indexfile should be opened with checkambig=True at writing,
319 319 # to avoid file stat ambiguity.
320 320 check_ambig = attr.ib(default=False)
321 321
322 322 # If true, use mmap instead of reading to deal with large index
323 323 mmap_large_index = attr.ib(default=False)
324 324 # how much data is considered large
325 325 mmap_index_threshold = attr.ib(default=None)
326 326 # How much data to read and cache into the raw revlog data cache.
327 327 chunk_cache_size = attr.ib(default=65536)
328 328
329 329 # The size of the uncompressed cache compared to the largest revision seen.
330 330 uncompressed_cache_factor = attr.ib(default=None)
331 331
332 332 # The number of chunks cached
333 333 uncompressed_cache_count = attr.ib(default=None)
334 334
335 335 # Allow sparse reading of the revlog data
336 336 with_sparse_read = attr.ib(default=False)
337 337 # minimal density of a sparse read chunk
338 338 sr_density_threshold = attr.ib(default=0.50)
339 339 # minimal size of data we skip when performing sparse read
340 340 sr_min_gap_size = attr.ib(default=262144)
341 341
342 342 # are deltas encoded against arbitrary bases.
343 343 generaldelta = attr.ib(default=False)
344 344
345 345
346 346 @attr.s()
347 347 class DeltaConfig(_Config):
348 348 """Hold configuration value about how new delta are computed
349 349
350 350 Some attributes are duplicated from DataConfig to help keep each object
351 351 self-contained.
352 352 """
353 353
354 354 # can deltas be encoded against arbitrary bases.
355 355 general_delta = attr.ib(default=False)
356 356 # Allow sparse writing of the revlog data
357 357 sparse_revlog = attr.ib(default=False)
358 358 # maximum length of a delta chain
359 359 max_chain_len = attr.ib(default=None)
360 360 # Maximum distance between delta chain base start and end
361 361 max_deltachain_span = attr.ib(default=-1)
362 362 # If `upper_bound_comp` is not None, this is the expected maximal gain from
363 363 # compression for the data content.
364 364 upper_bound_comp = attr.ib(default=None)
365 365 # Should we try a delta against both parents
366 366 delta_both_parents = attr.ib(default=True)
367 367 # Test delta base candidate groups in chunks of this maximal size.
368 368 candidate_group_chunk_size = attr.ib(default=0)
369 369 # Should we display debug information about delta computation
370 370 debug_delta = attr.ib(default=False)
371 371 # trust incoming delta by default
372 372 lazy_delta = attr.ib(default=True)
373 373 # trust the base of incoming delta by default
374 374 lazy_delta_base = attr.ib(default=False)
375 375
376 376
377 377 class _InnerRevlog:
378 378 """An inner layer of the revlog object
379 379
380 380 That layer exists to be able to delegate some operations to Rust; its
381 381 boundaries are arbitrary and based on what we can delegate to Rust.
382 382 """
383 383
384 384 opener: vfsmod.vfs
385 385
386 386 def __init__(
387 387 self,
388 388 opener: vfsmod.vfs,
389 389 index,
390 390 index_file,
391 391 data_file,
392 392 sidedata_file,
393 393 inline,
394 394 data_config,
395 395 delta_config,
396 396 feature_config,
397 397 chunk_cache,
398 398 default_compression_header,
399 399 ):
400 400 self.opener = opener
401 401 self.index = index
402 402
403 403 self.index_file = index_file
404 404 self.data_file = data_file
405 405 self.sidedata_file = sidedata_file
406 406 self.inline = inline
407 407 self.data_config = data_config
408 408 self.delta_config = delta_config
409 409 self.feature_config = feature_config
410 410
411 411 # used during diverted write.
412 412 self._orig_index_file = None
413 413
414 414 self._default_compression_header = default_compression_header
415 415
416 416 # index
417 417
418 418 # 3-tuple of file handles being used for active writing.
419 419 self._writinghandles = None
420 420
421 421 self._segmentfile = randomaccessfile.randomaccessfile(
422 422 self.opener,
423 423 (self.index_file if self.inline else self.data_file),
424 424 self.data_config.chunk_cache_size,
425 425 chunk_cache,
426 426 )
427 427 self._segmentfile_sidedata = randomaccessfile.randomaccessfile(
428 428 self.opener,
429 429 self.sidedata_file,
430 430 self.data_config.chunk_cache_size,
431 431 )
432 432
433 433 # revlog header -> revlog compressor
434 434 self._decompressors = {}
435 435 # 3-tuple of (node, rev, text) for a raw revision.
436 436 self._revisioncache = None
437 437
438 438 # cache some uncompressed chunks
439 439 # rev → uncompressed_chunk
440 440 #
441 441 # the max cost is dynamically updated to be proportional to the
442 442 # size of the revisions we actually encounter.
443 443 self._uncompressed_chunk_cache = None
444 444 if self.data_config.uncompressed_cache_factor is not None:
445 445 self._uncompressed_chunk_cache = util.lrucachedict(
446 446 self.data_config.uncompressed_cache_count,
447 447 maxcost=65536, # some arbitrary initial value
448 448 )
449 449
450 450 self._delay_buffer = None
451 451
452 452 def __len__(self):
453 453 return len(self.index)
454 454
455 455 def clear_cache(self):
456 456 assert not self.is_delaying
457 457 self._revisioncache = None
458 458 if self._uncompressed_chunk_cache is not None:
459 459 self._uncompressed_chunk_cache.clear()
460 460 self._segmentfile.clear_cache()
461 461 self._segmentfile_sidedata.clear_cache()
462 462
463 463 @property
464 464 def canonical_index_file(self):
465 465 if self._orig_index_file is not None:
466 466 return self._orig_index_file
467 467 return self.index_file
468 468
469 469 @property
470 470 def is_delaying(self):
471 471 """is the revlog is currently delaying the visibility of written data?
472 472
473 473 The delaying mechanism can be either in-memory or written on disk in a
474 474 side-file."""
475 475 return (self._delay_buffer is not None) or (
476 476 self._orig_index_file is not None
477 477 )
478 478
479 479 # Derived from index values.
480 480
481 481 def start(self, rev):
482 482 """the offset of the data chunk for this revision"""
483 483 return int(self.index[rev][0] >> 16)
484 484
485 485 def length(self, rev):
486 486 """the length of the data chunk for this revision"""
487 487 return self.index[rev][1]
488 488
489 489 def end(self, rev):
490 490 """the end of the data chunk for this revision"""
491 491 return self.start(rev) + self.length(rev)
492 492
493 493 def deltaparent(self, rev):
494 494 """return deltaparent of the given revision"""
495 495 base = self.index[rev][3]
496 496 if base == rev:
497 497 return nullrev
498 498 elif self.delta_config.general_delta:
499 499 return base
500 500 else:
501 501 return rev - 1
502 502
503 503 def issnapshot(self, rev):
504 504 """tells whether rev is a snapshot"""
505 505 if not self.delta_config.sparse_revlog:
506 506 return self.deltaparent(rev) == nullrev
507 507 elif hasattr(self.index, 'issnapshot'):
508 508 # directly assign the method to cache the attribute test and lookup
509 509 self.issnapshot = self.index.issnapshot
510 510 return self.issnapshot(rev)
511 511 if rev == nullrev:
512 512 return True
513 513 entry = self.index[rev]
514 514 base = entry[3]
515 515 if base == rev:
516 516 return True
517 517 if base == nullrev:
518 518 return True
519 519 p1 = entry[5]
520 520 while self.length(p1) == 0:
521 521 b = self.deltaparent(p1)
522 522 if b == p1:
523 523 break
524 524 p1 = b
525 525 p2 = entry[6]
526 526 while self.length(p2) == 0:
527 527 b = self.deltaparent(p2)
528 528 if b == p2:
529 529 break
530 530 p2 = b
531 531 if base == p1 or base == p2:
532 532 return False
533 533 return self.issnapshot(base)
534 534
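    # ``issnapshot`` above hinges on one idea: a revision is a snapshot when
    # it does not delta against one of its parents.  Without sparse-revlog
    # that only happens for full-text revisions (delta base == nullrev or
    # base == rev); with sparse-revlog a revision may instead delta against
    # another snapshot, producing "intermediate" snapshots, which is why the
    # pure-python fallback rules out both parents and then recurses on the
    # delta base.
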
535 535 def _deltachain(self, rev, stoprev=None):
536 536 """Obtain the delta chain for a revision.
537 537
538 538 ``stoprev`` specifies a revision to stop at. If not specified, we
539 539 stop at the base of the chain.
540 540
541 541 Returns a 2-tuple of (chain, stopped) where ``chain`` is a list of
542 542 revs in ascending order and ``stopped`` is a bool indicating whether
543 543 ``stoprev`` was hit.
544 544 """
545 545 generaldelta = self.delta_config.general_delta
546 546 # Try C implementation.
547 547 try:
548 548 return self.index.deltachain(rev, stoprev, generaldelta)
549 549 except AttributeError:
550 550 pass
551 551
552 552 chain = []
553 553
554 554 # Alias to prevent attribute lookup in tight loop.
555 555 index = self.index
556 556
557 557 iterrev = rev
558 558 e = index[iterrev]
559 559 while iterrev != e[3] and iterrev != stoprev:
560 560 chain.append(iterrev)
561 561 if generaldelta:
562 562 iterrev = e[3]
563 563 else:
564 564 iterrev -= 1
565 565 e = index[iterrev]
566 566
567 567 if iterrev == stoprev:
568 568 stopped = True
569 569 else:
570 570 chain.append(iterrev)
571 571 stopped = False
572 572
573 573 chain.reverse()
574 574 return chain, stopped
575 575
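    # Concrete example for ``_deltachain`` above, assuming a general-delta
    # index where rev 0 is a full snapshot (base == rev), rev 2 deltas
    # against rev 0 and rev 4 deltas against rev 2: the chain for rev 4 is
    # ([0, 2, 4], False).  With stoprev=2 (e.g. rev 2 already sits in the
    # revision cache) the walk stops early and returns ([4], True), so the
    # caller only needs to apply the rev 4 delta on top of the cached text.
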
576 576 @util.propertycache
577 577 def _compressor(self):
578 578 engine = util.compengines[self.feature_config.compression_engine]
579 579 return engine.revlogcompressor(
580 580 self.feature_config.compression_engine_options
581 581 )
582 582
583 583 @util.propertycache
584 584 def _decompressor(self):
585 585 """the default decompressor"""
586 586 if self._default_compression_header is None:
587 587 return None
588 588 t = self._default_compression_header
589 589 c = self._get_decompressor(t)
590 590 return c.decompress
591 591
592 592 def _get_decompressor(self, t: bytes):
593 593 try:
594 594 compressor = self._decompressors[t]
595 595 except KeyError:
596 596 try:
597 597 engine = util.compengines.forrevlogheader(t)
598 598 compressor = engine.revlogcompressor(
599 599 self.feature_config.compression_engine_options
600 600 )
601 601 self._decompressors[t] = compressor
602 602 except KeyError:
603 603 raise error.RevlogError(
604 604 _(b'unknown compression type %s') % binascii.hexlify(t)
605 605 )
606 606 return compressor
607 607
608 608 def compress(self, data: bytes) -> Tuple[bytes, bytes]:
609 609 """Generate a possibly-compressed representation of data."""
610 610 if not data:
611 611 return b'', data
612 612
613 613 compressed = self._compressor.compress(data)
614 614
615 615 if compressed:
616 616 # The revlog compressor added the header in the returned data.
617 617 return b'', compressed
618 618
619 619 if data[0:1] == b'\0':
620 620 return b'', data
621 621 return b'u', data
622 622
623 623 def decompress(self, data: bytes):
624 624 """Decompress a revlog chunk.
625 625
626 626 The chunk is expected to begin with a header identifying the
627 627 format type so it can be routed to an appropriate decompressor.
628 628 """
629 629 if not data:
630 630 return data
631 631
632 632 # Revlogs are read much more frequently than they are written and many
633 633 # chunks only take microseconds to decompress, so performance is
634 634 # important here.
635 635 #
636 636 # We can make a few assumptions about revlogs:
637 637 #
638 638 # 1) the majority of chunks will be compressed (as opposed to inline
639 639 # raw data).
640 640 # 2) decompressing *any* data will likely be at least 10x slower than
641 641 # returning raw inline data.
642 642 # 3) we want to prioritize common and officially supported compression
643 643 # engines
644 644 #
645 645 # It follows that we want to optimize for "decompress compressed data
646 646 # when encoded with common and officially supported compression engines"
647 647 # case over "raw data" and "data encoded by less common or non-official
648 648 # compression engines." That is why we have the inline lookup first
649 649 # followed by the compengines lookup.
650 650 #
651 651 # According to `hg perfrevlogchunks`, this is ~0.5% faster for zlib
652 652 # compressed chunks. And this matters for changelog and manifest reads.
653 653 t = data[0:1]
654 654
655 655 if t == b'x':
656 656 try:
657 657 return _zlibdecompress(data)
658 658 except zlib.error as e:
659 659 raise error.RevlogError(
660 660 _(b'revlog decompress error: %s')
661 661 % stringutil.forcebytestr(e)
662 662 )
663 663 # '\0' is more common than 'u' so it goes first.
664 664 elif t == b'\0':
665 665 return data
666 666 elif t == b'u':
667 667 return util.buffer(data, 1)
668 668
669 669 compressor = self._get_decompressor(t)
670 670
671 671 return compressor.decompress(data)
672 672
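    # The first byte written by ``compress`` above is what ``decompress``
    # dispatches on: zlib output naturally starts with b'x', chunks stored
    # verbatim get a b'u' prefix, and raw data that already starts with a
    # NUL byte is stored as-is because b'\0' can never begin a compressed
    # or b'u'-prefixed chunk.  Any other leading byte is treated as the
    # header of a pluggable compression engine (zstd, for instance) and is
    # routed through ``_get_decompressor``.
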
673 673 @contextlib.contextmanager
674 674 def reading(self):
675 675 """Context manager that keeps data and sidedata files open for reading"""
676 676 if len(self.index) == 0:
677 677 yield # nothing to be read
678 678 elif self._delay_buffer is not None and self.inline:
679 679 msg = "revlog with delayed write should not be inline"
680 680 raise error.ProgrammingError(msg)
681 681 else:
682 682 with self._segmentfile.reading():
683 683 with self._segmentfile_sidedata.reading():
684 684 yield
685 685
686 686 @property
687 687 def is_writing(self):
688 688 """True is a writing context is open"""
689 689 return self._writinghandles is not None
690 690
691 691 @property
692 692 def is_open(self):
693 693 """True if any file handle is being held
694 694
695 695 Used for asserts and debugging in the python code"""
696 696 return self._segmentfile.is_open or self._segmentfile_sidedata.is_open
697 697
698 698 @contextlib.contextmanager
699 699 def writing(self, transaction, data_end=None, sidedata_end=None):
700 700 """Open the revlog files for writing
701 701
702 702 Adding content to a revlog should be done within such a context.
703 703 """
704 704 if self.is_writing:
705 705 yield
706 706 else:
707 707 ifh = dfh = sdfh = None
708 708 try:
709 709 r = len(self.index)
710 710 # opening the data file.
711 711 dsize = 0
712 712 if r:
713 713 dsize = self.end(r - 1)
714 714 dfh = None
715 715 if not self.inline:
716 716 try:
717 717 dfh = self.opener(self.data_file, mode=b"r+")
718 718 if data_end is None:
719 719 dfh.seek(0, os.SEEK_END)
720 720 else:
721 721 dfh.seek(data_end, os.SEEK_SET)
722 722 except FileNotFoundError:
723 723 dfh = self.opener(self.data_file, mode=b"w+")
724 724 transaction.add(self.data_file, dsize)
725 725 if self.sidedata_file is not None:
726 726 assert sidedata_end is not None
727 727 # revlog-v2 does not inline, help Pytype
728 728 assert dfh is not None
729 729 try:
730 730 sdfh = self.opener(self.sidedata_file, mode=b"r+")
731 731 dfh.seek(sidedata_end, os.SEEK_SET)
732 732 except FileNotFoundError:
733 733 sdfh = self.opener(self.sidedata_file, mode=b"w+")
734 734 transaction.add(self.sidedata_file, sidedata_end)
735 735
736 736 # opening the index file.
737 737 isize = r * self.index.entry_size
738 738 ifh = self.__index_write_fp()
739 739 if self.inline:
740 740 transaction.add(self.index_file, dsize + isize)
741 741 else:
742 742 transaction.add(self.index_file, isize)
743 743 # exposing all file handle for writing.
744 744 self._writinghandles = (ifh, dfh, sdfh)
745 745 self._segmentfile.writing_handle = ifh if self.inline else dfh
746 746 self._segmentfile_sidedata.writing_handle = sdfh
747 747 yield
748 748 finally:
749 749 self._writinghandles = None
750 750 self._segmentfile.writing_handle = None
751 751 self._segmentfile_sidedata.writing_handle = None
752 752 if dfh is not None:
753 753 dfh.close()
754 754 if sdfh is not None:
755 755 sdfh.close()
756 756 # closing the index file last to avoid exposing references to
757 757 # potentially unflushed data content.
758 758 if ifh is not None:
759 759 ifh.close()
760 760
761 761 def __index_write_fp(self, index_end=None):
762 762 """internal method to open the index file for writing
763 763
764 764 You should not use this directly; use `_writing` instead
765 765 """
766 766 try:
767 767 if self._delay_buffer is None:
768 768 f = self.opener(
769 769 self.index_file,
770 770 mode=b"r+",
771 771 checkambig=self.data_config.check_ambig,
772 772 )
773 773 else:
774 774 # check_ambig affects the way we open the file for writing; however,
775 775 # here we do not actually open a file for writing, as writes
776 776 # will be appended to a delay_buffer. So check_ambig is not
777 777 # meaningful and is unneeded here.
778 778 f = randomaccessfile.appender(
779 779 self.opener, self.index_file, b"r+", self._delay_buffer
780 780 )
781 781 if index_end is None:
782 782 f.seek(0, os.SEEK_END)
783 783 else:
784 784 f.seek(index_end, os.SEEK_SET)
785 785 return f
786 786 except FileNotFoundError:
787 787 if self._delay_buffer is None:
788 788 return self.opener(
789 789 self.index_file,
790 790 mode=b"w+",
791 791 checkambig=self.data_config.check_ambig,
792 792 )
793 793 else:
794 794 return randomaccessfile.appender(
795 795 self.opener, self.index_file, b"w+", self._delay_buffer
796 796 )
797 797
798 798 def __index_new_fp(self):
799 799 """internal method to create a new index file for writing
800 800
801 801 You should not use this unless you are upgrading from an inline revlog
802 802 """
803 803 return self.opener(
804 804 self.index_file,
805 805 mode=b"w",
806 806 checkambig=self.data_config.check_ambig,
807 807 )
808 808
809 809 def split_inline(self, tr, header, new_index_file_path=None):
810 810 """split the data of an inline revlog into an index and a data file"""
811 811 assert self._delay_buffer is None
812 812 existing_handles = False
813 813 if self._writinghandles is not None:
814 814 existing_handles = True
815 815 fp = self._writinghandles[0]
816 816 fp.flush()
817 817 fp.close()
818 818 # We can't use the cached file handle after close(). So prevent
819 819 # its usage.
820 820 self._writinghandles = None
821 821 self._segmentfile.writing_handle = None
822 822 # No need to deal with the sidedata writing handle, as it is only
823 823 # relevant for revlog-v2, which is never inline and therefore never
824 824 # reaches this code
825 825
826 826 new_dfh = self.opener(self.data_file, mode=b"w+")
827 827 new_dfh.truncate(0) # drop any potentially existing data
828 828 try:
829 829 with self.reading():
830 830 for r in range(len(self.index)):
831 831 new_dfh.write(self.get_segment_for_revs(r, r)[1])
832 832 new_dfh.flush()
833 833
834 834 if new_index_file_path is not None:
835 835 self.index_file = new_index_file_path
836 836 with self.__index_new_fp() as fp:
837 837 self.inline = False
838 838 for i in range(len(self.index)):
839 839 e = self.index.entry_binary(i)
840 840 if i == 0:
841 841 packed_header = self.index.pack_header(header)
842 842 e = packed_header + e
843 843 fp.write(e)
844 844
845 845 # If we don't use side-write, the temp file replaces the real
846 846 # index when we exit the context manager
847 847
848 848 self._segmentfile = randomaccessfile.randomaccessfile(
849 849 self.opener,
850 850 self.data_file,
851 851 self.data_config.chunk_cache_size,
852 852 )
853 853
854 854 if existing_handles:
855 855 # switched from inline to conventional reopen the index
856 856 ifh = self.__index_write_fp()
857 857 self._writinghandles = (ifh, new_dfh, None)
858 858 self._segmentfile.writing_handle = new_dfh
859 859 new_dfh = None
860 860 # No need to deal with the sidedata writing handle, as it is only
861 861 # relevant for revlog-v2, which is never inline and therefore never
862 862 # reaches this code
863 863 finally:
864 864 if new_dfh is not None:
865 865 new_dfh.close()
866 866 return self.index_file
867 867
868 868 def get_segment_for_revs(self, startrev, endrev):
869 869 """Obtain a segment of raw data corresponding to a range of revisions.
870 870
871 871 Accepts the start and end revisions and an optional already-open
872 872 file handle to be used for reading. If the file handle is read, its
873 873 seek position will not be preserved.
874 874
875 875 Requests for data may be satisfied by a cache.
876 876
877 877 Returns a 2-tuple of (offset, data) for the requested range of
878 878 revisions. Offset is the integer offset from the beginning of the
879 879 revlog and data is a str or buffer of the raw byte data.
880 880
881 881 Callers will need to call ``self.start(rev)`` and ``self.length(rev)``
882 882 to determine where each revision's data begins and ends.
883 883
884 884 API: we should consider making this a private part of the InnerRevlog
885 885 at some point.
886 886 """
887 887 # Inlined self.start(startrev) & self.end(endrev) for perf reasons
888 888 # (functions are expensive).
889 889 index = self.index
890 890 istart = index[startrev]
891 891 start = int(istart[0] >> 16)
892 892 if startrev == endrev:
893 893 end = start + istart[1]
894 894 else:
895 895 iend = index[endrev]
896 896 end = int(iend[0] >> 16) + iend[1]
897 897
898 898 if self.inline:
899 899 start += (startrev + 1) * self.index.entry_size
900 900 end += (endrev + 1) * self.index.entry_size
901 901 length = end - start
902 902
903 903 return start, self._segmentfile.read_chunk(start, length)
904 904
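    # Worked example for the inline adjustment above: in an inline revlog the
    # index entries and data chunks are interleaved in a single file, so a
    # chunk whose logical data offset is 120, with an entry size of 64 bytes
    # and startrev == 3, really starts at file offset
    # 120 + (3 + 1) * 64 = 376.
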
905 905 def _chunk(self, rev):
906 906 """Obtain a single decompressed chunk for a revision.
907 907
908 908 Accepts an integer revision and an optional already-open file handle
909 909 to be used for reading. If used, the seek position of the file will not
910 910 be preserved.
911 911
912 912 Returns a str holding uncompressed data for the requested revision.
913 913 """
914 914 if self._uncompressed_chunk_cache is not None:
915 915 uncomp = self._uncompressed_chunk_cache.get(rev)
916 916 if uncomp is not None:
917 917 return uncomp
918 918
919 919 compression_mode = self.index[rev][10]
920 920 data = self.get_segment_for_revs(rev, rev)[1]
921 921 if compression_mode == COMP_MODE_PLAIN:
922 922 uncomp = data
923 923 elif compression_mode == COMP_MODE_DEFAULT:
924 924 uncomp = self._decompressor(data)
925 925 elif compression_mode == COMP_MODE_INLINE:
926 926 uncomp = self.decompress(data)
927 927 else:
928 928 msg = b'unknown compression mode %d'
929 929 msg %= compression_mode
930 930 raise error.RevlogError(msg)
931 931 if self._uncompressed_chunk_cache is not None:
932 932 self._uncompressed_chunk_cache.insert(rev, uncomp, cost=len(uncomp))
933 933 return uncomp
934 934
935 935 def _chunks(self, revs, targetsize=None):
936 936 """Obtain decompressed chunks for the specified revisions.
937 937
938 938 Accepts an iterable of numeric revisions that are assumed to be in
939 939 ascending order.
940 940
941 941 This function is similar to calling ``self._chunk()`` multiple times,
942 942 but is faster.
943 943
944 944 Returns a list with decompressed data for each requested revision.
945 945 """
946 946 if not revs:
947 947 return []
948 948 start = self.start
949 949 length = self.length
950 950 inline = self.inline
951 951 iosize = self.index.entry_size
952 952 buffer = util.buffer
953 953
954 954 fetched_revs = []
955 955 fadd = fetched_revs.append
956 956
957 957 chunks = []
958 958 ladd = chunks.append
959 959
960 960 if self._uncompressed_chunk_cache is None:
961 961 fetched_revs = revs
962 962 else:
963 963 for rev in revs:
964 964 cached_value = self._uncompressed_chunk_cache.get(rev)
965 965 if cached_value is None:
966 966 fadd(rev)
967 967 else:
968 968 ladd((rev, cached_value))
969 969
970 970 if not fetched_revs:
971 971 slicedchunks = ()
972 972 elif not self.data_config.with_sparse_read:
973 973 slicedchunks = (fetched_revs,)
974 974 else:
975 975 slicedchunks = deltautil.slicechunk(
976 976 self,
977 977 fetched_revs,
978 978 targetsize=targetsize,
979 979 )
980 980
981 981 for revschunk in slicedchunks:
982 982 firstrev = revschunk[0]
983 983 # Skip trailing revisions with empty diff
984 984 for lastrev in revschunk[::-1]:
985 985 if length(lastrev) != 0:
986 986 break
987 987
988 988 try:
989 989 offset, data = self.get_segment_for_revs(firstrev, lastrev)
990 990 except OverflowError:
991 991 # issue4215 - we can't cache a run of chunks greater than
992 992 # 2G on Windows
993 993 for rev in revschunk:
994 994 ladd((rev, self._chunk(rev)))
995 995
996 996 decomp = self.decompress
997 997 # self._decompressor might be None, but will not be used in that case
998 998 def_decomp = self._decompressor
999 999 for rev in revschunk:
1000 1000 chunkstart = start(rev)
1001 1001 if inline:
1002 1002 chunkstart += (rev + 1) * iosize
1003 1003 chunklength = length(rev)
1004 1004 comp_mode = self.index[rev][10]
1005 1005 c = buffer(data, chunkstart - offset, chunklength)
1006 1006 if comp_mode == COMP_MODE_PLAIN:
1007 1007 c = c
1008 1008 elif comp_mode == COMP_MODE_INLINE:
1009 1009 c = decomp(c)
1010 1010 elif comp_mode == COMP_MODE_DEFAULT:
1011 1011 c = def_decomp(c)
1012 1012 else:
1013 1013 msg = b'unknown compression mode %d'
1014 1014 msg %= comp_mode
1015 1015 raise error.RevlogError(msg)
1016 1016 ladd((rev, c))
1017 1017 if self._uncompressed_chunk_cache is not None:
1018 1018 self._uncompressed_chunk_cache.insert(rev, c, len(c))
1019 1019
1020 1020 chunks.sort()
1021 1021 return [x[1] for x in chunks]
1022 1022
1023 1023 def raw_text(self, node, rev) -> bytes:
1024 1024 """return the possibly unvalidated rawtext for a revision
1025 1025
1026 1026 returns rawtext
1027 1027 """
1028 1028
1029 1029 # revision in the cache (could be useful to apply delta)
1030 1030 cachedrev = None
1031 1031 # An intermediate text to apply deltas to
1032 1032 basetext = None
1033 1033
1034 1034 # Check if we have the entry in cache
1035 1035 # The cache entry looks like (node, rev, rawtext)
1036 1036 if self._revisioncache:
1037 1037 cachedrev = self._revisioncache[1]
1038 1038
1039 1039 chain, stopped = self._deltachain(rev, stoprev=cachedrev)
1040 1040 if stopped:
1041 1041 basetext = self._revisioncache[2]
1042 1042
1043 1043 # drop cache to save memory, the caller is expected to
1044 1044 # update self._inner._revisioncache after validating the text
1045 1045 self._revisioncache = None
1046 1046
1047 1047 targetsize = None
1048 1048 rawsize = self.index[rev][2]
1049 1049 if 0 <= rawsize:
1050 1050 targetsize = 4 * rawsize
1051 1051
1052 1052 if self._uncompressed_chunk_cache is not None:
1053 1053 # dynamically update the uncompressed_chunk_cache size to the
1054 1054 # largest revision we saw in this revlog.
1055 1055 factor = self.data_config.uncompressed_cache_factor
1056 1056 candidate_size = rawsize * factor
1057 1057 if candidate_size > self._uncompressed_chunk_cache.maxcost:
1058 1058 self._uncompressed_chunk_cache.maxcost = candidate_size
1059 1059
1060 1060 bins = self._chunks(chain, targetsize=targetsize)
1061 1061 if basetext is None:
1062 1062 basetext = bytes(bins[0])
1063 1063 bins = bins[1:]
1064 1064
1065 1065 rawtext = mdiff.patches(basetext, bins)
1066 1066 del basetext # let us have a chance to free memory early
1067 1067 return rawtext
1068 1068
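    # A minimal sketch (illustrative only, not used by the revlog code) of
    # the delta folding performed by ``raw_text`` above, with a toy delta
    # format of (start, end, replacement) tuples standing in for the binary
    # mdiff patches that ``mdiff.patches`` applies.
    @staticmethod
    def _toy_fold_chain(basetext, deltas):
        # each delta rewrites text[start:end] in the running result, just as
        # each chunk of the real chain is applied on top of the previous one
        text = basetext
        for start, end, replacement in deltas:
            text = text[:start] + replacement + text[end:]
        return text
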
1069 1069 def sidedata(self, rev, sidedata_end):
1070 1070 """Return the sidedata for a given revision number."""
1071 1071 index_entry = self.index[rev]
1072 1072 sidedata_offset = index_entry[8]
1073 1073 sidedata_size = index_entry[9]
1074 1074
1075 1075 if self.inline:
1076 1076 sidedata_offset += self.index.entry_size * (1 + rev)
1077 1077 if sidedata_size == 0:
1078 1078 return {}
1079 1079
1080 1080 if sidedata_end < sidedata_offset + sidedata_size:
1081 1081 filename = self.sidedata_file
1082 1082 end = sidedata_end
1083 1083 offset = sidedata_offset
1084 1084 length = sidedata_size
1085 1085 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
1086 1086 raise error.RevlogError(m)
1087 1087
1088 1088 comp_segment = self._segmentfile_sidedata.read_chunk(
1089 1089 sidedata_offset, sidedata_size
1090 1090 )
1091 1091
1092 1092 comp = self.index[rev][11]
1093 1093 if comp == COMP_MODE_PLAIN:
1094 1094 segment = comp_segment
1095 1095 elif comp == COMP_MODE_DEFAULT:
1096 1096 segment = self._decompressor(comp_segment)
1097 1097 elif comp == COMP_MODE_INLINE:
1098 1098 segment = self.decompress(comp_segment)
1099 1099 else:
1100 1100 msg = b'unknown compression mode %d'
1101 1101 msg %= comp
1102 1102 raise error.RevlogError(msg)
1103 1103
1104 1104 sidedata = sidedatautil.deserialize_sidedata(segment)
1105 1105 return sidedata
1106 1106
1107 1107 def write_entry(
1108 1108 self,
1109 1109 transaction,
1110 1110 entry,
1111 1111 data,
1112 1112 link,
1113 1113 offset,
1114 1114 sidedata,
1115 1115 sidedata_offset,
1116 1116 index_end,
1117 1117 data_end,
1118 1118 sidedata_end,
1119 1119 ):
1120 1120 # Files opened in a+ mode have inconsistent behavior on various
1121 1121 # platforms. Windows requires that a file positioning call be made
1122 1122 # when the file handle transitions between reads and writes. See
1123 1123 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
1124 1124 # platforms, Python or the platform itself can be buggy. Some versions
1125 1125 # of Solaris have been observed to not append at the end of the file
1126 1126 # if the file was seeked to before the end. See issue4943 for more.
1127 1127 #
1128 1128 # We work around this issue by inserting a seek() before writing.
1129 1129 # Note: This is likely not necessary on Python 3. However, because
1130 1130 # the file handle is reused for reads and may be seeked there, we need
1131 1131 # to be careful before changing this.
1132 1132 if self._writinghandles is None:
1133 1133 msg = b'adding revision outside `revlog._writing` context'
1134 1134 raise error.ProgrammingError(msg)
1135 1135 ifh, dfh, sdfh = self._writinghandles
1136 1136 if index_end is None:
1137 1137 ifh.seek(0, os.SEEK_END)
1138 1138 else:
1139 1139 ifh.seek(index_end, os.SEEK_SET)
1140 1140 if dfh:
1141 1141 if data_end is None:
1142 1142 dfh.seek(0, os.SEEK_END)
1143 1143 else:
1144 1144 dfh.seek(data_end, os.SEEK_SET)
1145 1145 if sdfh:
1146 1146 sdfh.seek(sidedata_end, os.SEEK_SET)
1147 1147
1148 1148 curr = len(self.index) - 1
1149 1149 if not self.inline:
1150 1150 transaction.add(self.data_file, offset)
1151 1151 if self.sidedata_file:
1152 1152 transaction.add(self.sidedata_file, sidedata_offset)
1153 1153 transaction.add(self.canonical_index_file, curr * len(entry))
1154 1154 if data[0]:
1155 1155 dfh.write(data[0])
1156 1156 dfh.write(data[1])
1157 1157 if sidedata:
1158 1158 sdfh.write(sidedata)
1159 1159 if self._delay_buffer is None:
1160 1160 ifh.write(entry)
1161 1161 else:
1162 1162 self._delay_buffer.append(entry)
1163 1163 elif self._delay_buffer is not None:
1164 1164 msg = b'invalid delayed write on inline revlog'
1165 1165 raise error.ProgrammingError(msg)
1166 1166 else:
1167 1167 offset += curr * self.index.entry_size
1168 1168 transaction.add(self.canonical_index_file, offset)
1169 1169 assert not sidedata
1170 1170 ifh.write(entry)
1171 1171 ifh.write(data[0])
1172 1172 ifh.write(data[1])
1173 1173 return (
1174 1174 ifh.tell(),
1175 1175 dfh.tell() if dfh else None,
1176 1176 sdfh.tell() if sdfh else None,
1177 1177 )
1178 1178
1179 1179 def _divert_index(self):
1180 1180 index_file = self.index_file
1181 1181 # when we encounter a legacy inline-changelog, split it. However, it is
1182 1182 # important to use the expected filename for pending content
1183 1183 # (<radix>.a), otherwise hooks won't see the content of the
1184 1184 # pending transaction.
1185 1185 if index_file.endswith(b'.s'):
1186 1186 index_file = self.index_file[:-2]
1187 1187 return index_file + b'.a'
1188 1188
1189 1189 def delay(self):
1190 1190 assert not self.is_open
1191 1191 if self.inline:
1192 1192 msg = "revlog with delayed write should not be inline"
1193 1193 raise error.ProgrammingError(msg)
1194 1194 if self._delay_buffer is not None or self._orig_index_file is not None:
1195 1195 # delay or divert already in place
1196 1196 return None
1197 1197 elif len(self.index) == 0:
1198 1198 self._orig_index_file = self.index_file
1199 1199 self.index_file = self._divert_index()
1200 1200 assert self._orig_index_file is not None
1201 1201 assert self.index_file is not None
1202 1202 if self.opener.exists(self.index_file):
1203 1203 self.opener.unlink(self.index_file)
1204 1204 return self.index_file
1205 1205 else:
1206 1206 self._delay_buffer = []
1207 1207 return None
1208 1208
1209 1209 def write_pending(self):
1210 1210 assert not self.is_open
1211 1211 if self.inline:
1212 1212 msg = "revlog with delayed write should not be inline"
1213 1213 raise error.ProgrammingError(msg)
1214 1214 if self._orig_index_file is not None:
1215 1215 return None, True
1216 1216 any_pending = False
1217 1217 pending_index_file = self._divert_index()
1218 1218 if self.opener.exists(pending_index_file):
1219 1219 self.opener.unlink(pending_index_file)
1220 1220 util.copyfile(
1221 1221 self.opener.join(self.index_file),
1222 1222 self.opener.join(pending_index_file),
1223 1223 )
1224 1224 if self._delay_buffer:
1225 1225 with self.opener(pending_index_file, b'r+') as ifh:
1226 1226 ifh.seek(0, os.SEEK_END)
1227 1227 ifh.write(b"".join(self._delay_buffer))
1228 1228 any_pending = True
1229 1229 self._delay_buffer = None
1230 1230 self._orig_index_file = self.index_file
1231 1231 self.index_file = pending_index_file
1232 1232 return self.index_file, any_pending
1233 1233
1234 1234 def finalize_pending(self):
1235 1235 assert not self.is_open
1236 1236 if self.inline:
1237 1237 msg = "revlog with delayed write should not be inline"
1238 1238 raise error.ProgrammingError(msg)
1239 1239
1240 1240 delay = self._delay_buffer is not None
1241 1241 divert = self._orig_index_file is not None
1242 1242
1243 1243 if delay and divert:
1244 1244 assert False, "unreachable"
1245 1245 elif delay:
1246 1246 if self._delay_buffer:
1247 1247 with self.opener(self.index_file, b'r+') as ifh:
1248 1248 ifh.seek(0, os.SEEK_END)
1249 1249 ifh.write(b"".join(self._delay_buffer))
1250 1250 self._delay_buffer = None
1251 1251 elif divert:
1252 1252 if self.opener.exists(self.index_file):
1253 1253 self.opener.rename(
1254 1254 self.index_file,
1255 1255 self._orig_index_file,
1256 1256 checkambig=True,
1257 1257 )
1258 1258 self.index_file = self._orig_index_file
1259 1259 self._orig_index_file = None
1260 1260 else:
1261 1261 msg = b"not delay or divert found on this revlog"
1262 1262 raise error.ProgrammingError(msg)
1263 1263 return self.canonical_index_file
1264 1264
1265 1265
1266 1266 class revlog:
1267 1267 """
1268 1268 the underlying revision storage object
1269 1269
1270 1270 A revlog consists of two parts, an index and the revision data.
1271 1271
1272 1272 The index is a file with a fixed record size containing
1273 1273 information on each revision, including its nodeid (hash), the
1274 1274 nodeids of its parents, the position and offset of its data within
1275 1275 the data file, and the revision it's based on. Finally, each entry
1276 1276 contains a linkrev entry that can serve as a pointer to external
1277 1277 data.
1278 1278
1279 1279 The revision data itself is a linear collection of data chunks.
1280 1280 Each chunk represents a revision and is usually represented as a
1281 1281 delta against the previous chunk. To bound lookup time, runs of
1282 1282 deltas are limited to about 2 times the length of the original
1283 1283 version data. This makes retrieval of a version proportional to
1284 1284 its size, or O(1) relative to the number of revisions.
1285 1285
1286 1286 Both pieces of the revlog are written to in an append-only
1287 1287 fashion, which means we never need to rewrite a file to insert or
1288 1288 remove data, and can use some simple techniques to avoid the need
1289 1289 for locking while reading.
1290 1290
1291 1291 If checkambig, indexfile is opened with checkambig=True at
1292 1292 writing, to avoid file stat ambiguity.
1293 1293
1294 1294 If mmaplargeindex is True, and an mmapindexthreshold is set, the
1295 1295 index will be mmapped rather than read if it is larger than the
1296 1296 configured threshold.
1297 1297
1298 1298 If censorable is True, the revlog can have censored revisions.
1299 1299
1300 1300 If `upperboundcomp` is not None, this is the expected maximal gain from
1301 1301 compression for the data content.
1302 1302
1303 1303 `concurrencychecker` is an optional function that receives 3 arguments: a
1304 1304 file handle, a filename, and an expected position. It should check whether
1305 1305 the current position in the file handle is valid, and log/warn/fail (by
1306 1306 raising).
1307 1307
1308 1308 See mercurial/revlogutils/constants.py for details about the content of an
1309 1309 index entry.
1310 1310 """
1311 1311
1312 1312 _flagserrorclass = error.RevlogError
1313 1313 _inner: "_InnerRevlog"
1314 1314
1315 1315 opener: vfsmod.vfs
1316 1316
1317 1317 @staticmethod
1318 1318 def is_inline_index(header_bytes):
1319 1319 """Determine if a revlog is inline from the initial bytes of the index"""
1320 1320 if len(header_bytes) == 0:
1321 1321 return True
1322 1322
1323 1323 header = INDEX_HEADER.unpack(header_bytes)[0]
1324 1324
1325 1325 _format_flags = header & ~0xFFFF
1326 1326 _format_version = header & 0xFFFF
1327 1327
1328 1328 features = FEATURES_BY_VERSION[_format_version]
1329 1329 return features[b'inline'](_format_flags)
1330 1330
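    # Worked example for the header split above: the first four index bytes
    # hold one 32-bit word whose low 16 bits are the format version and
    # whose high 16 bits are feature flags.  A header word of 0x00030001
    # therefore yields _format_version == 1 (REVLOGV1) and _format_flags ==
    # 0x00030000, and FEATURES_BY_VERSION[1][b'inline'] then decides from
    # those flags whether the revlog stores its data inline.
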
1331 1331 _docket_file: Optional[bytes]
1332 1332
1333 1333 def __init__(
1334 1334 self,
1335 1335 opener: vfsmod.vfs,
1336 1336 target,
1337 1337 radix,
1338 1338 postfix=None, # only exist for `tmpcensored` now
1339 1339 checkambig=False,
1340 1340 mmaplargeindex=False,
1341 1341 censorable=False,
1342 1342 upperboundcomp=None,
1343 1343 persistentnodemap=False,
1344 1344 concurrencychecker=None,
1345 1345 trypending=False,
1346 1346 try_split=False,
1347 1347 canonical_parent_order=True,
1348 1348 data_config=None,
1349 1349 delta_config=None,
1350 1350 feature_config=None,
1351 1351 may_inline=True, # may inline new revlog
1352 1352 ):
1353 1353 """
1354 1354 create a revlog object
1355 1355
1356 1356 opener is a function that abstracts the file opening operation
1357 1357 and can be used to implement COW semantics or the like.
1358 1358
1359 1359 `target`: a (KIND, ID) tuple that identifies the content stored in
1360 1360 this revlog. It helps the rest of the code understand what the revlog
1361 1361 is about without having to resort to heuristics and index filename
1362 1362 analysis. Note that this must reliably be set by normal code, but
1363 1363 test, debug, or performance measurement code might not set it to an
1364 1364 accurate value.
1365 1365 """
1366 1366
1367 1367 self.radix = radix
1368 1368
1369 1369 self._docket_file = None
1370 1370 self._indexfile = None
1371 1371 self._datafile = None
1372 1372 self._sidedatafile = None
1373 1373 self._nodemap_file = None
1374 1374 self.postfix = postfix
1375 1375 self._trypending = trypending
1376 1376 self._try_split = try_split
1377 1377 self._may_inline = may_inline
1378 1378 self.opener = opener
1379 1379 if persistentnodemap:
1380 1380 self._nodemap_file = nodemaputil.get_nodemap_file(self)
1381 1381
1382 1382 assert target[0] in ALL_KINDS
1383 1383 assert len(target) == 2
1384 1384 self.target = target
1385 1385 if feature_config is not None:
1386 1386 self.feature_config = feature_config.copy()
1387 1387 elif b'feature-config' in self.opener.options:
1388 1388 self.feature_config = self.opener.options[b'feature-config'].copy()
1389 1389 else:
1390 1390 self.feature_config = FeatureConfig()
1391 1391 self.feature_config.censorable = censorable
1392 1392 self.feature_config.canonical_parent_order = canonical_parent_order
1393 1393 if data_config is not None:
1394 1394 self.data_config = data_config.copy()
1395 1395 elif b'data-config' in self.opener.options:
1396 1396 self.data_config = self.opener.options[b'data-config'].copy()
1397 1397 else:
1398 1398 self.data_config = DataConfig()
1399 1399 self.data_config.check_ambig = checkambig
1400 1400 self.data_config.mmap_large_index = mmaplargeindex
1401 1401 if delta_config is not None:
1402 1402 self.delta_config = delta_config.copy()
1403 1403 elif b'delta-config' in self.opener.options:
1404 1404 self.delta_config = self.opener.options[b'delta-config'].copy()
1405 1405 else:
1406 1406 self.delta_config = DeltaConfig()
1407 1407 self.delta_config.upper_bound_comp = upperboundcomp
1408 1408
1409 1409 # Maps rev to chain base rev.
1410 1410 self._chainbasecache = util.lrucachedict(100)
1411 1411
1412 1412 self.index = None
1413 1413 self._docket = None
1414 1414 self._nodemap_docket = None
1415 1415 # Mapping of partial identifiers to full nodes.
1416 1416 self._pcache = {}
1417 1417
1418 1418 # other optional features
1419 1419
1420 1420 # Make copy of flag processors so each revlog instance can support
1421 1421 # custom flags.
1422 1422 self._flagprocessors = dict(flagutil.flagprocessors)
1423 1423 # prevent nesting of addgroup
1424 1424 self._adding_group = None
1425 1425
1426 1426 chunk_cache = self._loadindex()
1427 1427 self._load_inner(chunk_cache)
1428 1428 self._concurrencychecker = concurrencychecker
1429 1429
1430 1430 def _init_opts(self):
1431 1431 """process options (from above/config) to setup associated default revlog mode
1432 1432
1433 1433 These values might be affected when actually reading on-disk information.
1434 1434
1435 1435 The relevant values are returned for use in _loadindex().
1436 1436
1437 1437 * newversionflags:
1438 1438 version header to use if we need to create a new revlog
1439 1439
1440 1440 * mmapindexthreshold:
1441 1441 minimal index size at which to start using mmap
1442 1442
1443 1443 * force_nodemap:
1444 1444 force the usage of a "development" version of the nodemap code
1445 1445 """
1446 1446 opts = self.opener.options
1447 1447
1448 1448 if b'changelogv2' in opts and self.revlog_kind == KIND_CHANGELOG:
1449 1449 new_header = CHANGELOGV2
1450 1450 compute_rank = opts.get(b'changelogv2.compute-rank', True)
1451 1451 self.feature_config.compute_rank = compute_rank
1452 1452 elif b'revlogv2' in opts:
1453 1453 new_header = REVLOGV2
1454 1454 elif b'revlogv1' in opts:
1455 1455 new_header = REVLOGV1
1456 1456 if self._may_inline:
1457 1457 new_header |= FLAG_INLINE_DATA
1458 1458 if b'generaldelta' in opts:
1459 1459 new_header |= FLAG_GENERALDELTA
1460 1460 elif b'revlogv0' in self.opener.options:
1461 1461 new_header = REVLOGV0
1462 1462 else:
1463 1463 new_header = REVLOG_DEFAULT_VERSION
1464 1464
1465 1465 mmapindexthreshold = None
1466 1466 if self.data_config.mmap_large_index:
1467 1467 mmapindexthreshold = self.data_config.mmap_index_threshold
1468 1468 if self.feature_config.enable_ellipsis:
1469 1469 self._flagprocessors[REVIDX_ELLIPSIS] = ellipsisprocessor
1470 1470
1471 1471 # revlog v0 doesn't have flag processors
1472 1472 for flag, processor in opts.get(b'flagprocessors', {}).items():
1473 1473 flagutil.insertflagprocessor(flag, processor, self._flagprocessors)
1474 1474
1475 1475 chunk_cache_size = self.data_config.chunk_cache_size
1476 1476 if chunk_cache_size <= 0:
1477 1477 raise error.RevlogError(
1478 1478 _(b'revlog chunk cache size %r is not greater than 0')
1479 1479 % chunk_cache_size
1480 1480 )
1481 1481 elif chunk_cache_size & (chunk_cache_size - 1):
1482 1482 raise error.RevlogError(
1483 1483 _(b'revlog chunk cache size %r is not a power of 2')
1484 1484 % chunk_cache_size
1485 1485 )
1486 1486 force_nodemap = opts.get(b'devel-force-nodemap', False)
1487 1487 return new_header, mmapindexthreshold, force_nodemap
1488 1488
1489 1489 def _get_data(self, filepath, mmap_threshold, size=None):
1490 1490 """return a file content with or without mmap
1491 1491
1492 1492 If the file is missing return the empty string"""
1493 1493 try:
1494 1494 with self.opener(filepath) as fp:
1495 1495 if mmap_threshold is not None:
1496 1496 file_size = self.opener.fstat(fp).st_size
1497 1497 if (
1498 1498 file_size >= mmap_threshold
1499 1499 and self.opener.is_mmap_safe(filepath)
1500 1500 ):
1501 1501 if size is not None:
1502 1502 # avoid a potential mmap crash
1503 1503 size = min(file_size, size)
1504 1504 # TODO: we should call .close() to release resources without
1505 1505 # relying on Python GC
1506 1506 if size is None:
1507 1507 return util.buffer(util.mmapread(fp))
1508 1508 else:
1509 1509 return util.buffer(util.mmapread(fp, size))
1510 1510 if size is None:
1511 1511 return fp.read()
1512 1512 else:
1513 1513 return fp.read(size)
1514 1514 except FileNotFoundError:
1515 1515 return b''
1516 1516
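    # ``_get_data`` above picks between a plain read and mmap: files below
    # ``mmap_threshold`` (or files the vfs reports as unsafe to mmap) are
    # read into memory, while a large enough index is memory-mapped and
    # wrapped in ``util.buffer`` so that later slicing does not copy the
    # whole file.  A missing file is returned as the empty string, which is
    # how a brand new revlog starts out.
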
1517 1517 def get_streams(self, max_linkrev, force_inline=False):
1518 1518 """return a list of streams that represent this revlog
1519 1519
1520 1520 This is used by stream-clone to do bytes to bytes copies of a repository.
1521 1521
1522 1522 This streams data for all revisions that refer to a changelog revision up
1523 1523 to `max_linkrev`.
1524 1524
1525 1525 If `force_inline` is set, it enforces that the stream will represent an inline revlog.
1526 1526
1527 1527 It returns a list of three-tuples:
1528 1528
1529 1529 [
1530 1530 (filename, bytes_stream, stream_size),
1531 1531 …
1532 1532 ]
1533 1533 """
1534 1534 n = len(self)
1535 1535 index = self.index
1536 1536 while n > 0:
1537 1537 linkrev = index[n - 1][4]
1538 1538 if linkrev < max_linkrev:
1539 1539 break
1540 1540 # note: this loop will rarely go through multiple iterations, since
1541 1541 # it only traverses commits created during the current streaming
1542 1542 # pull operation.
1543 1543 #
1544 1544 # If this becomes a problem, using a binary search should cap the
1545 1545 # runtime of this.
1546 1546 n = n - 1
1547 1547 if n == 0:
1548 1548 # no data to send
1549 1549 return []
1550 1550 index_size = n * index.entry_size
1551 1551 data_size = self.end(n - 1)
1552 1552
1553 1553 # XXX we might have been split (or stripped) since the object
1554 1554 # initialization. We need to close this race too, by having a way to
1555 1555 # pre-open the files we feed to the revlog and never closing them before
1556 1556 # we are done streaming.
1557 1557
1558 1558 if self._inline:
1559 1559
1560 1560 def get_stream():
1561 1561 with self.opener(self._indexfile, mode=b"r") as fp:
1562 1562 yield None
1563 1563 size = index_size + data_size
1564 1564 if size <= 65536:
1565 1565 yield fp.read(size)
1566 1566 else:
1567 1567 yield from util.filechunkiter(fp, limit=size)
1568 1568
1569 1569 inline_stream = get_stream()
1570 1570 next(inline_stream)
1571 1571 return [
1572 1572 (self._indexfile, inline_stream, index_size + data_size),
1573 1573 ]
1574 1574 elif force_inline:
1575 1575
1576 1576 def get_stream():
1577 1577 with self.reading():
1578 1578 yield None
1579 1579
1580 1580 for rev in range(n):
1581 1581 idx = self.index.entry_binary(rev)
1582 1582 if rev == 0 and self._docket is None:
1583 1583 # re-inject the inline flag
1584 1584 header = self._format_flags
1585 1585 header |= self._format_version
1586 1586 header |= FLAG_INLINE_DATA
1587 1587 header = self.index.pack_header(header)
1588 1588 idx = header + idx
1589 1589 yield idx
1590 1590 yield self._inner.get_segment_for_revs(rev, rev)[1]
1591 1591
1592 1592 inline_stream = get_stream()
1593 1593 next(inline_stream)
1594 1594 return [
1595 1595 (self._indexfile, inline_stream, index_size + data_size),
1596 1596 ]
1597 1597 else:
1598 1598
1599 1599 def get_index_stream():
1600 1600 with self.opener(self._indexfile, mode=b"r") as fp:
1601 1601 yield None
1602 1602 if index_size <= 65536:
1603 1603 yield fp.read(index_size)
1604 1604 else:
1605 1605 yield from util.filechunkiter(fp, limit=index_size)
1606 1606
1607 1607 def get_data_stream():
1608 1608 with self._datafp() as fp:
1609 1609 yield None
1610 1610 if data_size <= 65536:
1611 1611 yield fp.read(data_size)
1612 1612 else:
1613 1613 yield from util.filechunkiter(fp, limit=data_size)
1614 1614
1615 1615 index_stream = get_index_stream()
1616 1616 next(index_stream)
1617 1617 data_stream = get_data_stream()
1618 1618 next(data_stream)
1619 1619 return [
1620 1620 (self._datafile, data_stream, data_size),
1621 1621 (self._indexfile, index_stream, index_size),
1622 1622 ]
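# A minimal consumer sketch (illustrative only; `rl` and the `vfs` writer
# below are hypothetical names, not defined in this file). Each returned
# stream has already been primed past its initial `yield None`, so
# iterating it yields raw byte chunks adding up to the advertised size:
#
#   for name, stream, size in rl.get_streams(max_linkrev):
#       with vfs(name, mode=b'wb') as out:
#           for chunk in stream:
#               out.write(chunk)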
1623 1623
1624 1624 def _loadindex(self, docket=None):
1625 1625 new_header, mmapindexthreshold, force_nodemap = self._init_opts()
1626 1626
1627 1627 if self.postfix is not None:
1628 1628 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
1629 1629 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
1630 1630 entry_point = b'%s.i.a' % self.radix
1631 1631 elif self._try_split and self.opener.exists(self._split_index_file):
1632 1632 entry_point = self._split_index_file
1633 1633 else:
1634 1634 entry_point = b'%s.i' % self.radix
1635 1635
1636 1636 if docket is not None:
1637 1637 self._docket = docket
1638 1638 self._docket_file = entry_point
1639 1639 else:
1640 1640 self._initempty = True
1641 1641 entry_data = self._get_data(entry_point, mmapindexthreshold)
1642 1642 if len(entry_data) > 0:
1643 1643 header = INDEX_HEADER.unpack(entry_data[:4])[0]
1644 1644 self._initempty = False
1645 1645 else:
1646 1646 header = new_header
1647 1647
1648 1648 self._format_flags = header & ~0xFFFF
1649 1649 self._format_version = header & 0xFFFF
1650 1650
1651 1651 supported_flags = SUPPORTED_FLAGS.get(self._format_version)
1652 1652 if supported_flags is None:
1653 1653 msg = _(b'unknown version (%d) in revlog %s')
1654 1654 msg %= (self._format_version, self.display_id)
1655 1655 raise error.RevlogError(msg)
1656 1656 elif self._format_flags & ~supported_flags:
1657 1657 msg = _(b'unknown flags (%#04x) in version %d revlog %s')
1658 1658 display_flag = self._format_flags >> 16
1659 1659 msg %= (display_flag, self._format_version, self.display_id)
1660 1660 raise error.RevlogError(msg)
1661 1661
1662 1662 features = FEATURES_BY_VERSION[self._format_version]
1663 1663 self._inline = features[b'inline'](self._format_flags)
1664 1664 self.delta_config.general_delta = features[b'generaldelta'](
1665 1665 self._format_flags
1666 1666 )
1667 1667 self.feature_config.has_side_data = features[b'sidedata']
1668 1668
1669 1669 if not features[b'docket']:
1670 1670 self._indexfile = entry_point
1671 1671 index_data = entry_data
1672 1672 else:
1673 1673 self._docket_file = entry_point
1674 1674 if self._initempty:
1675 1675 self._docket = docketutil.default_docket(self, header)
1676 1676 else:
1677 1677 self._docket = docketutil.parse_docket(
1678 1678 self, entry_data, use_pending=self._trypending
1679 1679 )
1680 1680
1681 1681 if self._docket is not None:
1682 1682 self._indexfile = self._docket.index_filepath()
1683 1683 index_data = b''
1684 1684 index_size = self._docket.index_end
1685 1685 if index_size > 0:
1686 1686 index_data = self._get_data(
1687 1687 self._indexfile, mmapindexthreshold, size=index_size
1688 1688 )
1689 1689 if len(index_data) < index_size:
1690 1690 msg = _(b'too few index data for %s: got %d, expected %d')
1691 1691 msg %= (self.display_id, len(index_data), index_size)
1692 1692 raise error.RevlogError(msg)
1693 1693
1694 1694 self._inline = False
1695 1695 # generaldelta implied by version 2 revlogs.
1696 1696 self.delta_config.general_delta = True
1697 1697 # the logic for persistent nodemap will be dealt with within the
1698 1698 # main docket, so disable it for now.
1699 1699 self._nodemap_file = None
1700 1700
1701 1701 if self._docket is not None:
1702 1702 self._datafile = self._docket.data_filepath()
1703 1703 self._sidedatafile = self._docket.sidedata_filepath()
1704 1704 elif self.postfix is None:
1705 1705 self._datafile = b'%s.d' % self.radix
1706 1706 else:
1707 1707 self._datafile = b'%s.d.%s' % (self.radix, self.postfix)
1708 1708
1709 1709 self.nodeconstants = sha1nodeconstants
1710 1710 self.nullid = self.nodeconstants.nullid
1711 1711
1712 1712 # sparse-revlog can't be on without general-delta (issue6056)
1713 1713 if not self.delta_config.general_delta:
1714 1714 self.delta_config.sparse_revlog = False
1715 1715
1716 1716 self._storedeltachains = True
1717 1717
1718 1718 devel_nodemap = (
1719 1719 self._nodemap_file
1720 1720 and force_nodemap
1721 1721 and parse_index_v1_nodemap is not None
1722 1722 )
1723 1723
1724 1724 use_rust_index = False
1725 1725 if rustrevlog is not None and self._nodemap_file is not None:
1726 1726 # we would like to use the rust_index in all cases, especially
1727 1727 # because it is necessary for AncestorsIterator and LazyAncestors
1728 1728 # since the 6.7 cycle.
1729 1729 #
1730 1730 # However, the performance impact of unconditionally building the
1731 1731 # nodemap is currently a problem for non-persistent nodemap
1732 1732 # repositories.
1733 1733 use_rust_index = True
1734 1734
1735 1735 self._parse_index = parse_index_v1
1736 1736 if self._format_version == REVLOGV0:
1737 1737 self._parse_index = revlogv0.parse_index_v0
1738 1738 elif self._format_version == REVLOGV2:
1739 1739 self._parse_index = parse_index_v2
1740 1740 elif self._format_version == CHANGELOGV2:
1741 1741 self._parse_index = parse_index_cl_v2
1742 1742 elif devel_nodemap:
1743 1743 self._parse_index = parse_index_v1_nodemap
1744 1744 elif use_rust_index:
1745 1745 self._parse_index = functools.partial(
1746 1746 parse_index_v1_rust, default_header=new_header
1747 1747 )
1748 1748 try:
1749 1749 d = self._parse_index(index_data, self._inline)
1750 1750 index, chunkcache = d
1751 1751 use_nodemap = (
1752 1752 not self._inline
1753 1753 and self._nodemap_file is not None
1754 1754 and hasattr(index, 'update_nodemap_data')
1755 1755 )
1756 1756 if use_nodemap:
1757 1757 nodemap_data = nodemaputil.persisted_data(self)
1758 1758 if nodemap_data is not None:
1759 1759 docket = nodemap_data[0]
1760 1760 if (
1761 1761 len(d[0]) > docket.tip_rev
1762 1762 and d[0][docket.tip_rev][7] == docket.tip_node
1763 1763 ):
1764 1764 # no changelog tampering
1765 1765 self._nodemap_docket = docket
1766 1766 index.update_nodemap_data(*nodemap_data)
1767 1767 except (ValueError, IndexError):
1768 1768 raise error.RevlogError(
1769 1769 _(b"index %s is corrupted") % self.display_id
1770 1770 )
1771 1771 self.index = index
1772 1772 # revnum -> (chain-length, sum-delta-length)
1773 1773 self._chaininfocache = util.lrucachedict(500)
1774 1774
1775 1775 return chunkcache
1776 1776
1777 1777 def _load_inner(self, chunk_cache):
1778 1778 if self._docket is None:
1779 1779 default_compression_header = None
1780 1780 else:
1781 1781 default_compression_header = self._docket.default_compression_header
1782 1782
1783 1783 self._inner = _InnerRevlog(
1784 1784 opener=self.opener,
1785 1785 index=self.index,
1786 1786 index_file=self._indexfile,
1787 1787 data_file=self._datafile,
1788 1788 sidedata_file=self._sidedatafile,
1789 1789 inline=self._inline,
1790 1790 data_config=self.data_config,
1791 1791 delta_config=self.delta_config,
1792 1792 feature_config=self.feature_config,
1793 1793 chunk_cache=chunk_cache,
1794 1794 default_compression_header=default_compression_header,
1795 1795 )
1796 1796
1797 1797 def get_revlog(self):
1798 1798 """simple function to mirror API of other not-really-revlog API"""
1799 1799 return self
1800 1800
1801 1801 @util.propertycache
1802 1802 def revlog_kind(self):
1803 1803 return self.target[0]
1804 1804
1805 1805 @util.propertycache
1806 1806 def display_id(self):
1807 1807 """The public facing "ID" of the revlog that we use in message"""
1808 1808 if self.revlog_kind == KIND_FILELOG:
1809 1809 # Reference the file without the "data/" prefix, so it is familiar
1810 1810 # to the user.
1811 1811 return self.target[1]
1812 1812 else:
1813 1813 return self.radix
1814 1814
1815 1815 def _datafp(self, mode=b'r'):
1816 1816 """file object for the revlog's data file"""
1817 1817 return self.opener(self._datafile, mode=mode)
1818 1818
1819 1819 def tiprev(self):
1820 1820 return len(self.index) - 1
1821 1821
1822 1822 def tip(self):
1823 1823 return self.node(self.tiprev())
1824 1824
1825 1825 def __contains__(self, rev):
1826 1826 return 0 <= rev < len(self)
1827 1827
1828 1828 def __len__(self):
1829 1829 return len(self.index)
1830 1830
1831 1831 def __iter__(self) -> Iterator[int]:
1832 1832 return iter(range(len(self)))
1833 1833
1834 1834 def revs(self, start=0, stop=None):
1835 1835 """iterate over all rev in this revlog (from start to stop)"""
1836 1836 return storageutil.iterrevs(len(self), start=start, stop=stop)
1837 1837
1838 1838 def hasnode(self, node):
1839 1839 try:
1840 1840 self.rev(node)
1841 1841 return True
1842 1842 except KeyError:
1843 1843 return False
1844 1844
1845 1845 def _candelta(self, baserev, rev):
1846 1846 """whether two revisions (baserev, rev) can be delta-ed or not"""
1847 1847 # Disable delta if either rev requires a content-changing flag
1848 1848 # processor (ex. LFS). This is because such flag processor can alter
1849 1849 # the rawtext content that the delta will be based on, and two clients
1850 1850 # could have a same revlog node with different flags (i.e. different
1851 1851 # rawtext contents) and the delta could be incompatible.
1852 1852 if (self.flags(baserev) & REVIDX_RAWTEXT_CHANGING_FLAGS) or (
1853 1853 self.flags(rev) & REVIDX_RAWTEXT_CHANGING_FLAGS
1854 1854 ):
1855 1855 return False
1856 1856 return True
1857 1857
1858 1858 def update_caches(self, transaction):
1859 1859 """update on disk cache
1860 1860
1861 1861 If a transaction is passed, the update may be delayed to transaction
1862 1862 commit."""
1863 1863 if self._nodemap_file is not None:
1864 1864 if transaction is None:
1865 1865 nodemaputil.update_persistent_nodemap(self)
1866 1866 else:
1867 1867 nodemaputil.setup_persistent_nodemap(transaction, self)
1868 1868
1869 1869 def clearcaches(self, clear_persisted_data: bool = False) -> None:
1870 1870 """Clear in-memory caches"""
1871 1871 self._chainbasecache.clear()
1872 1872 self._inner.clear_cache()
1873 1873 self._pcache = {}
1874 1874 self._nodemap_docket = None
1875 1875 self.index.clearcaches()
1876 1876 # The python code is the one responsible for validating the docket, so we
1877 1877 # end up having to refresh it here.
1878 1878 use_nodemap = (
1879 1879 not self._inline
1880 1880 and self._nodemap_file is not None
1881 1881 and hasattr(self.index, 'update_nodemap_data')
1882 1882 )
1883 1883 if use_nodemap:
1884 1884 nodemap_data = nodemaputil.persisted_data(self)
1885 1885 if nodemap_data is not None:
1886 1886 self._nodemap_docket = nodemap_data[0]
1887 1887 self.index.update_nodemap_data(*nodemap_data)
1888 1888
1889 1889 def rev(self, node):
1890 1890 """return the revision number associated with a <nodeid>"""
1891 1891 try:
1892 1892 return self.index.rev(node)
1893 1893 except TypeError:
1894 1894 raise
1895 1895 except error.RevlogError:
1896 1896 # parsers.c radix tree lookup failed
1897 1897 if (
1898 1898 node == self.nodeconstants.wdirid
1899 1899 or node in self.nodeconstants.wdirfilenodeids
1900 1900 ):
1901 1901 raise error.WdirUnsupported
1902 1902 raise error.LookupError(node, self.display_id, _(b'no node'))
1903 1903
1904 1904 # Accessors for index entries.
1905 1905
1906 1906 # First tuple entry is 8 bytes. First 6 bytes are offset. Last 2 bytes
1907 1907 # are flags.
1908 1908 def start(self, rev):
1909 1909 return int(self.index[rev][0] >> 16)
1910 1910
1911 1911 def sidedata_cut_off(self, rev):
1912 1912 sd_cut_off = self.index[rev][8]
1913 1913 if sd_cut_off != 0:
1914 1914 return sd_cut_off
1915 1915 # This is some annoying dance, because entries without sidedata
1916 1916 # currently use 0 as their offset. (instead of previous-offset +
1917 1917 # previous-size)
1918 1918 #
1919 1919 # We should reconsider this sidedata → 0 sidedata_offset policy.
1920 1920 # In the meantime, we need this.
1921 1921 while 0 <= rev:
1922 1922 e = self.index[rev]
1923 1923 if e[9] != 0:
1924 1924 return e[8] + e[9]
1925 1925 rev -= 1
1926 1926 return 0
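# Worked example of the walk above (numbers are illustrative only): if the
# entry for `rev` records a 0 cut-off and the closest earlier revision with
# sidedata stored it at offset 120 with length 30, the cut-off returned
# here is 150 (offset + length); if no revision at or below `rev` has
# sidedata, the cut-off is 0.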
1927 1927
1928 1928 def flags(self, rev):
1929 1929 return self.index[rev][0] & 0xFFFF
1930 1930
1931 1931 def length(self, rev):
1932 1932 return self.index[rev][1]
1933 1933
1934 1934 def sidedata_length(self, rev):
1935 1935 if not self.feature_config.has_side_data:
1936 1936 return 0
1937 1937 return self.index[rev][9]
1938 1938
1939 1939 def rawsize(self, rev):
1940 1940 """return the length of the uncompressed text for a given revision"""
1941 1941 l = self.index[rev][2]
1942 1942 if l >= 0:
1943 1943 return l
1944 1944
1945 1945 t = self.rawdata(rev)
1946 1946 return len(t)
1947 1947
1948 1948 def size(self, rev):
1949 1949 """length of non-raw text (processed by a "read" flag processor)"""
1950 1950 # fast path: if no "read" flag processor could change the content,
1951 1951 # size is rawsize. note: ELLIPSIS is known to not change the content.
1952 1952 flags = self.flags(rev)
1953 1953 if flags & (flagutil.REVIDX_KNOWN_FLAGS ^ REVIDX_ELLIPSIS) == 0:
1954 1954 return self.rawsize(rev)
1955 1955
1956 1956 return len(self.revision(rev))
1957 1957
1958 1958 def fast_rank(self, rev):
1959 1959 """Return the rank of a revision if already known, or None otherwise.
1960 1960
1961 1961 The rank of a revision is the size of the sub-graph it defines as a
1962 1962 head. Equivalently, the rank of a revision `r` is the size of the set
1963 1963 `ancestors(r)`, `r` included.
1964 1964
1965 1965 This method returns the rank retrieved from the revlog in constant
1966 1966 time. It makes no attempt at computing unknown values for versions of
1967 1967 the revlog which do not persist the rank.
1968 1968 """
1969 1969 rank = self.index[rev][ENTRY_RANK]
1970 1970 if self._format_version != CHANGELOGV2 or rank == RANK_UNKNOWN:
1971 1971 return None
1972 1972 if rev == nullrev:
1973 1973 return 0 # convention
1974 1974 return rank
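# Small illustration of the definition above (not part of this change): a
# root revision, whose only ancestor is itself, has rank 1; a revision with
# three other ancestors has rank 4; nullrev is 0 by convention, as handled
# above.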
1975 1975
1976 1976 def chainbase(self, rev):
1977 1977 base = self._chainbasecache.get(rev)
1978 1978 if base is not None:
1979 1979 return base
1980 1980
1981 1981 index = self.index
1982 1982 iterrev = rev
1983 1983 base = index[iterrev][3]
1984 1984 while base != iterrev:
1985 1985 iterrev = base
1986 1986 base = index[iterrev][3]
1987 1987
1988 1988 self._chainbasecache[rev] = base
1989 1989 return base
1990 1990
1991 1991 def linkrev(self, rev):
1992 1992 return self.index[rev][4]
1993 1993
1994 1994 def parentrevs(self, rev):
1995 1995 try:
1996 1996 entry = self.index[rev]
1997 1997 except IndexError:
1998 1998 if rev == wdirrev:
1999 1999 raise error.WdirUnsupported
2000 2000 raise
2001 2001
2002 2002 if self.feature_config.canonical_parent_order and entry[5] == nullrev:
2003 2003 return entry[6], entry[5]
2004 2004 else:
2005 2005 return entry[5], entry[6]
2006 2006
2007 2007 # fast parentrevs(rev) where rev isn't filtered
2008 2008 _uncheckedparentrevs = parentrevs
2009 2009
2010 2010 def node(self, rev):
2011 2011 try:
2012 2012 return self.index[rev][7]
2013 2013 except IndexError:
2014 2014 if rev == wdirrev:
2015 2015 raise error.WdirUnsupported
2016 2016 raise
2017 2017
2018 2018 # Derived from index values.
2019 2019
2020 2020 def end(self, rev):
2021 2021 return self.start(rev) + self.length(rev)
2022 2022
2023 2023 def parents(self, node):
2024 2024 i = self.index
2025 2025 d = i[self.rev(node)]
2026 2026 # inline node() to avoid function call overhead
2027 2027 if self.feature_config.canonical_parent_order and d[5] == self.nullid:
2028 2028 return i[d[6]][7], i[d[5]][7]
2029 2029 else:
2030 2030 return i[d[5]][7], i[d[6]][7]
2031 2031
2032 2032 def chainlen(self, rev):
2033 2033 return self._chaininfo(rev)[0]
2034 2034
2035 2035 def _chaininfo(self, rev):
2036 2036 chaininfocache = self._chaininfocache
2037 2037 if rev in chaininfocache:
2038 2038 return chaininfocache[rev]
2039 2039 index = self.index
2040 2040 generaldelta = self.delta_config.general_delta
2041 2041 iterrev = rev
2042 2042 e = index[iterrev]
2043 2043 clen = 0
2044 2044 compresseddeltalen = 0
2045 2045 while iterrev != e[3]:
2046 2046 clen += 1
2047 2047 compresseddeltalen += e[1]
2048 2048 if generaldelta:
2049 2049 iterrev = e[3]
2050 2050 else:
2051 2051 iterrev -= 1
2052 2052 if iterrev in chaininfocache:
2053 2053 t = chaininfocache[iterrev]
2054 2054 clen += t[0]
2055 2055 compresseddeltalen += t[1]
2056 2056 break
2057 2057 e = index[iterrev]
2058 2058 else:
2059 2059 # Add text length of base since decompressing that also takes
2060 2060 # work. For cache hits the length is already included.
2061 2061 compresseddeltalen += e[1]
2062 2062 r = (clen, compresseddeltalen)
2063 2063 chaininfocache[rev] = r
2064 2064 return r
2065 2065
2066 2066 def _deltachain(self, rev, stoprev=None):
2067 2067 return self._inner._deltachain(rev, stoprev=stoprev)
2068 2068
2069 2069 def ancestors(self, revs, stoprev=0, inclusive=False):
2070 2070 """Generate the ancestors of 'revs' in reverse revision order.
2071 2071 Does not generate revs lower than stoprev.
2072 2072
2073 2073 See the documentation for ancestor.lazyancestors for more details."""
2074 2074
2075 2075 # first, make sure start revisions aren't filtered
2076 2076 revs = list(revs)
2077 2077 checkrev = self.node
2078 2078 for r in revs:
2079 2079 checkrev(r)
2080 2080 # and we're sure ancestors aren't filtered as well
2081 2081
2082 2082 if rustancestor is not None and self.index.rust_ext_compat:
2083 2083 lazyancestors = rustancestor.LazyAncestors
2084 2084 arg = self.index
2085 2085 else:
2086 2086 lazyancestors = ancestor.lazyancestors
2087 2087 arg = self._uncheckedparentrevs
2088 2088 return lazyancestors(arg, revs, stoprev=stoprev, inclusive=inclusive)
2089 2089
2090 2090 def descendants(self, revs):
2091 2091 return dagop.descendantrevs(revs, self.revs, self.parentrevs)
2092 2092
2093 2093 def findcommonmissing(self, common=None, heads=None):
2094 2094 """Return a tuple of the ancestors of common and the ancestors of heads
2095 2095 that are not ancestors of common. In revset terminology, we return the
2096 2096 tuple:
2097 2097
2098 2098 ::common, (::heads) - (::common)
2099 2099
2100 2100 The list is sorted by revision number, meaning it is
2101 2101 topologically sorted.
2102 2102
2103 2103 'heads' and 'common' are both lists of node IDs. If heads is
2104 2104 not supplied, uses all of the revlog's heads. If common is not
2105 2105 supplied, uses nullid."""
2106 2106 if common is None:
2107 2107 common = [self.nullid]
2108 2108 if heads is None:
2109 2109 heads = self.heads()
2110 2110
2111 2111 common = [self.rev(n) for n in common]
2112 2112 heads = [self.rev(n) for n in heads]
2113 2113
2114 2114 # we want the ancestors, but inclusive
2115 2115 class lazyset:
2116 2116 def __init__(self, lazyvalues):
2117 2117 self.addedvalues = set()
2118 2118 self.lazyvalues = lazyvalues
2119 2119
2120 2120 def __contains__(self, value):
2121 2121 return value in self.addedvalues or value in self.lazyvalues
2122 2122
2123 2123 def __iter__(self):
2124 2124 added = self.addedvalues
2125 2125 for r in added:
2126 2126 yield r
2127 2127 for r in self.lazyvalues:
2128 2128 if not r in added:
2129 2129 yield r
2130 2130
2131 2131 def add(self, value):
2132 2132 self.addedvalues.add(value)
2133 2133
2134 2134 def update(self, values):
2135 2135 self.addedvalues.update(values)
2136 2136
2137 2137 has = lazyset(self.ancestors(common))
2138 2138 has.add(nullrev)
2139 2139 has.update(common)
2140 2140
2141 2141 # take all ancestors from heads that aren't in has
2142 2142 missing = set()
2143 2143 visit = collections.deque(r for r in heads if r not in has)
2144 2144 while visit:
2145 2145 r = visit.popleft()
2146 2146 if r in missing:
2147 2147 continue
2148 2148 else:
2149 2149 missing.add(r)
2150 2150 for p in self.parentrevs(r):
2151 2151 if p not in has:
2152 2152 visit.append(p)
2153 2153 missing = list(missing)
2154 2154 missing.sort()
2155 2155 return has, [self.node(miss) for miss in missing]
2156 2156
2157 2157 def incrementalmissingrevs(self, common=None):
2158 2158 """Return an object that can be used to incrementally compute the
2159 2159 revision numbers of the ancestors of arbitrary sets that are not
2160 2160 ancestors of common. This is an ancestor.incrementalmissingancestors
2161 2161 object.
2162 2162
2163 2163 'common' is a list of revision numbers. If common is not supplied, uses
2164 2164 nullrev.
2165 2165 """
2166 2166 if common is None:
2167 2167 common = [nullrev]
2168 2168
2169 2169 if rustancestor is not None and self.index.rust_ext_compat:
2170 2170 return rustancestor.MissingAncestors(self.index, common)
2171 2171 return ancestor.incrementalmissingancestors(self.parentrevs, common)
2172 2172
2173 2173 def findmissingrevs(self, common=None, heads=None):
2174 2174 """Return the revision numbers of the ancestors of heads that
2175 2175 are not ancestors of common.
2176 2176
2177 2177 More specifically, return a list of revision numbers corresponding to
2178 2178 nodes N such that every N satisfies the following constraints:
2179 2179
2180 2180 1. N is an ancestor of some node in 'heads'
2181 2181 2. N is not an ancestor of any node in 'common'
2182 2182
2183 2183 The list is sorted by revision number, meaning it is
2184 2184 topologically sorted.
2185 2185
2186 2186 'heads' and 'common' are both lists of revision numbers. If heads is
2187 2187 not supplied, uses all of the revlog's heads. If common is not
2188 2188 supplied, uses nullid."""
2189 2189 if common is None:
2190 2190 common = [nullrev]
2191 2191 if heads is None:
2192 2192 heads = self.headrevs()
2193 2193
2194 2194 inc = self.incrementalmissingrevs(common=common)
2195 2195 return inc.missingancestors(heads)
2196 2196
2197 2197 def findmissing(self, common=None, heads=None):
2198 2198 """Return the ancestors of heads that are not ancestors of common.
2199 2199
2200 2200 More specifically, return a list of nodes N such that every N
2201 2201 satisfies the following constraints:
2202 2202
2203 2203 1. N is an ancestor of some node in 'heads'
2204 2204 2. N is not an ancestor of any node in 'common'
2205 2205
2206 2206 The list is sorted by revision number, meaning it is
2207 2207 topologically sorted.
2208 2208
2209 2209 'heads' and 'common' are both lists of node IDs. If heads is
2210 2210 not supplied, uses all of the revlog's heads. If common is not
2211 2211 supplied, uses nullid."""
2212 2212 if common is None:
2213 2213 common = [self.nullid]
2214 2214 if heads is None:
2215 2215 heads = self.heads()
2216 2216
2217 2217 common = [self.rev(n) for n in common]
2218 2218 heads = [self.rev(n) for n in heads]
2219 2219
2220 2220 inc = self.incrementalmissingrevs(common=common)
2221 2221 return [self.node(r) for r in inc.missingancestors(heads)]
2222 2222
2223 2223 def nodesbetween(self, roots=None, heads=None):
2224 2224 """Return a topological path from 'roots' to 'heads'.
2225 2225
2226 2226 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
2227 2227 topologically sorted list of all nodes N that satisfy both of
2228 2228 these constraints:
2229 2229
2230 2230 1. N is a descendant of some node in 'roots'
2231 2231 2. N is an ancestor of some node in 'heads'
2232 2232
2233 2233 Every node is considered to be both a descendant and an ancestor
2234 2234 of itself, so every reachable node in 'roots' and 'heads' will be
2235 2235 included in 'nodes'.
2236 2236
2237 2237 'outroots' is the list of reachable nodes in 'roots', i.e., the
2238 2238 subset of 'roots' that is returned in 'nodes'. Likewise,
2239 2239 'outheads' is the subset of 'heads' that is also in 'nodes'.
2240 2240
2241 2241 'roots' and 'heads' are both lists of node IDs. If 'roots' is
2242 2242 unspecified, uses nullid as the only root. If 'heads' is
2243 2243 unspecified, uses list of all of the revlog's heads."""
2244 2244 nonodes = ([], [], [])
2245 2245 if roots is not None:
2246 2246 roots = list(roots)
2247 2247 if not roots:
2248 2248 return nonodes
2249 2249 lowestrev = min([self.rev(n) for n in roots])
2250 2250 else:
2251 2251 roots = [self.nullid] # Everybody's a descendant of nullid
2252 2252 lowestrev = nullrev
2253 2253 if (lowestrev == nullrev) and (heads is None):
2254 2254 # We want _all_ the nodes!
2255 2255 return (
2256 2256 [self.node(r) for r in self],
2257 2257 [self.nullid],
2258 2258 list(self.heads()),
2259 2259 )
2260 2260 if heads is None:
2261 2261 # All nodes are ancestors, so the latest ancestor is the last
2262 2262 # node.
2263 2263 highestrev = len(self) - 1
2264 2264 # Set ancestors to None to signal that every node is an ancestor.
2265 2265 ancestors = None
2266 2266 # Set heads to an empty dictionary for later discovery of heads
2267 2267 heads = {}
2268 2268 else:
2269 2269 heads = list(heads)
2270 2270 if not heads:
2271 2271 return nonodes
2272 2272 ancestors = set()
2273 2273 # Turn heads into a dictionary so we can remove 'fake' heads.
2274 2274 # Also, later we will be using it to filter out the heads we can't
2275 2275 # find from roots.
2276 2276 heads = dict.fromkeys(heads, False)
2277 2277 # Start at the top and keep marking parents until we're done.
2278 2278 nodestotag = set(heads)
2279 2279 # Remember where the top was so we can use it as a limit later.
2280 2280 highestrev = max([self.rev(n) for n in nodestotag])
2281 2281 while nodestotag:
2282 2282 # grab a node to tag
2283 2283 n = nodestotag.pop()
2284 2284 # Never tag nullid
2285 2285 if n == self.nullid:
2286 2286 continue
2287 2287 # A node's revision number represents its place in a
2288 2288 # topologically sorted list of nodes.
2289 2289 r = self.rev(n)
2290 2290 if r >= lowestrev:
2291 2291 if n not in ancestors:
2292 2292 # If we are possibly a descendant of one of the roots
2293 2293 # and we haven't already been marked as an ancestor
2294 2294 ancestors.add(n) # Mark as ancestor
2295 2295 # Add non-nullid parents to list of nodes to tag.
2296 2296 nodestotag.update(
2297 2297 [p for p in self.parents(n) if p != self.nullid]
2298 2298 )
2299 2299 elif n in heads: # We've seen it before, is it a fake head?
2300 2300 # So it is, real heads should not be the ancestors of
2301 2301 # any other heads.
2302 2302 heads.pop(n)
2303 2303 if not ancestors:
2304 2304 return nonodes
2305 2305 # Now that we have our set of ancestors, we want to remove any
2306 2306 # roots that are not ancestors.
2307 2307
2308 2308 # If one of the roots was nullid, everything is included anyway.
2309 2309 if lowestrev > nullrev:
2310 2310 # But, since we weren't, let's recompute the lowest rev to not
2311 2311 # include roots that aren't ancestors.
2312 2312
2313 2313 # Filter out roots that aren't ancestors of heads
2314 2314 roots = [root for root in roots if root in ancestors]
2315 2315 # Recompute the lowest revision
2316 2316 if roots:
2317 2317 lowestrev = min([self.rev(root) for root in roots])
2318 2318 else:
2319 2319 # No more roots? Return empty list
2320 2320 return nonodes
2321 2321 else:
2322 2322 # We are descending from nullid, and don't need to care about
2323 2323 # any other roots.
2324 2324 lowestrev = nullrev
2325 2325 roots = [self.nullid]
2326 2326 # Transform our roots list into a set.
2327 2327 descendants = set(roots)
2328 2328 # Also, keep the original roots so we can filter out roots that aren't
2329 2329 # 'real' roots (i.e. are descended from other roots).
2330 2330 roots = descendants.copy()
2331 2331 # Our topologically sorted list of output nodes.
2332 2332 orderedout = []
2333 2333 # Don't start at nullid since we don't want nullid in our output list,
2334 2334 # and if nullid shows up in descendants, empty parents will look like
2335 2335 # they're descendants.
2336 2336 for r in self.revs(start=max(lowestrev, 0), stop=highestrev + 1):
2337 2337 n = self.node(r)
2338 2338 isdescendant = False
2339 2339 if lowestrev == nullrev: # Everybody is a descendant of nullid
2340 2340 isdescendant = True
2341 2341 elif n in descendants:
2342 2342 # n is already a descendant
2343 2343 isdescendant = True
2344 2344 # This check only needs to be done here because all the roots
2345 2345 # will start being marked as descendants before the loop.
2346 2346 if n in roots:
2347 2347 # If n was a root, check if it's a 'real' root.
2348 2348 p = tuple(self.parents(n))
2349 2349 # If any of its parents are descendants, it's not a root.
2350 2350 if (p[0] in descendants) or (p[1] in descendants):
2351 2351 roots.remove(n)
2352 2352 else:
2353 2353 p = tuple(self.parents(n))
2354 2354 # A node is a descendant if either of its parents are
2355 2355 # descendants. (We seeded the descendants set with the roots
2356 2356 # up there, remember?)
2357 2357 if (p[0] in descendants) or (p[1] in descendants):
2358 2358 descendants.add(n)
2359 2359 isdescendant = True
2360 2360 if isdescendant and ((ancestors is None) or (n in ancestors)):
2361 2361 # Only include nodes that are both descendants and ancestors.
2362 2362 orderedout.append(n)
2363 2363 if (ancestors is not None) and (n in heads):
2364 2364 # We're trying to figure out which heads are reachable
2365 2365 # from roots.
2366 2366 # Mark this head as having been reached
2367 2367 heads[n] = True
2368 2368 elif ancestors is None:
2369 2369 # Otherwise, we're trying to discover the heads.
2370 2370 # Assume this is a head because if it isn't, the next step
2371 2371 # will eventually remove it.
2372 2372 heads[n] = True
2373 2373 # But, obviously its parents aren't.
2374 2374 for p in self.parents(n):
2375 2375 heads.pop(p, None)
2376 2376 heads = [head for head, flag in heads.items() if flag]
2377 2377 roots = list(roots)
2378 2378 assert orderedout
2379 2379 assert roots
2380 2380 assert heads
2381 2381 return (orderedout, roots, heads)
2382 2382
2383 def headrevs(self, revs=None):
2383 def headrevs(self, revs=None, stop_rev=None):
2384 2384 if revs is None:
2385 return self.index.headrevs()
2385 excluded = None
2386 if stop_rev is not None and stop_rev < len(self.index):
2387 # We should let the native code handle it, but this is a
2388 # simple enough first step.
2389 excluded = range(stop_rev, len(self.index))
2390 return self.index.headrevs(excluded)
2391 assert stop_rev is None
2386 2392 if rustdagop is not None and self.index.rust_ext_compat:
2387 2393 return rustdagop.headrevs(self.index, revs)
2388 2394 return dagop.headrevs(revs, self._uncheckedparentrevs)
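# Usage sketch for the new `stop_rev` argument (illustrative only; `rl` is
# a hypothetical revlog instance):
#
#   heads = rl.headrevs(stop_rev=100)
#
# is expected to return the heads of the graph restricted to revisions
# 0..99: revisions >= stop_rev are handed to the index as excluded, as if
# they did not exist, per the `excluded = range(...)` fallback above.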
2389 2395
2390 2396 def headrevsdiff(self, start, stop):
2391 2397 try:
2392 2398 return self.index.headrevsdiff(start, stop)
2393 2399 except AttributeError:
2394 2400 return dagop.headrevsdiff(self._uncheckedparentrevs, start, stop)
2395 2401
2396 2402 def computephases(self, roots):
2397 2403 return self.index.computephasesmapsets(roots)
2398 2404
2399 2405 def _head_node_ids(self):
2400 2406 try:
2401 2407 return self.index.head_node_ids()
2402 2408 except AttributeError:
2403 2409 return [self.node(r) for r in self.headrevs()]
2404 2410
2405 2411 def heads(self, start=None, stop=None):
2406 2412 """return the list of all nodes that have no children
2407 2413
2408 2414 if start is specified, only heads that are descendants of
2409 2415 start will be returned
2410 2416 if stop is specified, it will consider all the revs from stop
2411 2417 as if they had no children
2412 2418 """
2413 2419 if start is None and stop is None:
2414 2420 if not len(self):
2415 2421 return [self.nullid]
2416 2422 return self._head_node_ids()
2417 2423 if start is None:
2418 2424 start = nullrev
2419 2425 else:
2420 2426 start = self.rev(start)
2421 2427
2422 2428 stoprevs = {self.rev(n) for n in stop or []}
2423 2429
2424 2430 revs = dagop.headrevssubset(
2425 2431 self.revs, self.parentrevs, startrev=start, stoprevs=stoprevs
2426 2432 )
2427 2433
2428 2434 return [self.node(rev) for rev in revs]
2429 2435
2430 2436 def diffheads(self, start, stop):
2431 2437 """return the nodes that make up the difference between
2432 2438 heads of revs before `start` and heads of revs before `stop`"""
2433 2439 removed, added = self.headrevsdiff(start, stop)
2434 2440 return [self.node(r) for r in removed], [self.node(r) for r in added]
2435 2441
2436 2442 def children(self, node):
2437 2443 """find the children of a given node"""
2438 2444 c = []
2439 2445 p = self.rev(node)
2440 2446 for r in self.revs(start=p + 1):
2441 2447 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
2442 2448 if prevs:
2443 2449 for pr in prevs:
2444 2450 if pr == p:
2445 2451 c.append(self.node(r))
2446 2452 elif p == nullrev:
2447 2453 c.append(self.node(r))
2448 2454 return c
2449 2455
2450 2456 def commonancestorsheads(self, a, b):
2451 2457 """calculate all the heads of the common ancestors of nodes a and b"""
2452 2458 a, b = self.rev(a), self.rev(b)
2453 2459 ancs = self._commonancestorsheads(a, b)
2454 2460 return pycompat.maplist(self.node, ancs)
2455 2461
2456 2462 def _commonancestorsheads(self, *revs):
2457 2463 """calculate all the heads of the common ancestors of revs"""
2458 2464 try:
2459 2465 ancs = self.index.commonancestorsheads(*revs)
2460 2466 except (AttributeError, OverflowError): # C implementation failed
2461 2467 ancs = ancestor.commonancestorsheads(self.parentrevs, *revs)
2462 2468 return ancs
2463 2469
2464 2470 def isancestor(self, a, b):
2465 2471 """return True if node a is an ancestor of node b
2466 2472
2467 2473 A revision is considered an ancestor of itself."""
2468 2474 a, b = self.rev(a), self.rev(b)
2469 2475 return self.isancestorrev(a, b)
2470 2476
2471 2477 def isancestorrev(self, a, b):
2472 2478 """return True if revision a is an ancestor of revision b
2473 2479
2474 2480 A revision is considered an ancestor of itself.
2475 2481
2476 2482 The implementation of this is trivial but the use of
2477 2483 reachableroots is not."""
2478 2484 if a == nullrev:
2479 2485 return True
2480 2486 elif a == b:
2481 2487 return True
2482 2488 elif a > b:
2483 2489 return False
2484 2490 return bool(self.reachableroots(a, [b], [a], includepath=False))
2485 2491
2486 2492 def reachableroots(self, minroot, heads, roots, includepath=False):
2487 2493 """return (heads(::(<roots> and <roots>::<heads>)))
2488 2494
2489 2495 If includepath is True, return (<roots>::<heads>)."""
2490 2496 try:
2491 2497 return self.index.reachableroots2(
2492 2498 minroot, heads, roots, includepath
2493 2499 )
2494 2500 except AttributeError:
2495 2501 return dagop._reachablerootspure(
2496 2502 self.parentrevs, minroot, roots, heads, includepath
2497 2503 )
2498 2504
2499 2505 def ancestor(self, a, b):
2500 2506 """calculate the "best" common ancestor of nodes a and b"""
2501 2507
2502 2508 a, b = self.rev(a), self.rev(b)
2503 2509 try:
2504 2510 ancs = self.index.ancestors(a, b)
2505 2511 except (AttributeError, OverflowError):
2506 2512 ancs = ancestor.ancestors(self.parentrevs, a, b)
2507 2513 if ancs:
2508 2514 # choose a consistent winner when there's a tie
2509 2515 return min(map(self.node, ancs))
2510 2516 return self.nullid
2511 2517
2512 2518 def _match(self, id):
2513 2519 if isinstance(id, int):
2514 2520 # rev
2515 2521 return self.node(id)
2516 2522 if len(id) == self.nodeconstants.nodelen:
2517 2523 # possibly a binary node
2518 2524 # odds of a binary node being all hex in ASCII are 1 in 10**25
2519 2525 try:
2520 2526 node = id
2521 2527 self.rev(node) # quick search the index
2522 2528 return node
2523 2529 except error.LookupError:
2524 2530 pass # may be partial hex id
2525 2531 try:
2526 2532 # str(rev)
2527 2533 rev = int(id)
2528 2534 if b"%d" % rev != id:
2529 2535 raise ValueError
2530 2536 if rev < 0:
2531 2537 rev = len(self) + rev
2532 2538 if rev < 0 or rev >= len(self):
2533 2539 raise ValueError
2534 2540 return self.node(rev)
2535 2541 except (ValueError, OverflowError):
2536 2542 pass
2537 2543 if len(id) == 2 * self.nodeconstants.nodelen:
2538 2544 try:
2539 2545 # a full hex nodeid?
2540 2546 node = bin(id)
2541 2547 self.rev(node)
2542 2548 return node
2543 2549 except (binascii.Error, error.LookupError):
2544 2550 pass
2545 2551
2546 2552 def _partialmatch(self, id):
2547 2553 # we don't care about wdirfilenodeids as they should always be full hashes
2548 2554 maybewdir = self.nodeconstants.wdirhex.startswith(id)
2549 2555 ambiguous = False
2550 2556 try:
2551 2557 partial = self.index.partialmatch(id)
2552 2558 if partial and self.hasnode(partial):
2553 2559 if maybewdir:
2554 2560 # single 'ff...' match in radix tree, ambiguous with wdir
2555 2561 ambiguous = True
2556 2562 else:
2557 2563 return partial
2558 2564 elif maybewdir:
2559 2565 # no 'ff...' match in radix tree, wdir identified
2560 2566 raise error.WdirUnsupported
2561 2567 else:
2562 2568 return None
2563 2569 except error.RevlogError:
2564 2570 # parsers.c radix tree lookup gave multiple matches
2565 2571 # fast path: for unfiltered changelog, radix tree is accurate
2566 2572 if not getattr(self, 'filteredrevs', None):
2567 2573 ambiguous = True
2568 2574 # fall through to slow path that filters hidden revisions
2569 2575 except (AttributeError, ValueError):
2570 2576 # we are pure python, or key is not hex
2571 2577 pass
2572 2578 if ambiguous:
2573 2579 raise error.AmbiguousPrefixLookupError(
2574 2580 id, self.display_id, _(b'ambiguous identifier')
2575 2581 )
2576 2582
2577 2583 if id in self._pcache:
2578 2584 return self._pcache[id]
2579 2585
2580 2586 if len(id) <= 40:
2581 2587 # hex(node)[:...]
2582 2588 l = len(id) // 2 * 2 # grab an even number of digits
2583 2589 try:
2584 2590 # we're dropping the last digit, so let's check that it's hex,
2585 2591 # to avoid the expensive computation below if it's not
2586 2592 if len(id) % 2 > 0:
2587 2593 if not (id[-1] in hexdigits):
2588 2594 return None
2589 2595 prefix = bin(id[:l])
2590 2596 except binascii.Error:
2591 2597 pass
2592 2598 else:
2593 2599 nl = [e[7] for e in self.index if e[7].startswith(prefix)]
2594 2600 nl = [
2595 2601 n for n in nl if hex(n).startswith(id) and self.hasnode(n)
2596 2602 ]
2597 2603 if self.nodeconstants.nullhex.startswith(id):
2598 2604 nl.append(self.nullid)
2599 2605 if len(nl) > 0:
2600 2606 if len(nl) == 1 and not maybewdir:
2601 2607 self._pcache[id] = nl[0]
2602 2608 return nl[0]
2603 2609 raise error.AmbiguousPrefixLookupError(
2604 2610 id, self.display_id, _(b'ambiguous identifier')
2605 2611 )
2606 2612 if maybewdir:
2607 2613 raise error.WdirUnsupported
2608 2614 return None
2609 2615
2610 2616 def lookup(self, id):
2611 2617 """locate a node based on:
2612 2618 - revision number or str(revision number)
2613 2619 - nodeid or subset of hex nodeid
2614 2620 """
2615 2621 n = self._match(id)
2616 2622 if n is not None:
2617 2623 return n
2618 2624 n = self._partialmatch(id)
2619 2625 if n:
2620 2626 return n
2621 2627
2622 2628 raise error.LookupError(id, self.display_id, _(b'no match found'))
2623 2629
2624 2630 def shortest(self, node, minlength=1):
2625 2631 """Find the shortest unambiguous prefix that matches node."""
2626 2632
2627 2633 def isvalid(prefix):
2628 2634 try:
2629 2635 matchednode = self._partialmatch(prefix)
2630 2636 except error.AmbiguousPrefixLookupError:
2631 2637 return False
2632 2638 except error.WdirUnsupported:
2633 2639 # single 'ff...' match
2634 2640 return True
2635 2641 if matchednode is None:
2636 2642 raise error.LookupError(node, self.display_id, _(b'no node'))
2637 2643 return True
2638 2644
2639 2645 def maybewdir(prefix):
2640 2646 return all(c == b'f' for c in pycompat.iterbytestr(prefix))
2641 2647
2642 2648 hexnode = hex(node)
2643 2649
2644 2650 def disambiguate(hexnode, minlength):
2645 2651 """Disambiguate against wdirid."""
2646 2652 for length in range(minlength, len(hexnode) + 1):
2647 2653 prefix = hexnode[:length]
2648 2654 if not maybewdir(prefix):
2649 2655 return prefix
2650 2656
2651 2657 if not getattr(self, 'filteredrevs', None):
2652 2658 try:
2653 2659 length = max(self.index.shortest(node), minlength)
2654 2660 return disambiguate(hexnode, length)
2655 2661 except error.RevlogError:
2656 2662 if node != self.nodeconstants.wdirid:
2657 2663 raise error.LookupError(
2658 2664 node, self.display_id, _(b'no node')
2659 2665 )
2660 2666 except AttributeError:
2661 2667 # Fall through to pure code
2662 2668 pass
2663 2669
2664 2670 if node == self.nodeconstants.wdirid:
2665 2671 for length in range(minlength, len(hexnode) + 1):
2666 2672 prefix = hexnode[:length]
2667 2673 if isvalid(prefix):
2668 2674 return prefix
2669 2675
2670 2676 for length in range(minlength, len(hexnode) + 1):
2671 2677 prefix = hexnode[:length]
2672 2678 if isvalid(prefix):
2673 2679 return disambiguate(hexnode, length)
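# Behaviour sketch (hypothetical values): shortest(node, minlength=1)
# returns the shortest hex prefix of `node`, at least `minlength`
# characters long, that resolves unambiguously; it may be as short as two
# characters (e.g. b'a1') when no other node shares those digits, while
# all-'f' prefixes are lengthened until they can no longer be mistaken for
# the working directory id.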
2674 2680
2675 2681 def cmp(self, node, text):
2676 2682 """compare text with a given file revision
2677 2683
2678 2684 returns True if text is different than what is stored.
2679 2685 """
2680 2686 p1, p2 = self.parents(node)
2681 2687 return storageutil.hashrevisionsha1(text, p1, p2) != node
2682 2688
2683 2689 def deltaparent(self, rev):
2684 2690 """return deltaparent of the given revision"""
2685 2691 base = self.index[rev][3]
2686 2692 if base == rev:
2687 2693 return nullrev
2688 2694 elif self.delta_config.general_delta:
2689 2695 return base
2690 2696 else:
2691 2697 return rev - 1
2692 2698
2693 2699 def issnapshot(self, rev):
2694 2700 """tells whether rev is a snapshot"""
2695 2701 ret = self._inner.issnapshot(rev)
2696 2702 self.issnapshot = self._inner.issnapshot
2697 2703 return ret
2698 2704
2699 2705 def snapshotdepth(self, rev):
2700 2706 """number of snapshot in the chain before this one"""
2701 2707 if not self.issnapshot(rev):
2702 2708 raise error.ProgrammingError(b'revision %d not a snapshot' % rev)
2703 2709 return len(self._inner._deltachain(rev)[0]) - 1
2704 2710
2705 2711 def revdiff(self, rev1, rev2):
2706 2712 """return or calculate a delta between two revisions
2707 2713
2708 2714 The delta calculated is in binary form and is intended to be written to
2709 2715 revlog data directly. So this function needs raw revision data.
2710 2716 """
2711 2717 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
2712 2718 return bytes(self._inner._chunk(rev2))
2713 2719
2714 2720 return mdiff.textdiff(self.rawdata(rev1), self.rawdata(rev2))
2715 2721
2716 2722 def revision(self, nodeorrev):
2717 2723 """return an uncompressed revision of a given node or revision
2718 2724 number.
2719 2725 """
2720 2726 return self._revisiondata(nodeorrev)
2721 2727
2722 2728 def sidedata(self, nodeorrev):
2723 2729 """a map of extra data related to the changeset but not part of the hash
2724 2730
2725 2731 This function currently returns a dictionary. However, a more advanced
2726 2732 mapping object will likely be used in the future for more
2727 2733 efficient/lazy code.
2728 2734 """
2729 2735 # deal with <nodeorrev> argument type
2730 2736 if isinstance(nodeorrev, int):
2731 2737 rev = nodeorrev
2732 2738 else:
2733 2739 rev = self.rev(nodeorrev)
2734 2740 return self._sidedata(rev)
2735 2741
2736 2742 def _rawtext(self, node, rev):
2737 2743 """return the possibly unvalidated rawtext for a revision
2738 2744
2739 2745 returns (rev, rawtext, validated)
2740 2746 """
2741 2747 # Check if we have the entry in cache
2742 2748 # The cache entry looks like (node, rev, rawtext)
2743 2749 if self._inner._revisioncache:
2744 2750 if self._inner._revisioncache[0] == node:
2745 2751 return (rev, self._inner._revisioncache[2], True)
2746 2752
2747 2753 if rev is None:
2748 2754 rev = self.rev(node)
2749 2755
2750 2756 text = self._inner.raw_text(node, rev)
2751 2757 return (rev, text, False)
2752 2758
2753 2759 def _revisiondata(self, nodeorrev, raw=False):
2754 2760 # deal with <nodeorrev> argument type
2755 2761 if isinstance(nodeorrev, int):
2756 2762 rev = nodeorrev
2757 2763 node = self.node(rev)
2758 2764 else:
2759 2765 node = nodeorrev
2760 2766 rev = None
2761 2767
2762 2768 # fast path the special `nullid` rev
2763 2769 if node == self.nullid:
2764 2770 return b""
2765 2771
2766 2772 # ``rawtext`` is the text as stored inside the revlog. Might be the
2767 2773 # revision or might need to be processed to retrieve the revision.
2768 2774 rev, rawtext, validated = self._rawtext(node, rev)
2769 2775
2770 2776 if raw and validated:
2771 2777 # if we don't want to process the raw text and that raw
2772 2778 # text is cached, we can exit early.
2773 2779 return rawtext
2774 2780 if rev is None:
2775 2781 rev = self.rev(node)
2776 2782 # the revlog's flag for this revision
2777 2783 # (usually alter its state or content)
2778 2784 flags = self.flags(rev)
2779 2785
2780 2786 if validated and flags == REVIDX_DEFAULT_FLAGS:
2781 2787 # no extra flags set, no flag processor runs, text = rawtext
2782 2788 return rawtext
2783 2789
2784 2790 if raw:
2785 2791 validatehash = flagutil.processflagsraw(self, rawtext, flags)
2786 2792 text = rawtext
2787 2793 else:
2788 2794 r = flagutil.processflagsread(self, rawtext, flags)
2789 2795 text, validatehash = r
2790 2796 if validatehash:
2791 2797 self.checkhash(text, node, rev=rev)
2792 2798 if not validated:
2793 2799 self._inner._revisioncache = (node, rev, rawtext)
2794 2800
2795 2801 return text
2796 2802
2797 2803 def _sidedata(self, rev):
2798 2804 """Return the sidedata for a given revision number."""
2799 2805 if self._sidedatafile is None:
2800 2806 return {}
2801 2807 sidedata_end = None
2802 2808 if self._docket is not None:
2803 2809 sidedata_end = self._docket.sidedata_end
2804 2810 return self._inner.sidedata(rev, sidedata_end)
2805 2811
2806 2812 def rawdata(self, nodeorrev):
2807 2813 """return an uncompressed raw data of a given node or revision number."""
2808 2814 return self._revisiondata(nodeorrev, raw=True)
2809 2815
2810 2816 def hash(self, text, p1, p2):
2811 2817 """Compute a node hash.
2812 2818
2813 2819 Available as a function so that subclasses can replace the hash
2814 2820 as needed.
2815 2821 """
2816 2822 return storageutil.hashrevisionsha1(text, p1, p2)
2817 2823
2818 2824 def checkhash(self, text, node, p1=None, p2=None, rev=None):
2819 2825 """Check node hash integrity.
2820 2826
2821 2827 Available as a function so that subclasses can extend hash mismatch
2822 2828 behaviors as needed.
2823 2829 """
2824 2830 try:
2825 2831 if p1 is None and p2 is None:
2826 2832 p1, p2 = self.parents(node)
2827 2833 if node != self.hash(text, p1, p2):
2828 2834 # Clear the revision cache on hash failure. The revision cache
2829 2835 # only stores the raw revision and clearing the cache does have
2830 2836 # the side-effect that we won't have a cache hit when the raw
2831 2837 # revision data is accessed. But this case should be rare and
2832 2838 # it is extra work to teach the cache about the hash
2833 2839 # verification state.
2834 2840 if (
2835 2841 self._inner._revisioncache
2836 2842 and self._inner._revisioncache[0] == node
2837 2843 ):
2838 2844 self._inner._revisioncache = None
2839 2845
2840 2846 revornode = rev
2841 2847 if revornode is None:
2842 2848 revornode = templatefilters.short(hex(node))
2843 2849 raise error.RevlogError(
2844 2850 _(b"integrity check failed on %s:%s")
2845 2851 % (self.display_id, pycompat.bytestr(revornode))
2846 2852 )
2847 2853 except error.RevlogError:
2848 2854 if self.feature_config.censorable and storageutil.iscensoredtext(
2849 2855 text
2850 2856 ):
2851 2857 raise error.CensoredNodeError(self.display_id, node, text)
2852 2858 raise
2853 2859
2854 2860 @property
2855 2861 def _split_index_file(self):
2856 2862 """the path where to expect the index of an ongoing splitting operation
2857 2863
2858 2864 The file will only exist if a splitting operation is in progress, but
2859 2865 it is always expected at the same location."""
2860 2866 parts = self.radix.split(b'/')
2861 2867 if len(parts) > 1:
2862 2868 # adds a '-s' suffix to the `data/` or `meta/` base
2863 2869 head = parts[0] + b'-s'
2864 2870 mids = parts[1:-1]
2865 2871 tail = parts[-1] + b'.i'
2866 2872 pieces = [head] + mids + [tail]
2867 2873 return b'/'.join(pieces)
2868 2874 else:
2869 2875 # the revlog is stored at the root of the store (changelog or
2870 2876 # manifest), no risk of collision.
2871 2877 return self.radix + b'.i.s'
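# Path illustration derived from the code above (example radixes only): a
# radix of b'data/some/file' maps to b'data-s/some/file.i', while a
# store-root radix such as b'00changelog' maps to b'00changelog.i.s'.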
2872 2878
2873 2879 def _enforceinlinesize(self, tr):
2874 2880 """Check if the revlog is too big for inline and convert if so.
2875 2881
2876 2882 This should be called after revisions are added to the revlog. If the
2877 2883 revlog has grown too large to be an inline revlog, it will convert it
2878 2884 to use multiple index and data files.
2879 2885 """
2880 2886 tiprev = len(self) - 1
2881 2887 total_size = self.start(tiprev) + self.length(tiprev)
2882 2888 if not self._inline or (self._may_inline and total_size < _maxinline):
2883 2889 return
2884 2890
2885 2891 if self._docket is not None:
2886 2892 msg = b"inline revlog should not have a docket"
2887 2893 raise error.ProgrammingError(msg)
2888 2894
2889 2895 # In the common case, we enforce inline size because the revlog has
2890 2896 # been appended to. And in such a case, it must have an initial offset
2891 2897 # recorded in the transaction.
2892 2898 troffset = tr.findoffset(self._inner.canonical_index_file)
2893 2899 pre_touched = troffset is not None
2894 2900 if not pre_touched and self.target[0] != KIND_CHANGELOG:
2895 2901 raise error.RevlogError(
2896 2902 _(b"%s not found in the transaction") % self._indexfile
2897 2903 )
2898 2904
2899 2905 tr.addbackup(self._inner.canonical_index_file, for_offset=pre_touched)
2900 2906 tr.add(self._datafile, 0)
2901 2907
2902 2908 new_index_file_path = None
2903 2909 old_index_file_path = self._indexfile
2904 2910 new_index_file_path = self._split_index_file
2905 2911 opener = self.opener
2906 2912 weak_self = weakref.ref(self)
2907 2913
2908 2914 # the "split" index replace the real index when the transaction is
2909 2915 # finalized
2910 2916 def finalize_callback(tr):
2911 2917 opener.rename(
2912 2918 new_index_file_path,
2913 2919 old_index_file_path,
2914 2920 checkambig=True,
2915 2921 )
2916 2922 maybe_self = weak_self()
2917 2923 if maybe_self is not None:
2918 2924 maybe_self._indexfile = old_index_file_path
2919 2925 maybe_self._inner.index_file = maybe_self._indexfile
2920 2926
2921 2927 def abort_callback(tr):
2922 2928 maybe_self = weak_self()
2923 2929 if maybe_self is not None:
2924 2930 maybe_self._indexfile = old_index_file_path
2925 2931 maybe_self._inner.inline = True
2926 2932 maybe_self._inner.index_file = old_index_file_path
2927 2933
2928 2934 tr.registertmp(new_index_file_path)
2929 2935 # we use 001 here to make sure this happens after the finalisation of
2930 2936 # pending changelog write (using 000). Otherwise the two finalizers
2931 2937 # would step over each other and delete the changelog.i file.
2932 2938 if self.target[1] is not None:
2933 2939 callback_id = b'001-revlog-split-%d-%s' % self.target
2934 2940 else:
2935 2941 callback_id = b'001-revlog-split-%d' % self.target[0]
2936 2942 tr.addfinalize(callback_id, finalize_callback)
2937 2943 tr.addabort(callback_id, abort_callback)
2938 2944
2939 2945 self._format_flags &= ~FLAG_INLINE_DATA
2940 2946 self._inner.split_inline(
2941 2947 tr,
2942 2948 self._format_flags | self._format_version,
2943 2949 new_index_file_path=new_index_file_path,
2944 2950 )
2945 2951
2946 2952 self._inline = False
2947 2953 if new_index_file_path is not None:
2948 2954 self._indexfile = new_index_file_path
2949 2955
2950 2956 nodemaputil.setup_persistent_nodemap(tr, self)
2951 2957
2952 2958 def _nodeduplicatecallback(self, transaction, node):
2953 2959 """called when trying to add a node already stored."""
2954 2960
2955 2961 @contextlib.contextmanager
2956 2962 def reading(self):
2957 2963 with self._inner.reading():
2958 2964 yield
2959 2965
2960 2966 @contextlib.contextmanager
2961 2967 def _writing(self, transaction):
2962 2968 if self._trypending:
2963 2969 msg = b'try to write in a `trypending` revlog: %s'
2964 2970 msg %= self.display_id
2965 2971 raise error.ProgrammingError(msg)
2966 2972 if self._inner.is_writing:
2967 2973 yield
2968 2974 else:
2969 2975 data_end = None
2970 2976 sidedata_end = None
2971 2977 if self._docket is not None:
2972 2978 data_end = self._docket.data_end
2973 2979 sidedata_end = self._docket.sidedata_end
2974 2980 with self._inner.writing(
2975 2981 transaction,
2976 2982 data_end=data_end,
2977 2983 sidedata_end=sidedata_end,
2978 2984 ):
2979 2985 yield
2980 2986 if self._docket is not None:
2981 2987 self._write_docket(transaction)
2982 2988
2983 2989 @property
2984 2990 def is_delaying(self):
2985 2991 return self._inner.is_delaying
2986 2992
2987 2993 def _write_docket(self, transaction):
2988 2994 """write the current docket on disk
2989 2995
2990 2996 Exists as a method to help the changelog implement transaction logic
2991 2997
2992 2998 We could also imagine using the same transaction logic for all revlogs
2993 2999 since dockets are cheap."""
2994 3000 self._docket.write(transaction)
2995 3001
2996 3002 def addrevision(
2997 3003 self,
2998 3004 text,
2999 3005 transaction,
3000 3006 link,
3001 3007 p1,
3002 3008 p2,
3003 3009 cachedelta=None,
3004 3010 node=None,
3005 3011 flags=REVIDX_DEFAULT_FLAGS,
3006 3012 deltacomputer=None,
3007 3013 sidedata=None,
3008 3014 ):
3009 3015 """add a revision to the log
3010 3016
3011 3017 text - the revision data to add
3012 3018 transaction - the transaction object used for rollback
3013 3019 link - the linkrev data to add
3014 3020 p1, p2 - the parent nodeids of the revision
3015 3021 cachedelta - an optional precomputed delta
3016 3022 node - nodeid of revision; typically node is not specified, and it is
3017 3023 computed by default as hash(text, p1, p2), however subclasses might
3018 3024 use different hashing method (and override checkhash() in such case)
3019 3025 flags - the known flags to set on the revision
3020 3026 deltacomputer - an optional deltacomputer instance shared between
3021 3027 multiple calls
3022 3028 """
3023 3029 if link == nullrev:
3024 3030 raise error.RevlogError(
3025 3031 _(b"attempted to add linkrev -1 to %s") % self.display_id
3026 3032 )
3027 3033
3028 3034 if sidedata is None:
3029 3035 sidedata = {}
3030 3036 elif sidedata and not self.feature_config.has_side_data:
3031 3037 raise error.ProgrammingError(
3032 3038 _(b"trying to add sidedata to a revlog who don't support them")
3033 3039 )
3034 3040
3035 3041 if flags:
3036 3042 node = node or self.hash(text, p1, p2)
3037 3043
3038 3044 rawtext, validatehash = flagutil.processflagswrite(self, text, flags)
3039 3045
3040 3046 # If the flag processor modifies the revision data, ignore any provided
3041 3047 # cachedelta.
3042 3048 if rawtext != text:
3043 3049 cachedelta = None
3044 3050
3045 3051 if len(rawtext) > _maxentrysize:
3046 3052 raise error.RevlogError(
3047 3053 _(
3048 3054 b"%s: size of %d bytes exceeds maximum revlog storage of 2GiB"
3049 3055 )
3050 3056 % (self.display_id, len(rawtext))
3051 3057 )
3052 3058
3053 3059 node = node or self.hash(rawtext, p1, p2)
3054 3060 rev = self.index.get_rev(node)
3055 3061 if rev is not None:
3056 3062 return rev
3057 3063
3058 3064 if validatehash:
3059 3065 self.checkhash(rawtext, node, p1=p1, p2=p2)
3060 3066
3061 3067 return self.addrawrevision(
3062 3068 rawtext,
3063 3069 transaction,
3064 3070 link,
3065 3071 p1,
3066 3072 p2,
3067 3073 node,
3068 3074 flags,
3069 3075 cachedelta=cachedelta,
3070 3076 deltacomputer=deltacomputer,
3071 3077 sidedata=sidedata,
3072 3078 )
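# Hedged usage sketch (hypothetical names; `rl` is a revlog and `tr` a
# live transaction, neither defined here):
#
#   rev = rl.addrevision(fulltext, tr, linkrev, p1node, p2node)
#
# The node is computed as hash(fulltext, p1node, p2node) unless passed
# explicitly, and the call simply returns the existing revision number
# when that node is already stored.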
3073 3079
3074 3080 def addrawrevision(
3075 3081 self,
3076 3082 rawtext,
3077 3083 transaction,
3078 3084 link,
3079 3085 p1,
3080 3086 p2,
3081 3087 node,
3082 3088 flags,
3083 3089 cachedelta=None,
3084 3090 deltacomputer=None,
3085 3091 sidedata=None,
3086 3092 ):
3087 3093 """add a raw revision with known flags, node and parents
3088 3094 useful when reusing a revision not stored in this revlog (ex: received
3089 3095 over the wire, or read from an external bundle).
3090 3096 """
3091 3097 with self._writing(transaction):
3092 3098 return self._addrevision(
3093 3099 node,
3094 3100 rawtext,
3095 3101 transaction,
3096 3102 link,
3097 3103 p1,
3098 3104 p2,
3099 3105 flags,
3100 3106 cachedelta,
3101 3107 deltacomputer=deltacomputer,
3102 3108 sidedata=sidedata,
3103 3109 )
3104 3110
3105 3111 def compress(self, data: bytes) -> Tuple[bytes, bytes]:
3106 3112 return self._inner.compress(data)
3107 3113
3108 3114 def decompress(self, data):
3109 3115 return self._inner.decompress(data)
3110 3116
3111 3117 def _addrevision(
3112 3118 self,
3113 3119 node,
3114 3120 rawtext,
3115 3121 transaction,
3116 3122 link,
3117 3123 p1,
3118 3124 p2,
3119 3125 flags,
3120 3126 cachedelta,
3121 3127 alwayscache=False,
3122 3128 deltacomputer=None,
3123 3129 sidedata=None,
3124 3130 ):
3125 3131 """internal function to add revisions to the log
3126 3132
3127 3133 see addrevision for argument descriptions.
3128 3134
3129 3135 note: "addrevision" takes non-raw text, "_addrevision" takes raw text.
3130 3136
3131 3137 if "deltacomputer" is not provided or None, a defaultdeltacomputer will
3132 3138 be used.
3133 3139
3134 3140 invariants:
3135 3141 - rawtext is optional (can be None); if not set, cachedelta must be set.
3136 3142 If both are set, they must correspond to each other.
3137 3143 """
3138 3144 if node == self.nullid:
3139 3145 raise error.RevlogError(
3140 3146 _(b"%s: attempt to add null revision") % self.display_id
3141 3147 )
3142 3148 if (
3143 3149 node == self.nodeconstants.wdirid
3144 3150 or node in self.nodeconstants.wdirfilenodeids
3145 3151 ):
3146 3152 raise error.RevlogError(
3147 3153 _(b"%s: attempt to add wdir revision") % self.display_id
3148 3154 )
3149 3155 if not self._inner.is_writing:
3150 3156 msg = b'adding revision outside `revlog._writing` context'
3151 3157 raise error.ProgrammingError(msg)
3152 3158
3153 3159 btext = [rawtext]
3154 3160
3155 3161 curr = len(self)
3156 3162 prev = curr - 1
3157 3163
3158 3164 offset = self._get_data_offset(prev)
3159 3165
3160 3166 if self._concurrencychecker:
3161 3167 ifh, dfh, sdfh = self._inner._writinghandles
3162 3168 # XXX no checking for the sidedata file
3163 3169 if self._inline:
3164 3170 # offset is "as if" it were in the .d file, so we need to add on
3165 3171 # the size of the entry metadata.
3166 3172 self._concurrencychecker(
3167 3173 ifh, self._indexfile, offset + curr * self.index.entry_size
3168 3174 )
3169 3175 else:
3170 3176 # Entries in the .i are a consistent size.
3171 3177 self._concurrencychecker(
3172 3178 ifh, self._indexfile, curr * self.index.entry_size
3173 3179 )
3174 3180 self._concurrencychecker(dfh, self._datafile, offset)
3175 3181
3176 3182 p1r, p2r = self.rev(p1), self.rev(p2)
3177 3183
3178 3184 # full versions are inserted when the needed deltas
3179 3185 # become comparable to the uncompressed text
3180 3186 if rawtext is None:
3181 3187 # we need the rawtext size before it is changed by flag processors,
3182 3188 # i.e. the non-raw size. Use revlog explicitly to avoid filelog's
3183 3189 # extra logic that might remove metadata size.
3184 3190 textlen = mdiff.patchedsize(
3185 3191 revlog.size(self, cachedelta[0]), cachedelta[1]
3186 3192 )
3187 3193 else:
3188 3194 textlen = len(rawtext)
3189 3195
3190 3196 if deltacomputer is None:
3191 3197 write_debug = None
3192 3198 if self.delta_config.debug_delta:
3193 3199 write_debug = transaction._report
3194 3200 deltacomputer = deltautil.deltacomputer(
3195 3201 self, write_debug=write_debug
3196 3202 )
3197 3203
3198 3204 if cachedelta is not None and len(cachedelta) == 2:
3199 3205 # If the cached delta has no information about how it should be
3200 3206 # reused, add the default reuse instruction according to the
3201 3207 # revlog's configuration.
3202 3208 if (
3203 3209 self.delta_config.general_delta
3204 3210 and self.delta_config.lazy_delta_base
3205 3211 ):
3206 3212 delta_base_reuse = DELTA_BASE_REUSE_TRY
3207 3213 else:
3208 3214 delta_base_reuse = DELTA_BASE_REUSE_NO
3209 3215 cachedelta = (cachedelta[0], cachedelta[1], delta_base_reuse)
3210 3216
3211 3217 revinfo = revlogutils.revisioninfo(
3212 3218 node,
3213 3219 p1,
3214 3220 p2,
3215 3221 btext,
3216 3222 textlen,
3217 3223 cachedelta,
3218 3224 flags,
3219 3225 )
3220 3226
3221 3227 deltainfo = deltacomputer.finddeltainfo(revinfo)
3222 3228
3223 3229 compression_mode = COMP_MODE_INLINE
3224 3230 if self._docket is not None:
3225 3231 default_comp = self._docket.default_compression_header
3226 3232 r = deltautil.delta_compression(default_comp, deltainfo)
3227 3233 compression_mode, deltainfo = r
3228 3234
3229 3235 sidedata_compression_mode = COMP_MODE_INLINE
3230 3236 if sidedata and self.feature_config.has_side_data:
3231 3237 sidedata_compression_mode = COMP_MODE_PLAIN
3232 3238 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
3233 3239 sidedata_offset = self._docket.sidedata_end
3234 3240 h, comp_sidedata = self._inner.compress(serialized_sidedata)
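# Keep the compressed sidedata only if compression actually produced
# something (no b'u' "stored plain" marker), the result is smaller, and
# its first byte cannot be mistaken for the "no compression" marker;
# otherwise the serialized sidedata is stored uncompressed.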
3235 3241 if (
3236 3242 h != b'u'
3237 3243 and comp_sidedata[0:1] != b'\0'
3238 3244 and len(comp_sidedata) < len(serialized_sidedata)
3239 3245 ):
3240 3246 assert not h
3241 3247 if (
3242 3248 comp_sidedata[0:1]
3243 3249 == self._docket.default_compression_header
3244 3250 ):
3245 3251 sidedata_compression_mode = COMP_MODE_DEFAULT
3246 3252 serialized_sidedata = comp_sidedata
3247 3253 else:
3248 3254 sidedata_compression_mode = COMP_MODE_INLINE
3249 3255 serialized_sidedata = comp_sidedata
3250 3256 else:
3251 3257 serialized_sidedata = b""
3252 3258 # Don't store the offset if the sidedata is empty; that way
3253 3259 # we can easily detect empty sidedata, and it will be no different
3254 3260 # from the sidedata we add manually.
3255 3261 sidedata_offset = 0
3256 3262
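# The "rank" of a revision is the number of revisions in its set of
# ancestors, including itself; the cases below compute it incrementally
# from the parents' ranks.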
3257 3263 rank = RANK_UNKNOWN
3258 3264 if self.feature_config.compute_rank:
3259 3265 if (p1r, p2r) == (nullrev, nullrev):
3260 3266 rank = 1
3261 3267 elif p1r != nullrev and p2r == nullrev:
3262 3268 rank = 1 + self.fast_rank(p1r)
3263 3269 elif p1r == nullrev and p2r != nullrev:
3264 3270 rank = 1 + self.fast_rank(p2r)
3265 3271 else: # merge node
3266 3272 if rustdagop is not None and self.index.rust_ext_compat:
3267 3273 rank = rustdagop.rank(self.index, p1r, p2r)
3268 3274 else:
3269 3275 pmin, pmax = sorted((p1r, p2r))
3270 3276 rank = 1 + self.fast_rank(pmax)
3271 3277 rank += sum(1 for _ in self.findmissingrevs([pmax], [pmin]))
3272 3278
3273 3279 e = revlogutils.entry(
3274 3280 flags=flags,
3275 3281 data_offset=offset,
3276 3282 data_compressed_length=deltainfo.deltalen,
3277 3283 data_uncompressed_length=textlen,
3278 3284 data_compression_mode=compression_mode,
3279 3285 data_delta_base=deltainfo.base,
3280 3286 link_rev=link,
3281 3287 parent_rev_1=p1r,
3282 3288 parent_rev_2=p2r,
3283 3289 node_id=node,
3284 3290 sidedata_offset=sidedata_offset,
3285 3291 sidedata_compressed_length=len(serialized_sidedata),
3286 3292 sidedata_compression_mode=sidedata_compression_mode,
3287 3293 rank=rank,
3288 3294 )
3289 3295
3290 3296 self.index.append(e)
3291 3297 entry = self.index.entry_binary(curr)
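# docket-less revlogs embed the format header (version and feature
# flags) at the start of the very first index entry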
3292 3298 if curr == 0 and self._docket is None:
3293 3299 header = self._format_flags | self._format_version
3294 3300 header = self.index.pack_header(header)
3295 3301 entry = header + entry
3296 3302 self._writeentry(
3297 3303 transaction,
3298 3304 entry,
3299 3305 deltainfo.data,
3300 3306 link,
3301 3307 offset,
3302 3308 serialized_sidedata,
3303 3309 sidedata_offset,
3304 3310 )
3305 3311
3306 3312 rawtext = btext[0]
3307 3313
3308 3314 if alwayscache and rawtext is None:
3309 3315 rawtext = deltacomputer.buildtext(revinfo)
3310 3316
3311 3317 if type(rawtext) == bytes: # only accept immutable objects
3312 3318 self._inner._revisioncache = (node, curr, rawtext)
3313 3319 self._chainbasecache[curr] = deltainfo.chainbase
3314 3320 return curr
3315 3321
3316 3322 def _get_data_offset(self, prev):
3317 3323 """Returns the current offset in the (in-transaction) data file.
3318 3324 Versions < 2 of the revlog can get this in O(1); revlog v2 needs a docket
3319 3325 file to store that information: since sidedata can be rewritten to the
3320 3326 end of the data file within a transaction, you can have cases where, for
3321 3327 example, rev `n` does not have sidedata while rev `n - 1` does, leading
3322 3328 to `n - 1`'s sidedata being written after `n`'s data.
3323 3329
3324 3330 TODO cache this in a docket file before getting out of experimental."""
3325 3331 if self._docket is None:
3326 3332 return self.end(prev)
3327 3333 else:
3328 3334 return self._docket.data_end
3329 3335
3330 3336 def _writeentry(
3331 3337 self,
3332 3338 transaction,
3333 3339 entry,
3334 3340 data,
3335 3341 link,
3336 3342 offset,
3337 3343 sidedata,
3338 3344 sidedata_offset,
3339 3345 ):
3340 3346 # Files opened in a+ mode have inconsistent behavior on various
3341 3347 # platforms. Windows requires that a file positioning call be made
3342 3348 # when the file handle transitions between reads and writes. See
3343 3349 # 3686fa2b8eee and the mixedfilemodewrapper in windows.py. On other
3344 3350 # platforms, Python or the platform itself can be buggy. Some versions
3345 3351 # of Solaris have been observed to not append at the end of the file
3346 3352 # if the file was seeked to before the end. See issue4943 for more.
3347 3353 #
3348 3354 # We work around this issue by inserting a seek() before writing.
3349 3355 # Note: This is likely not necessary on Python 3. However, because
3350 3356 # the file handle is reused for reads and may be seeked there, we need
3351 3357 # to be careful before changing this.
3352 3358 index_end = data_end = sidedata_end = None
3353 3359 if self._docket is not None:
3354 3360 index_end = self._docket.index_end
3355 3361 data_end = self._docket.data_end
3356 3362 sidedata_end = self._docket.sidedata_end
3357 3363
3358 3364 files_end = self._inner.write_entry(
3359 3365 transaction,
3360 3366 entry,
3361 3367 data,
3362 3368 link,
3363 3369 offset,
3364 3370 sidedata,
3365 3371 sidedata_offset,
3366 3372 index_end,
3367 3373 data_end,
3368 3374 sidedata_end,
3369 3375 )
3370 3376 self._enforceinlinesize(transaction)
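# write_entry returns the new end offsets of the index, data and
# sidedata files; keep the docket in sync when one is used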
3371 3377 if self._docket is not None:
3372 3378 self._docket.index_end = files_end[0]
3373 3379 self._docket.data_end = files_end[1]
3374 3380 self._docket.sidedata_end = files_end[2]
3375 3381
3376 3382 nodemaputil.setup_persistent_nodemap(transaction, self)
3377 3383
3378 3384 def addgroup(
3379 3385 self,
3380 3386 deltas,
3381 3387 linkmapper,
3382 3388 transaction,
3383 3389 alwayscache=False,
3384 3390 addrevisioncb=None,
3385 3391 duplicaterevisioncb=None,
3386 3392 debug_info=None,
3387 3393 delta_base_reuse_policy=None,
3388 3394 ):
3389 3395 """
3390 3396 add a delta group
3391 3397
3392 3398 Given a set of deltas, add them to the revision log. The
3393 3399 first delta is against its parent, which should be in our
3394 3400 log; the rest are against the previous delta.
3395 3401
3396 3402 If ``addrevisioncb`` is defined, it will be called with arguments of
3397 3403 this revlog and the node that was added.
3398 3404 """
3399 3405
3400 3406 if self._adding_group:
3401 3407 raise error.ProgrammingError(b'cannot nest addgroup() calls')
3402 3408
3403 3409 # read the default delta-base reuse policy from revlog config if the
3404 3410 # group did not specify one.
3405 3411 if delta_base_reuse_policy is None:
3406 3412 if (
3407 3413 self.delta_config.general_delta
3408 3414 and self.delta_config.lazy_delta_base
3409 3415 ):
3410 3416 delta_base_reuse_policy = DELTA_BASE_REUSE_TRY
3411 3417 else:
3412 3418 delta_base_reuse_policy = DELTA_BASE_REUSE_NO
3413 3419
3414 3420 self._adding_group = True
3415 3421 empty = True
3416 3422 try:
3417 3423 with self._writing(transaction):
3418 3424 write_debug = None
3419 3425 if self.delta_config.debug_delta:
3420 3426 write_debug = transaction._report
3421 3427 deltacomputer = deltautil.deltacomputer(
3422 3428 self,
3423 3429 write_debug=write_debug,
3424 3430 debug_info=debug_info,
3425 3431 )
3426 3432 # loop through our set of deltas
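# each item is an 8-tuple:
# (node, p1, p2, linknode, deltabase, delta, flags, sidedata)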
3427 3433 for data in deltas:
3428 3434 (
3429 3435 node,
3430 3436 p1,
3431 3437 p2,
3432 3438 linknode,
3433 3439 deltabase,
3434 3440 delta,
3435 3441 flags,
3436 3442 sidedata,
3437 3443 ) = data
3438 3444 link = linkmapper(linknode)
3439 3445 flags = flags or REVIDX_DEFAULT_FLAGS
3440 3446
3441 3447 rev = self.index.get_rev(node)
3442 3448 if rev is not None:
3443 3449 # this can happen if two branches make the same change
3444 3450 self._nodeduplicatecallback(transaction, rev)
3445 3451 if duplicaterevisioncb:
3446 3452 duplicaterevisioncb(self, rev)
3447 3453 empty = False
3448 3454 continue
3449 3455
3450 3456 for p in (p1, p2):
3451 3457 if not self.index.has_node(p):
3452 3458 raise error.LookupError(
3453 3459 p, self.radix, _(b'unknown parent')
3454 3460 )
3455 3461
3456 3462 if not self.index.has_node(deltabase):
3457 3463 raise error.LookupError(
3458 3464 deltabase, self.display_id, _(b'unknown delta base')
3459 3465 )
3460 3466
3461 3467 baserev = self.rev(deltabase)
3462 3468
3463 3469 if baserev != nullrev and self.iscensored(baserev):
3464 3470 # if base is censored, delta must be full replacement in a
3465 3471 # single patch operation
3466 3472 hlen = struct.calcsize(b">lll")
3467 3473 oldlen = self.rawsize(baserev)
3468 3474 newlen = len(delta) - hlen
3469 3475 if delta[:hlen] != mdiff.replacediffheader(
3470 3476 oldlen, newlen
3471 3477 ):
3472 3478 raise error.CensoredBaseError(
3473 3479 self.display_id, self.node(baserev)
3474 3480 )
3475 3481
3476 3482 if not flags and self._peek_iscensored(baserev, delta):
3477 3483 flags |= REVIDX_ISCENSORED
3478 3484
3479 3485 # We assume consumers of addrevisioncb will want to retrieve
3480 3486 # the added revision, which will require a call to
3481 3487 # revision(). revision() will fast path if there is a cache
3482 3488 # hit. So, we tell _addrevision() to always cache in this case.
3483 3489 # We're only using addgroup() in the context of changegroup
3484 3490 # generation so the revision data can always be handled as raw
3485 3491 # by the flagprocessor.
3486 3492 rev = self._addrevision(
3487 3493 node,
3488 3494 None,
3489 3495 transaction,
3490 3496 link,
3491 3497 p1,
3492 3498 p2,
3493 3499 flags,
3494 3500 (baserev, delta, delta_base_reuse_policy),
3495 3501 alwayscache=alwayscache,
3496 3502 deltacomputer=deltacomputer,
3497 3503 sidedata=sidedata,
3498 3504 )
3499 3505
3500 3506 if addrevisioncb:
3501 3507 addrevisioncb(self, rev)
3502 3508 empty = False
3503 3509 finally:
3504 3510 self._adding_group = False
3505 3511 return not empty
3506 3512
3507 3513 def iscensored(self, rev):
3508 3514 """Check if a file revision is censored."""
3509 3515 if not self.feature_config.censorable:
3510 3516 return False
3511 3517
3512 3518 return self.flags(rev) & REVIDX_ISCENSORED
3513 3519
3514 3520 def _peek_iscensored(self, baserev, delta):
3515 3521 """Quickly check if a delta produces a censored revision."""
3516 3522 if not self.feature_config.censorable:
3517 3523 return False
3518 3524
3519 3525 return storageutil.deltaiscensored(delta, baserev, self.rawsize)
3520 3526
3521 3527 def getstrippoint(self, minlink):
3522 3528 """find the minimum rev that must be stripped to strip the linkrev
3523 3529
3524 3530 Returns a tuple containing the minimum rev and a set of all revs that
3525 3531 have linkrevs that will be broken by this strip.
3526 3532 """
3527 3533 return storageutil.resolvestripinfo(
3528 3534 minlink,
3529 3535 len(self) - 1,
3530 3536 self.headrevs(),
3531 3537 self.linkrev,
3532 3538 self.parentrevs,
3533 3539 )
3534 3540
3535 3541 def strip(self, minlink, transaction):
3536 3542 """truncate the revlog on the first revision with a linkrev >= minlink
3537 3543
3538 3544 This function is called when we're stripping revision minlink and
3539 3545 its descendants from the repository.
3540 3546
3541 3547 We have to remove all revisions with linkrev >= minlink, because
3542 3548 the equivalent changelog revisions will be renumbered after the
3543 3549 strip.
3544 3550
3545 3551 So we truncate the revlog on the first of these revisions, and
3546 3552 trust that the caller has saved the revisions that shouldn't be
3547 3553 removed and that it'll re-add them after this truncation.
3548 3554 """
3549 3555 if len(self) == 0:
3550 3556 return
3551 3557
3552 3558 rev, _ = self.getstrippoint(minlink)
3553 3559 if rev == len(self):
3554 3560 return
3555 3561
3556 3562 # first truncate the files on disk
3557 3563 data_end = self.start(rev)
3558 3564 if not self._inline:
3559 3565 transaction.add(self._datafile, data_end)
3560 3566 end = rev * self.index.entry_size
3561 3567 else:
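# inline revlog: revision data is interleaved with the index
# entries in the index file, so truncate past both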
3562 3568 end = data_end + (rev * self.index.entry_size)
3563 3569
3564 3570 if self._sidedatafile:
3565 3571 sidedata_end = self.sidedata_cut_off(rev)
3566 3572 transaction.add(self._sidedatafile, sidedata_end)
3567 3573
3568 3574 transaction.add(self._indexfile, end)
3569 3575 if self._docket is not None:
3570 3576 # XXX we could leverage the docket while stripping. However it is
3571 3577 # not powerful enough at the time of this comment
3572 3578 self._docket.index_end = end
3573 3579 self._docket.data_end = data_end
3574 3580 self._docket.sidedata_end = sidedata_end
3575 3581 self._docket.write(transaction, stripping=True)
3576 3582
3577 3583 # then reset internal state in memory to forget those revisions
3578 3584 self._chaininfocache = util.lrucachedict(500)
3579 3585 self._inner.clear_cache()
3580 3586
3581 3587 del self.index[rev:-1]
3582 3588
3583 3589 def checksize(self):
3584 3590 """Check size of index and data files
3585 3591
3586 3592 return a (dd, di) tuple.
3587 3593 - dd: extra bytes for the "data" file
3588 3594 - di: extra bytes for the "index" file
3589 3595
3590 3596 A healthy revlog will return (0, 0).
3591 3597 """
3592 3598 expected = 0
3593 3599 if len(self):
3594 3600 expected = max(0, self.end(len(self) - 1))
3595 3601
3596 3602 try:
3597 3603 with self._datafp() as f:
3598 3604 f.seek(0, io.SEEK_END)
3599 3605 actual = f.tell()
3600 3606 dd = actual - expected
3601 3607 except FileNotFoundError:
3602 3608 dd = 0
3603 3609
3604 3610 try:
3605 3611 f = self.opener(self._indexfile)
3606 3612 f.seek(0, io.SEEK_END)
3607 3613 actual = f.tell()
3608 3614 f.close()
3609 3615 s = self.index.entry_size
3610 3616 i = max(0, actual // s)
3611 3617 di = actual - (i * s)
3612 3618 if self._inline:
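# inline revlog: the index file also holds the revision data,
# so subtract it before measuring leftover index bytes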
3613 3619 databytes = 0
3614 3620 for r in self:
3615 3621 databytes += max(0, self.length(r))
3616 3622 dd = 0
3617 3623 di = actual - len(self) * s - databytes
3618 3624 except FileNotFoundError:
3619 3625 di = 0
3620 3626
3621 3627 return (dd, di)
3622 3628
3623 3629 def files(self):
3624 3630 """return list of files that compose this revlog"""
3625 3631 res = [self._indexfile]
3626 3632 if self._docket_file is None:
3627 3633 if not self._inline:
3628 3634 res.append(self._datafile)
3629 3635 else:
3630 3636 res.append(self._docket_file)
3631 3637 res.extend(self._docket.old_index_filepaths(include_empty=False))
3632 3638 if self._docket.data_end:
3633 3639 res.append(self._datafile)
3634 3640 res.extend(self._docket.old_data_filepaths(include_empty=False))
3635 3641 if self._docket.sidedata_end:
3636 3642 res.append(self._sidedatafile)
3637 3643 res.extend(self._docket.old_sidedata_filepaths(include_empty=False))
3638 3644 return res
3639 3645
3640 3646 def emitrevisions(
3641 3647 self,
3642 3648 nodes,
3643 3649 nodesorder=None,
3644 3650 revisiondata=False,
3645 3651 assumehaveparentrevisions=False,
3646 3652 deltamode=repository.CG_DELTAMODE_STD,
3647 3653 sidedata_helpers=None,
3648 3654 debug_info=None,
3649 3655 ):
3650 3656 if nodesorder not in (b'nodes', b'storage', b'linear', None):
3651 3657 raise error.ProgrammingError(
3652 3658 b'unhandled value for nodesorder: %s' % nodesorder
3653 3659 )
3654 3660
3655 3661 if nodesorder is None and not self.delta_config.general_delta:
3656 3662 nodesorder = b'storage'
3657 3663
3658 3664 if (
3659 3665 not self._storedeltachains
3660 3666 and deltamode != repository.CG_DELTAMODE_PREV
3661 3667 ):
3662 3668 deltamode = repository.CG_DELTAMODE_FULL
3663 3669
3664 3670 return storageutil.emitrevisions(
3665 3671 self,
3666 3672 nodes,
3667 3673 nodesorder,
3668 3674 revlogrevisiondelta,
3669 3675 deltaparentfn=self.deltaparent,
3670 3676 candeltafn=self._candelta,
3671 3677 rawsizefn=self.rawsize,
3672 3678 revdifffn=self.revdiff,
3673 3679 flagsfn=self.flags,
3674 3680 deltamode=deltamode,
3675 3681 revisiondata=revisiondata,
3676 3682 assumehaveparentrevisions=assumehaveparentrevisions,
3677 3683 sidedata_helpers=sidedata_helpers,
3678 3684 debug_info=debug_info,
3679 3685 )
3680 3686
3681 3687 DELTAREUSEALWAYS = b'always'
3682 3688 DELTAREUSESAMEREVS = b'samerevs'
3683 3689 DELTAREUSENEVER = b'never'
3684 3690
3685 3691 DELTAREUSEFULLADD = b'fulladd'
3686 3692
3687 3693 DELTAREUSEALL = {b'always', b'samerevs', b'never', b'fulladd'}
3688 3694
3689 3695 def clone(
3690 3696 self,
3691 3697 tr,
3692 3698 destrevlog,
3693 3699 addrevisioncb=None,
3694 3700 deltareuse=DELTAREUSESAMEREVS,
3695 3701 forcedeltabothparents=None,
3696 3702 sidedata_helpers=None,
3697 3703 ):
3698 3704 """Copy this revlog to another, possibly with format changes.
3699 3705
3700 3706 The destination revlog will contain the same revisions and nodes.
3701 3707 However, it may not be bit-for-bit identical due to e.g. delta encoding
3702 3708 differences.
3703 3709
3704 3710 The ``deltareuse`` argument controls how deltas from the existing revlog
3705 3711 are preserved in the destination revlog. The argument can have the
3706 3712 following values:
3707 3713
3708 3714 DELTAREUSEALWAYS
3709 3715 Deltas will always be reused (if possible), even if the destination
3710 3716 revlog would not select the same revisions for the delta. This is the
3711 3717 fastest mode of operation.
3712 3718 DELTAREUSESAMEREVS
3713 3719 Deltas will be reused if the destination revlog would pick the same
3714 3720 revisions for the delta. This mode strikes a balance between speed
3715 3721 and optimization.
3716 3722 DELTAREUSENEVER
3717 3723 Deltas will never be reused. This is the slowest mode of execution.
3718 3724 This mode can be used to recompute deltas (e.g. if the diff/delta
3719 3725 algorithm changes).
3720 3726 DELTAREUSEFULLADD
3721 3727 Revisions will be re-added as if they were new content. This is
3722 3728 slower than DELTAREUSEALWAYS but allows more mechanisms to kick in,
3723 3729 e.g. large file detection and handling.
3724 3730
3725 3731 Delta computation can be slow, so the choice of delta reuse policy can
3726 3732 significantly affect run time.
3727 3733
3728 3734 The default policy (``DELTAREUSESAMEREVS``) strikes a balance between
3729 3735 two extremes. Deltas will be reused if they are appropriate. But if a
3730 3736 better delta base could be chosen, it will be. This means if you
3731 3737 are converting a non-generaldelta revlog to a generaldelta revlog,
3732 3738 deltas will be recomputed if the delta's parent isn't a parent of the
3733 3739 revision.
3734 3740
3735 3741 In addition to the delta policy, the ``forcedeltabothparents``
3736 3742 argument controls whether to force computing deltas against both parents
3737 3743 for merges. If unset, the destination revlog's existing setting is used.
3738 3744
3739 3745 See `revlogutil.sidedata.get_sidedata_helpers` for the doc on
3740 3746 `sidedata_helpers`.
3741 3747 """
3742 3748 if deltareuse not in self.DELTAREUSEALL:
3743 3749 raise ValueError(
3744 3750 _(b'value for deltareuse invalid: %s') % deltareuse
3745 3751 )
3746 3752
3747 3753 if len(destrevlog):
3748 3754 raise ValueError(_(b'destination revlog is not empty'))
3749 3755
3750 3756 if getattr(self, 'filteredrevs', None):
3751 3757 raise ValueError(_(b'source revlog has filtered revisions'))
3752 3758 if getattr(destrevlog, 'filteredrevs', None):
3753 3759 raise ValueError(_(b'destination revlog has filtered revisions'))
3754 3760
3755 3761 # lazydelta and lazydeltabase control whether to reuse a cached delta,
3756 3762 # if possible.
3757 3763 old_delta_config = destrevlog.delta_config
3758 3764 destrevlog.delta_config = destrevlog.delta_config.copy()
3759 3765
3760 3766 try:
3761 3767 if deltareuse == self.DELTAREUSEALWAYS:
3762 3768 destrevlog.delta_config.lazy_delta_base = True
3763 3769 destrevlog.delta_config.lazy_delta = True
3764 3770 elif deltareuse == self.DELTAREUSESAMEREVS:
3765 3771 destrevlog.delta_config.lazy_delta_base = False
3766 3772 destrevlog.delta_config.lazy_delta = True
3767 3773 elif deltareuse == self.DELTAREUSENEVER:
3768 3774 destrevlog.delta_config.lazy_delta_base = False
3769 3775 destrevlog.delta_config.lazy_delta = False
3770 3776
3771 3777 delta_both_parents = (
3772 3778 forcedeltabothparents or old_delta_config.delta_both_parents
3773 3779 )
3774 3780 destrevlog.delta_config.delta_both_parents = delta_both_parents
3775 3781
3776 3782 with self.reading(), destrevlog._writing(tr):
3777 3783 self._clone(
3778 3784 tr,
3779 3785 destrevlog,
3780 3786 addrevisioncb,
3781 3787 deltareuse,
3782 3788 forcedeltabothparents,
3783 3789 sidedata_helpers,
3784 3790 )
3785 3791
3786 3792 finally:
3787 3793 destrevlog.delta_config = old_delta_config
3788 3794
3789 3795 def _clone(
3790 3796 self,
3791 3797 tr,
3792 3798 destrevlog,
3793 3799 addrevisioncb,
3794 3800 deltareuse,
3795 3801 forcedeltabothparents,
3796 3802 sidedata_helpers,
3797 3803 ):
3798 3804 """perform the core duty of `revlog.clone` after parameter processing"""
3799 3805 write_debug = None
3800 3806 if self.delta_config.debug_delta:
3801 3807 write_debug = tr._report
3802 3808 deltacomputer = deltautil.deltacomputer(
3803 3809 destrevlog,
3804 3810 write_debug=write_debug,
3805 3811 )
3806 3812 index = self.index
3807 3813 for rev in self:
3808 3814 entry = index[rev]
3809 3815
3810 3816 # Some classes override linkrev to take filtered revs into
3811 3817 # account. Use raw entry from index.
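# raw entry layout used here: entry[0] packs offset and flags (flags in
# the low 16 bits), entry[4] is the linkrev, entry[5]/entry[6] are the
# parent revisions (mapped back to nodes via the index), entry[7] is
# the node id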
3812 3818 flags = entry[0] & 0xFFFF
3813 3819 linkrev = entry[4]
3814 3820 p1 = index[entry[5]][7]
3815 3821 p2 = index[entry[6]][7]
3816 3822 node = entry[7]
3817 3823
3818 3824 # (Possibly) reuse the delta from the revlog if allowed and
3819 3825 # the revlog chunk is a delta.
3820 3826 cachedelta = None
3821 3827 rawtext = None
3822 3828 if deltareuse == self.DELTAREUSEFULLADD:
3823 3829 text = self._revisiondata(rev)
3824 3830 sidedata = self.sidedata(rev)
3825 3831
3826 3832 if sidedata_helpers is not None:
3827 3833 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3828 3834 self, sidedata_helpers, sidedata, rev
3829 3835 )
3830 3836 flags = flags | new_flags[0] & ~new_flags[1]
3831 3837
3832 3838 destrevlog.addrevision(
3833 3839 text,
3834 3840 tr,
3835 3841 linkrev,
3836 3842 p1,
3837 3843 p2,
3838 3844 cachedelta=cachedelta,
3839 3845 node=node,
3840 3846 flags=flags,
3841 3847 deltacomputer=deltacomputer,
3842 3848 sidedata=sidedata,
3843 3849 )
3844 3850 else:
3845 3851 if destrevlog.delta_config.lazy_delta:
3846 3852 dp = self.deltaparent(rev)
3847 3853 if dp != nullrev:
3848 3854 cachedelta = (dp, bytes(self._inner._chunk(rev)))
3849 3855
3850 3856 sidedata = None
3851 3857 if not cachedelta:
3852 3858 try:
3853 3859 rawtext = self._revisiondata(rev)
3854 3860 except error.CensoredNodeError as censored:
3855 3861 assert flags & REVIDX_ISCENSORED
3856 3862 rawtext = censored.tombstone
3857 3863 sidedata = self.sidedata(rev)
3858 3864 if sidedata is None:
3859 3865 sidedata = self.sidedata(rev)
3860 3866
3861 3867 if sidedata_helpers is not None:
3862 3868 (sidedata, new_flags) = sidedatautil.run_sidedata_helpers(
3863 3869 self, sidedata_helpers, sidedata, rev
3864 3870 )
3865 3871 flags = flags | new_flags[0] & ~new_flags[1]
3866 3872
3867 3873 destrevlog._addrevision(
3868 3874 node,
3869 3875 rawtext,
3870 3876 tr,
3871 3877 linkrev,
3872 3878 p1,
3873 3879 p2,
3874 3880 flags,
3875 3881 cachedelta,
3876 3882 deltacomputer=deltacomputer,
3877 3883 sidedata=sidedata,
3878 3884 )
3879 3885
3880 3886 if addrevisioncb:
3881 3887 addrevisioncb(self, rev, node)
3882 3888
3883 3889 def censorrevision(self, tr, censor_nodes, tombstone=b''):
3884 3890 if self._format_version == REVLOGV0:
3885 3891 raise error.RevlogError(
3886 3892 _(b'cannot censor with version %d revlogs')
3887 3893 % self._format_version
3888 3894 )
3889 3895 elif self._format_version == REVLOGV1:
3890 3896 rewrite.v1_censor(self, tr, censor_nodes, tombstone)
3891 3897 else:
3892 3898 rewrite.v2_censor(self, tr, censor_nodes, tombstone)
3893 3899
3894 3900 def verifyintegrity(self, state) -> Iterable[RevLogProblem]:
3895 3901 """Verifies the integrity of the revlog.
3896 3902
3897 3903 Yields ``revlogproblem`` instances describing problems that are
3898 3904 found.
3899 3905 """
3900 3906 dd, di = self.checksize()
3901 3907 if dd:
3902 3908 yield revlogproblem(error=_(b'data length off by %d bytes') % dd)
3903 3909 if di:
3904 3910 yield revlogproblem(error=_(b'index contains %d extra bytes') % di)
3905 3911
3906 3912 version = self._format_version
3907 3913
3908 3914 # The verifier tells us what version revlog we should be.
3909 3915 if version != state[b'expectedversion']:
3910 3916 yield revlogproblem(
3911 3917 warning=_(b"warning: '%s' uses revlog format %d; expected %d")
3912 3918 % (self.display_id, version, state[b'expectedversion'])
3913 3919 )
3914 3920
3915 3921 state[b'skipread'] = set()
3916 3922 state[b'safe_renamed'] = set()
3917 3923
3918 3924 for rev in self:
3919 3925 node = self.node(rev)
3920 3926
3921 3927 # Verify contents. 4 cases to care about:
3922 3928 #
3923 3929 # common: the most common case
3924 3930 # rename: with a rename
3925 3931 # meta: file content starts with b'\1\n', the metadata
3926 3932 # header defined in filelog.py, but without a rename
3927 3933 # ext: content stored externally
3928 3934 #
3929 3935 # More formally, their differences are shown below:
3930 3936 #
3931 3937 # | common | rename | meta | ext
3932 3938 # -------------------------------------------------------
3933 3939 # flags() | 0 | 0 | 0 | not 0
3934 3940 # renamed() | False | True | False | ?
3935 3941 # rawtext[0:2]=='\1\n'| False | True | True | ?
3936 3942 #
3937 3943 # "rawtext" means the raw text stored in revlog data, which
3938 3944 # could be retrieved by "rawdata(rev)". "text"
3939 3945 # mentioned below is "revision(rev)".
3940 3946 #
3941 3947 # There are 3 different lengths stored physically:
3942 3948 # 1. L1: rawsize, stored in revlog index
3943 3949 # 2. L2: len(rawtext), stored in revlog data
3944 3950 # 3. L3: len(text), stored in revlog data if flags==0, or
3945 3951 # possibly somewhere else if flags!=0
3946 3952 #
3947 3953 # L1 should be equal to L2. L3 could be different from them.
3948 3954 # "text" may or may not affect commit hash depending on flag
3949 3955 # processors (see flagutil.addflagprocessor).
3950 3956 #
3951 3957 # | common | rename | meta | ext
3952 3958 # -------------------------------------------------
3953 3959 # rawsize() | L1 | L1 | L1 | L1
3954 3960 # size() | L1 | L2-LM | L1(*) | L1 (?)
3955 3961 # len(rawtext) | L2 | L2 | L2 | L2
3956 3962 # len(text) | L2 | L2 | L2 | L3
3957 3963 # len(read()) | L2 | L2-LM | L2-LM | L3 (?)
3958 3964 #
3959 3965 # LM: length of metadata, depending on rawtext
3960 3966 # (*): not ideal, see comment in filelog.size
3961 3967 # (?): could be "- len(meta)" if the resolved content has
3962 3968 # rename metadata
3963 3969 #
3964 3970 # Checks needed to be done:
3965 3971 # 1. length check: L1 == L2, in all cases.
3966 3972 # 2. hash check: depending on flag processor, we may need to
3967 3973 # use either "text" (external), or "rawtext" (in revlog).
3968 3974
3969 3975 try:
3970 3976 skipflags = state.get(b'skipflags', 0)
3971 3977 if skipflags:
3972 3978 skipflags &= self.flags(rev)
3973 3979
3974 3980 _verify_revision(self, skipflags, state, node)
3975 3981
3976 3982 l1 = self.rawsize(rev)
3977 3983 l2 = len(self.rawdata(node))
3978 3984
3979 3985 if l1 != l2:
3980 3986 yield revlogproblem(
3981 3987 error=_(b'unpacked size is %d, %d expected') % (l2, l1),
3982 3988 node=node,
3983 3989 )
3984 3990
3985 3991 except error.CensoredNodeError:
3986 3992 if state[b'erroroncensored']:
3987 3993 yield revlogproblem(
3988 3994 error=_(b'censored file data'), node=node
3989 3995 )
3990 3996 state[b'skipread'].add(node)
3991 3997 except Exception as e:
3992 3998 yield revlogproblem(
3993 3999 error=_(b'unpacking %s: %s')
3994 4000 % (short(node), stringutil.forcebytestr(e)),
3995 4001 node=node,
3996 4002 )
3997 4003 state[b'skipread'].add(node)
3998 4004
3999 4005 def storageinfo(
4000 4006 self,
4001 4007 exclusivefiles=False,
4002 4008 sharedfiles=False,
4003 4009 revisionscount=False,
4004 4010 trackedsize=False,
4005 4011 storedsize=False,
4006 4012 ):
4007 4013 d = {}
4008 4014
4009 4015 if exclusivefiles:
4010 4016 d[b'exclusivefiles'] = [(self.opener, self._indexfile)]
4011 4017 if not self._inline:
4012 4018 d[b'exclusivefiles'].append((self.opener, self._datafile))
4013 4019
4014 4020 if sharedfiles:
4015 4021 d[b'sharedfiles'] = []
4016 4022
4017 4023 if revisionscount:
4018 4024 d[b'revisionscount'] = len(self)
4019 4025
4020 4026 if trackedsize:
4021 4027 d[b'trackedsize'] = sum(map(self.rawsize, iter(self)))
4022 4028
4023 4029 if storedsize:
4024 4030 d[b'storedsize'] = sum(
4025 4031 self.opener.stat(path).st_size for path in self.files()
4026 4032 )
4027 4033
4028 4034 return d
4029 4035
4030 4036 def rewrite_sidedata(self, transaction, helpers, startrev, endrev):
4031 4037 if not self.feature_config.has_side_data:
4032 4038 return
4033 4039 # revlog formats with sidedata support do not support inline
4034 4040 assert not self._inline
4035 4041 if not helpers[1] and not helpers[2]:
4036 4042 # Nothing to generate or remove
4037 4043 return
4038 4044
4039 4045 new_entries = []
4040 4046 # append the new sidedata
4041 4047 with self._writing(transaction):
4042 4048 ifh, dfh, sdfh = self._inner._writinghandles
4043 4049 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
4044 4050
4045 4051 current_offset = sdfh.tell()
4046 4052 for rev in range(startrev, endrev + 1):
4047 4053 entry = self.index[rev]
4048 4054 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
4049 4055 store=self,
4050 4056 sidedata_helpers=helpers,
4051 4057 sidedata={},
4052 4058 rev=rev,
4053 4059 )
4054 4060
4055 4061 serialized_sidedata = sidedatautil.serialize_sidedata(
4056 4062 new_sidedata
4057 4063 )
4058 4064
4059 4065 sidedata_compression_mode = COMP_MODE_INLINE
4060 4066 if serialized_sidedata and self.feature_config.has_side_data:
4061 4067 sidedata_compression_mode = COMP_MODE_PLAIN
4062 4068 h, comp_sidedata = self._inner.compress(serialized_sidedata)
4063 4069 if (
4064 4070 h != b'u'
4065 4071 and comp_sidedata[0:1] != b'\0'
4066 4072 and len(comp_sidedata) < len(serialized_sidedata)
4067 4073 ):
4068 4074 assert not h
4069 4075 if (
4070 4076 comp_sidedata[0:1]
4071 4077 == self._docket.default_compression_header
4072 4078 ):
4073 4079 sidedata_compression_mode = COMP_MODE_DEFAULT
4074 4080 serialized_sidedata = comp_sidedata
4075 4081 else:
4076 4082 sidedata_compression_mode = COMP_MODE_INLINE
4077 4083 serialized_sidedata = comp_sidedata
4078 4084 if entry[8] != 0 or entry[9] != 0:
4079 4085 # rewriting entries that already have sidedata is not
4080 4086 # supported yet, because it introduces garbage data in the
4081 4087 # revlog.
4082 4088 msg = b"rewriting existing sidedata is not supported yet"
4083 4089 raise error.Abort(msg)
4084 4090
4085 4091 # Apply (potential) flags to add and to remove after running
4086 4092 # the sidedata helpers
4087 4093 new_offset_flags = entry[0] | flags[0] & ~flags[1]
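# queue the updated index fields for this revision:
# (sidedata offset, sidedata size, offset/flags field, compression mode)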
4088 4094 entry_update = (
4089 4095 current_offset,
4090 4096 len(serialized_sidedata),
4091 4097 new_offset_flags,
4092 4098 sidedata_compression_mode,
4093 4099 )
4094 4100
4095 4101 # the sidedata computation might have moved the file cursors around
4096 4102 sdfh.seek(current_offset, os.SEEK_SET)
4097 4103 sdfh.write(serialized_sidedata)
4098 4104 new_entries.append(entry_update)
4099 4105 current_offset += len(serialized_sidedata)
4100 4106 self._docket.sidedata_end = sdfh.tell()
4101 4107
4102 4108 # rewrite the new index entries
4103 4109 ifh.seek(startrev * self.index.entry_size)
4104 4110 for i, e in enumerate(new_entries):
4105 4111 rev = startrev + i
4106 4112 self.index.replace_sidedata_info(rev, *e)
4107 4113 packed = self.index.entry_binary(rev)
4108 4114 if rev == 0 and self._docket is None:
4109 4115 header = self._format_flags | self._format_version
4110 4116 header = self.index.pack_header(header)
4111 4117 packed = header + packed
4112 4118 ifh.write(packed)