branchmap-v3: detect invalid headerline and raise error...
marmoute
r52865:dd3ccda3 default
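
The change below hardens BranchCacheV3._load_header against corrupt cache files: a v3 header token without '=' used to crash inside dict() with a generic message. A minimal standalone sketch of both behaviors (illustrative values only, not Mercurial's API surface):

    header_line = b"tip-rev=5 tip-node=deadbeef garbage\n"
    pieces = header_line.rstrip(b'\n').split(b" ")
    # before this change: dict() raised a generic "dictionary update
    # sequence element ..." ValueError, hard to trace to a bad cache file
    # after: the guard names the offending header line instead
    for p in pieces:
        if b'=' not in p:
            raise ValueError(b"invalid header_line: %r" % header_line)
    cache_keys = dict(p.split(b'=', 1) for p in pieces)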
@@ -1,1083 +1,1087 @@
1 1 # branchmap.py - logic to computes, maintain and stores branchmap for local repo
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import annotations
9 9
10 10 from .node import (
11 11 bin,
12 12 hex,
13 13 nullrev,
14 14 )
15 15
16 16 from typing import (
17 17 Any,
18 18 Callable,
19 19 Dict,
20 20 Iterable,
21 21 List,
22 22 Optional,
23 23 Set,
24 24 TYPE_CHECKING,
25 25 Tuple,
26 26 Union,
27 27 cast,
28 28 )
29 29
30 30 from . import (
31 31 encoding,
32 32 error,
33 33 obsolete,
34 34 scmutil,
35 35 util,
36 36 )
37 37
38 38 from .utils import (
39 39 repoviewutil,
40 40 stringutil,
41 41 )
42 42
43 43 if TYPE_CHECKING:
44 44 from . import localrepo
45 45
46 46 assert [localrepo]
47 47
48 48 subsettable = repoviewutil.subsettable
49 49
50 50
51 51 class BranchMapCache:
52 52 """mapping of filtered views of repo with their branchcache"""
53 53
54 54 def __init__(self):
55 55 self._per_filter = {}
56 56
57 57 def __getitem__(self, repo):
58 58 self.updatecache(repo)
59 59 bcache = self._per_filter[repo.filtername]
60 60 bcache._ensure_populated(repo)
61 61 assert bcache._filtername == repo.filtername, (
62 62 bcache._filtername,
63 63 repo.filtername,
64 64 )
65 65 return bcache
66 66
67 67 def update_disk(self, repo, detect_pure_topo=False):
68 68 """ensure and up-to-date cache is (or will be) written on disk
69 69
70 70 The cache for this repository view is updated if needed and written on
71 71 disk.
72 72
73 73 If a transaction is in progress, the write is scheduled at transaction
74 74 close. See the `BranchMapCache.write_dirty` method.
75 75
76 76 This method exists independently of __getitem__ as it is sometimes
77 77 useful to signal that we have no intent to use the data in memory yet.
78 78 """
79 79 self.updatecache(repo)
80 80 bcache = self._per_filter[repo.filtername]
81 81 assert bcache._filtername == repo.filtername, (
82 82 bcache._filtername,
83 83 repo.filtername,
84 84 )
85 85 if detect_pure_topo:
86 86 bcache._detect_pure_topo(repo)
87 87 tr = repo.currenttransaction()
88 88 if getattr(tr, 'finalized', True):
89 89 bcache.sync_disk(repo)
90 90
91 91 def updatecache(self, repo):
92 92 """Update the cache for the given filtered view on a repository"""
93 93 # This can trigger updates for the caches for subsets of the filtered
94 94 # view, e.g. when there is no cache for this filtered view or the cache
95 95 # is stale.
96 96
97 97 cl = repo.changelog
98 98 filtername = repo.filtername
99 99 bcache = self._per_filter.get(filtername)
100 100 if bcache is None or not bcache.validfor(repo):
101 101 # cache object missing or cache object stale? Read from disk
102 102 bcache = branch_cache_from_file(repo)
103 103
104 104 revs = []
105 105 if bcache is None:
106 106 # no (fresh) cache available anymore, perhaps we can re-use
107 107 # the cache for a subset, then extend that to add info on missing
108 108 # revisions.
109 109 subsetname = subsettable.get(filtername)
110 110 if subsetname is not None:
111 111 subset = repo.filtered(subsetname)
112 112 self.updatecache(subset)
113 113 bcache = self._per_filter[subset.filtername].inherit_for(repo)
114 114 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
115 115 revs.extend(r for r in extrarevs if r <= bcache.tiprev)
116 116 else:
117 117 # nothing to fall back on, start empty.
118 118 bcache = new_branch_cache(repo)
119 119
120 120 revs.extend(cl.revs(start=bcache.tiprev + 1))
121 121 if revs:
122 122 bcache.update(repo, revs)
123 123
124 124 assert bcache.validfor(repo), filtername
125 125 self._per_filter[repo.filtername] = bcache
126 126
127 127 def replace(self, repo, remotebranchmap):
128 128 """Replace the branchmap cache for a repo with a branch mapping.
129 129
130 130 This is likely only called during clone with a branch map from a
131 131 remote.
132 132
133 133 """
134 134 cl = repo.changelog
135 135 clrev = cl.rev
136 136 clbranchinfo = cl.branchinfo
137 137 rbheads = []
138 138 closed = set()
139 139 for bheads in remotebranchmap.values():
140 140 rbheads += bheads
141 141 for h in bheads:
142 142 r = clrev(h)
143 143 b, c = clbranchinfo(r)
144 144 if c:
145 145 closed.add(h)
146 146
147 147 if rbheads:
148 148 rtiprev = max((int(clrev(node)) for node in rbheads))
149 149 cache = new_branch_cache(
150 150 repo,
151 151 remotebranchmap,
152 152 repo[rtiprev].node(),
153 153 rtiprev,
154 154 closednodes=closed,
155 155 )
156 156
157 157 # Try to stick it as low as possible
158 158 # filters above served are unlikely to be fetched from a clone
159 159 for candidate in (b'base', b'immutable', b'served'):
160 160 rview = repo.filtered(candidate)
161 161 if cache.validfor(rview):
162 162 cache._filtername = candidate
163 163 self._per_filter[candidate] = cache
164 164 cache._state = STATE_DIRTY
165 165 cache.write(rview)
166 166 return
167 167
168 168 def clear(self):
169 169 self._per_filter.clear()
170 170
171 171 def write_dirty(self, repo):
172 172 unfi = repo.unfiltered()
173 173 for filtername in repoviewutil.get_ordered_subset():
174 174 cache = self._per_filter.get(filtername)
175 175 if cache is None:
176 176 continue
177 177 if filtername is None:
178 178 repo = unfi
179 179 else:
180 180 repo = unfi.filtered(filtername)
181 181 cache.sync_disk(repo)
182 182
183 183
184 184 def _unknownnode(node):
185 185 """raises ValueError when branchcache found a node which does not exists"""
186 186 raise ValueError('node %s does not exist' % node.hex())
187 187
188 188
189 189 def _branchcachedesc(repo):
190 190 if repo.filtername is not None:
191 191 return b'branch cache (%s)' % repo.filtername
192 192 else:
193 193 return b'branch cache'
194 194
195 195
196 196 class _BaseBranchCache:
197 197 """A dict like object that hold branches heads cache.
198 198
199 199 This cache is used to avoid costly computations to determine all the
200 200 branch heads of a repo.
201 201 """
202 202
203 203 def __init__(
204 204 self,
205 205 repo: "localrepo.localrepository",
206 206 entries: Union[
207 207 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
208 208 ] = (),
209 209 closed_nodes: Optional[Set[bytes]] = None,
210 210 ) -> None:
211 211 """hasnode is a function which can be used to verify whether changelog
212 212 has a given node or not. If it's not provided, we assume that every node
213 213 we have exists in changelog"""
214 214 # closednodes is a set of nodes that close their branch. If the branch
215 215 # cache has been updated, it may contain nodes that are no longer
216 216 # heads.
217 217 if closed_nodes is None:
218 218 closed_nodes = set()
219 219 self._closednodes = set(closed_nodes)
220 220 self._entries = dict(entries)
221 221
222 222 def __iter__(self):
223 223 return iter(self._entries)
224 224
225 225 def __setitem__(self, key, value):
226 226 self._entries[key] = value
227 227
228 228 def __getitem__(self, key):
229 229 return self._entries[key]
230 230
231 231 def __contains__(self, key):
232 232 return key in self._entries
233 233
234 234 def iteritems(self):
235 235 return self._entries.items()
236 236
237 237 items = iteritems
238 238
239 239 def hasbranch(self, label):
240 240 """checks whether a branch of this name exists or not"""
241 241 return label in self._entries
242 242
243 243 def _branchtip(self, heads):
244 244 """Return tuple with last open head in heads and false,
245 245 otherwise return last closed head and true."""
246 246 tip = heads[-1]
247 247 closed = True
248 248 for h in reversed(heads):
249 249 if h not in self._closednodes:
250 250 tip = h
251 251 closed = False
252 252 break
253 253 return tip, closed
254 254
255 255 def branchtip(self, branch):
256 256 """Return the tipmost open head on branch head, otherwise return the
257 257 tipmost closed head on branch.
258 258 Raise KeyError for unknown branch."""
259 259 return self._branchtip(self[branch])[0]
260 260
261 261 def iteropen(self, nodes):
262 262 return (n for n in nodes if n not in self._closednodes)
263 263
264 264 def branchheads(self, branch, closed=False):
265 265 heads = self._entries[branch]
266 266 if not closed:
267 267 heads = list(self.iteropen(heads))
268 268 return heads
269 269
270 270 def iterbranches(self):
271 271 for bn, heads in self.items():
272 272 yield (bn, heads) + self._branchtip(heads)
273 273
274 274 def iterheads(self):
275 275 """returns all the heads"""
276 276 return self._entries.values()
277 277
278 278 def update(self, repo, revgen):
279 279 """Given a branchhead cache, self, that may have extra nodes or be
280 280 missing heads, and a generator of nodes that are strictly a superset of
281 281 heads missing, this function updates self to be correct.
282 282 """
283 283 starttime = util.timer()
284 284 cl = repo.changelog
285 285 # Faster than using ctx.obsolete()
286 286 obsrevs = obsolete.getrevs(repo, b'obsolete')
287 287 # collect new branch entries
288 288 newbranches = {}
289 289 new_closed = set()
290 290 obs_ignored = set()
291 291 getbranchinfo = repo.revbranchcache().branchinfo
292 292 max_rev = -1
293 293 for r in revgen:
294 294 max_rev = max(max_rev, r)
295 295 if r in obsrevs:
296 296 # We ignore obsolete changesets as they shouldn't be
297 297 # considered heads.
298 298 obs_ignored.add(r)
299 299 continue
300 300 branch, closesbranch = getbranchinfo(r)
301 301 newbranches.setdefault(branch, []).append(r)
302 302 if closesbranch:
303 303 new_closed.add(r)
304 304 if max_rev < 0:
305 305 msg = "running branchcache.update without revision to update"
306 306 raise error.ProgrammingError(msg)
307 307
308 308 self._process_new(
309 309 repo,
310 310 newbranches,
311 311 new_closed,
312 312 obs_ignored,
313 313 max_rev,
314 314 )
315 315
316 316 self._closednodes.update(cl.node(rev) for rev in new_closed)
317 317
318 318 duration = util.timer() - starttime
319 319 repo.ui.log(
320 320 b'branchcache',
321 321 b'updated %s in %.4f seconds\n',
322 322 _branchcachedesc(repo),
323 323 duration,
324 324 )
325 325 return max_rev
326 326
327 327 def _process_new(
328 328 self,
329 329 repo,
330 330 newbranches,
331 331 new_closed,
332 332 obs_ignored,
333 333 max_rev,
334 334 ):
335 335 """update the branchmap from a set of new information"""
336 336 # Delay fetching the topological heads until they are needed.
337 337 # A repository without non-continuous branches can skip this part.
338 338 topoheads = None
339 339
340 340 cl = repo.changelog
341 341 getbranchinfo = repo.revbranchcache().branchinfo
342 342 # Faster than using ctx.obsolete()
343 343 obsrevs = obsolete.getrevs(repo, b'obsolete')
344 344
345 345 # If a changeset is visible, its parents must be visible too, so
346 346 # use the faster unfiltered parent accessor.
347 347 parentrevs = cl._uncheckedparentrevs
348 348
349 349 for branch, newheadrevs in newbranches.items():
350 350 # For every branch, compute the new branchheads.
351 351 # A branchhead is a revision such that no descendant is on
352 352 # the same branch.
353 353 #
354 354 # The branchheads are computed iteratively in revision order.
355 355 # This ensures topological order, i.e. parents are processed
356 356 # before their children. Ancestors are inclusive here, i.e.
357 357 # any revision is an ancestor of itself.
358 358 #
359 359 # Core observations:
360 360 # - The current revision is always a branchhead for the
361 361 # repository up to that point.
362 362 # - It is the first revision of the branch if and only if
363 363 # there was no branchhead before. In that case, it is the
364 364 # only branchhead as there are no possible ancestors on
365 365 # the same branch.
366 366 # - If a parent is on the same branch, a branchhead can
367 367 # only be an ancestor of that parent if it is the parent
368 368 # itself. Otherwise it would have been removed as ancestor
369 369 # of that parent before.
370 370 # - Therefore, if all parents are on the same branch, they
371 371 # can just be removed from the branchhead set.
372 372 # - If one parent is on the same branch and the other is not
373 373 # and there was exactly one branchhead known, the existing
374 374 # branchhead can only be an ancestor if it is the parent.
375 375 # Otherwise it would have been removed as ancestor of
376 376 # the parent before. The other parent therefore can't have
377 377 # a branchhead as ancestor.
378 378 # - In all other cases, the parents on different branches
379 379 # could have a branchhead as ancestor. Those parents are
380 380 # kept in the "uncertain" set. If all branchheads are also
381 381 # topological heads, they can't have descendants and further
382 382 # checks can be skipped. Otherwise, the ancestors of the
383 383 # "uncertain" set are removed from branchheads.
384 384 # This computation is heavy and avoided if at all possible.
385 385 bheads = self._entries.get(branch, [])
386 386 bheadset = {cl.rev(node) for node in bheads}
387 387 uncertain = set()
388 388 for newrev in sorted(newheadrevs):
389 389 if not bheadset:
390 390 bheadset.add(newrev)
391 391 continue
392 392
393 393 parents = [p for p in parentrevs(newrev) if p != nullrev]
394 394 samebranch = set()
395 395 otherbranch = set()
396 396 obsparents = set()
397 397 for p in parents:
398 398 if p in obsrevs:
399 399 # We ignored this obsolete changeset earlier, but now
400 400 # that it has non-ignored children, we need to make
401 401 # sure their ancestors are not considered heads. To
402 402 # achieve that, we will simply treat this obsolete
403 403 # changeset as a parent from other branch.
404 404 obsparents.add(p)
405 405 elif p in bheadset or getbranchinfo(p)[0] == branch:
406 406 samebranch.add(p)
407 407 else:
408 408 otherbranch.add(p)
409 409 if not (len(bheadset) == len(samebranch) == 1):
410 410 uncertain.update(otherbranch)
411 411 uncertain.update(obsparents)
412 412 bheadset.difference_update(samebranch)
413 413 bheadset.add(newrev)
414 414
415 415 if uncertain:
416 416 if topoheads is None:
417 417 topoheads = set(cl.headrevs())
418 418 if bheadset - topoheads:
419 419 floorrev = min(bheadset)
420 420 if floorrev <= max(uncertain):
421 421 ancestors = set(cl.ancestors(uncertain, floorrev))
422 422 bheadset -= ancestors
423 423 if bheadset:
424 424 self[branch] = [cl.node(rev) for rev in sorted(bheadset)]
425 425
426 426
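In the single-branch case with no closed or obsolete revisions, the head-update loop above boils down to this toy sketch (a hypothetical helper, not part of this module): each revision, processed in ascending and hence topological order, displaces its same-branch parents from the head set.

    def update_heads(heads, new_revs, parents):
        """heads: set of revs; parents: rev -> same-branch parent revs"""
        for r in sorted(new_revs):
            heads.difference_update(parents.get(r, ()))  # parents stop being heads
            heads.add(r)  # the newest rev is always a head so far
        return heads

    # linear chain 0 <- 1 <- 2 plus a fork 1 <- 3: the heads are {2, 3}
    assert update_heads(set(), [0, 1, 2, 3], {1: (0,), 2: (1,), 3: (1,)}) == {2, 3}
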
427 427 STATE_CLEAN = 1
428 428 STATE_INHERITED = 2
429 429 STATE_DIRTY = 3
430 430
431 431
432 432 class _LocalBranchCache(_BaseBranchCache):
433 433 """base class of branch-map info for a local repo or repoview"""
434 434
435 435 _base_filename = None
436 436 _default_key_hashes: Tuple[bytes] = cast(Tuple[bytes], ())
437 437
438 438 def __init__(
439 439 self,
440 440 repo: "localrepo.localrepository",
441 441 entries: Union[
442 442 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
443 443 ] = (),
444 444 tipnode: Optional[bytes] = None,
445 445 tiprev: Optional[int] = nullrev,
446 446 key_hashes: Optional[Tuple[bytes]] = None,
447 447 closednodes: Optional[Set[bytes]] = None,
448 448 hasnode: Optional[Callable[[bytes], bool]] = None,
449 449 verify_node: bool = False,
450 450 inherited: bool = False,
451 451 ) -> None:
452 452 """hasnode is a function which can be used to verify whether changelog
453 453 has a given node or not. If it's not provided, we assume that every node
454 454 we have exists in changelog"""
455 455 self._filtername = repo.filtername
456 456 if tipnode is None:
457 457 self.tipnode = repo.nullid
458 458 else:
459 459 self.tipnode = tipnode
460 460 self.tiprev = tiprev
461 461 if key_hashes is None:
462 462 self.key_hashes = self._default_key_hashes
463 463 else:
464 464 self.key_hashes = key_hashes
465 465 self._state = STATE_CLEAN
466 466 if inherited:
467 467 self._state = STATE_INHERITED
468 468
469 469 super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
470 470 # closednodes is a set of nodes that close their branch. If the branch
471 471 # cache has been updated, it may contain nodes that are no longer
472 472 # heads.
473 473
474 474 # Do we need to verify branch at all ?
475 475 self._verify_node = verify_node
476 476 # branches for which nodes are verified
477 477 self._verifiedbranches = set()
478 478 self._hasnode = None
479 479 if self._verify_node:
480 480 self._hasnode = repo.changelog.hasnode
481 481
482 482 def _compute_key_hashes(self, repo) -> Tuple[bytes]:
483 483 raise NotImplementedError
484 484
485 485 def _ensure_populated(self, repo):
486 486 """make sure any lazily loaded values are fully populated"""
487 487
488 488 def _detect_pure_topo(self, repo) -> None:
489 489 pass
490 490
491 491 def validfor(self, repo):
492 492 """check that cache contents are valid for (a subset of) this repo
493 493
494 494 - False when the order of changesets changed or if we detect a strip.
495 495 - True when cache is up-to-date for the current repo or its subset."""
496 496 try:
497 497 node = repo.changelog.node(self.tiprev)
498 498 except IndexError:
499 499 # changesets were stripped and now we don't even have enough to
500 500 # find tiprev
501 501 return False
502 502 if self.tipnode != node:
503 503 # tiprev doesn't correspond to tipnode: repo was stripped, or this
504 504 # repo has a different order of changesets
505 505 return False
506 506 repo_key_hashes = self._compute_key_hashes(repo)
507 507 # hashes don't match if this repo view has a different set of filtered
508 508 # revisions (e.g. due to phase changes) or obsolete revisions (e.g.
509 509 # history was rewritten)
510 510 return self.key_hashes == repo_key_hashes
511 511
512 512 @classmethod
513 513 def fromfile(cls, repo):
514 514 f = None
515 515 try:
516 516 f = repo.cachevfs(cls._filename(repo))
517 517 lineiter = iter(f)
518 518 init_kwargs = cls._load_header(repo, lineiter)
519 519 bcache = cls(
520 520 repo,
521 521 verify_node=True,
522 522 **init_kwargs,
523 523 )
524 524 if not bcache.validfor(repo):
525 525 # invalidate the cache
526 526 raise ValueError('tip differs')
527 527 bcache._load_heads(repo, lineiter)
528 528 except (IOError, OSError):
529 529 return None
530 530
531 531 except Exception as inst:
532 532 if repo.ui.debugflag:
533 533 msg = b'invalid %s: %s\n'
534 534 msg %= (
535 535 _branchcachedesc(repo),
536 536 stringutil.forcebytestr(inst),
537 537 )
538 538 repo.ui.debug(msg)
539 539 bcache = None
540 540
541 541 finally:
542 542 if f:
543 543 f.close()
544 544
545 545 return bcache
546 546
547 547 @classmethod
548 548 def _load_header(cls, repo, lineiter) -> "dict[str, Any]":
549 549 raise NotImplementedError
550 550
551 551 def _load_heads(self, repo, lineiter):
552 552 """fully loads the branchcache by reading from the file using the line
553 553 iterator passed"""
554 554 for line in lineiter:
555 555 line = line.rstrip(b'\n')
556 556 if not line:
557 557 continue
558 558 node, state, label = line.split(b" ", 2)
559 559 if state not in b'oc':
560 560 raise ValueError('invalid branch state')
561 561 label = encoding.tolocal(label.strip())
562 562 node = bin(node)
563 563 self._entries.setdefault(label, []).append(node)
564 564 if state == b'c':
565 565 self._closednodes.add(node)
566 566
567 567 @classmethod
568 568 def _filename(cls, repo):
569 569 """name of a branchcache file for a given repo or repoview"""
570 570 filename = cls._base_filename
571 571 assert filename is not None
572 572 if repo.filtername:
573 573 filename = b'%s-%s' % (filename, repo.filtername)
574 574 return filename
575 575
576 576 def inherit_for(self, repo):
577 577 """return a deep copy of the branchcache object"""
578 578 assert repo.filtername != self._filtername
579 579 other = type(self)(
580 580 repo=repo,
581 581 # we always do a shallow copy of self._entries, and the values are
582 582 # always replaced, so there is no need to deepcopy as long as that
583 583 # remains true.
584 584 entries=self._entries,
585 585 tipnode=self.tipnode,
586 586 tiprev=self.tiprev,
587 587 key_hashes=self.key_hashes,
588 588 closednodes=set(self._closednodes),
589 589 verify_node=self._verify_node,
590 590 inherited=True,
591 591 )
592 592 # also copy information about the current verification state
593 593 other._verifiedbranches = set(self._verifiedbranches)
594 594 return other
595 595
596 596 def sync_disk(self, repo):
597 597 """synchronise the on disk file with the cache state
598 598
599 599 If new values specific to this filter level need to be written, the
600 600 file will be updated. If the state of the branchcache is inherited
601 601 from a subset, any stale on-disk file will be deleted.
602 602 
603 603 This method does nothing if there is nothing to do.
604 604 """
605 605 if self._state == STATE_DIRTY:
606 606 self.write(repo)
607 607 elif self._state == STATE_INHERITED:
608 608 filename = self._filename(repo)
609 609 repo.cachevfs.tryunlink(filename)
610 610
611 611 def write(self, repo):
612 612 assert self._filtername == repo.filtername, (
613 613 self._filtername,
614 614 repo.filtername,
615 615 )
616 616 assert self._state == STATE_DIRTY, self._state
617 617 # This method should not be called during an open transaction
618 618 tr = repo.currenttransaction()
619 619 if not getattr(tr, 'finalized', True):
620 620 msg = "writing branchcache in the middle of a transaction"
621 621 raise error.ProgrammingError(msg)
622 622 try:
623 623 filename = self._filename(repo)
624 624 with repo.cachevfs(filename, b"w", atomictemp=True) as f:
625 625 self._write_header(f)
626 626 nodecount = self._write_heads(repo, f)
627 627 repo.ui.log(
628 628 b'branchcache',
629 629 b'wrote %s with %d labels and %d nodes\n',
630 630 _branchcachedesc(repo),
631 631 len(self._entries),
632 632 nodecount,
633 633 )
634 634 self._state = STATE_CLEAN
635 635 except (IOError, OSError, error.Abort) as inst:
636 636 # Abort may be raised by read only opener, so log and continue
637 637 repo.ui.debug(
638 638 b"couldn't write branch cache: %s\n"
639 639 % stringutil.forcebytestr(inst)
640 640 )
641 641
642 642 def _write_header(self, fp) -> None:
643 643 raise NotImplementedError
644 644
645 645 def _write_heads(self, repo, fp) -> int:
646 646 """write list of heads to a file
647 647
648 648 Return the number of heads written."""
649 649 nodecount = 0
650 650 for label, nodes in sorted(self._entries.items()):
651 651 label = encoding.fromlocal(label)
652 652 for node in nodes:
653 653 nodecount += 1
654 654 if node in self._closednodes:
655 655 state = b'c'
656 656 else:
657 657 state = b'o'
658 658 fp.write(b"%s %s %s\n" % (hex(node), state, label))
659 659 return nodecount
660 660
661 661 def _verifybranch(self, branch):
662 662 """verify head nodes for the given branch."""
663 663 if not self._verify_node:
664 664 return
665 665 if branch not in self._entries or branch in self._verifiedbranches:
666 666 return
667 667 assert self._hasnode is not None
668 668 for n in self._entries[branch]:
669 669 if not self._hasnode(n):
670 670 _unknownnode(n)
671 671
672 672 self._verifiedbranches.add(branch)
673 673
674 674 def _verifyall(self):
675 675 """verifies nodes of all the branches"""
676 676 for b in self._entries.keys():
677 677 if b not in self._verifiedbranches:
678 678 self._verifybranch(b)
679 679
680 680 def __getitem__(self, key):
681 681 self._verifybranch(key)
682 682 return super().__getitem__(key)
683 683
684 684 def __contains__(self, key):
685 685 self._verifybranch(key)
686 686 return super().__contains__(key)
687 687
688 688 def iteritems(self):
689 689 self._verifyall()
690 690 return super().iteritems()
691 691
692 692 items = iteritems
693 693
694 694 def iterheads(self):
695 695 """returns all the heads"""
696 696 self._verifyall()
697 697 return super().iterheads()
698 698
699 699 def hasbranch(self, label):
700 700 """checks whether a branch of this name exists or not"""
701 701 self._verifybranch(label)
702 702 return super().hasbranch(label)
703 703
704 704 def branchheads(self, branch, closed=False):
705 705 self._verifybranch(branch)
706 706 return super().branchheads(branch, closed=closed)
707 707
708 708 def update(self, repo, revgen):
709 709 assert self._filtername == repo.filtername, (
710 710 self._filtername,
711 711 repo.filtername,
712 712 )
713 713 cl = repo.changelog
714 714 max_rev = super().update(repo, revgen)
715 715 # new tip revision which we found after iterating items from new
716 716 # branches
717 717 if max_rev is not None and max_rev > self.tiprev:
718 718 self.tiprev = max_rev
719 719 self.tipnode = cl.node(max_rev)
720 720 else:
721 721 # We should not be here if this is false
722 722 assert cl.node(self.tiprev) == self.tipnode
723 723
724 724 if not self.validfor(repo):
725 725 # the tiprev and tipnode should be aligned, so if the current repo
726 726 # is not seen as valid, it is because the old cache key is now
727 727 # invalid for the repo.
728 728 #
729 729 # However, we've just updated the cache and we assume it's valid,
730 730 # so let's make the cache key valid as well by recomputing it from
731 731 # the cached data
732 732 self.key_hashes = self._compute_key_hashes(repo)
733 733 self.filteredhash = scmutil.combined_filtered_and_obsolete_hash(
734 734 repo,
735 735 self.tiprev,
736 736 )
737 737
738 738 self._state = STATE_DIRTY
739 739 tr = repo.currenttransaction()
740 740 if getattr(tr, 'finalized', True):
741 741 # Avoid premature writing.
742 742 #
743 743 # (The cache warming setup by localrepo will update the file later.)
744 744 self.write(repo)
745 745
746 746
747 747 def branch_cache_from_file(repo) -> Optional[_LocalBranchCache]:
748 748 """Build a branch cache from on-disk data if possible
749 749
750 750 Return a branch cache of the right format depending on the repository.
751 751 """
752 752 if repo.ui.configbool(b"experimental", b"branch-cache-v3"):
753 753 return BranchCacheV3.fromfile(repo)
754 754 else:
755 755 return BranchCacheV2.fromfile(repo)
756 756
757 757
758 758 def new_branch_cache(repo, *args, **kwargs):
759 759 """Build a new branch cache from argument
760 760
761 761 Return a branch cache of the right format depending of the repository.
762 762 """
763 763 if repo.ui.configbool(b"experimental", b"branch-cache-v3"):
764 764 return BranchCacheV3(repo, *args, **kwargs)
765 765 else:
766 766 return BranchCacheV2(repo, *args, **kwargs)
767 767
768 768
769 769 class BranchCacheV2(_LocalBranchCache):
770 770 """a branch cache using version 2 of the format on disk
771 771
772 772 The cache is serialized on disk in the following format:
773 773
774 774 <tip hex node> <tip rev number> [optional filtered repo hex hash]
775 775 <branch head hex node> <open/closed state> <branch name>
776 776 <branch head hex node> <open/closed state> <branch name>
777 777 ...
778 778
779 779 The first line is used to check if the cache is still valid. If the
780 780 branch cache is for a filtered repo view, an optional third hash is
781 781 included that hashes the hashes of all filtered and obsolete revisions.
782 782
783 783 The open/closed state is represented by a single letter 'o' or 'c'.
784 784 This field can be used to avoid changelog reads when determining if a
785 785 branch head closes a branch or not.
786 786 """
787 787
788 788 _base_filename = b"branch2"
789 789
790 790 @classmethod
791 791 def _load_header(cls, repo, lineiter) -> "dict[str, Any]":
792 792 """parse the head of a branchmap file
793 793
794 794 return parameters to pass to a newly created class instance.
795 795 """
796 796 cachekey = next(lineiter).rstrip(b'\n').split(b" ", 2)
797 797 last, lrev = cachekey[:2]
798 798 last, lrev = bin(last), int(lrev)
799 799 filteredhash = ()
800 800 if len(cachekey) > 2:
801 801 filteredhash = (bin(cachekey[2]),)
802 802 return {
803 803 "tipnode": last,
804 804 "tiprev": lrev,
805 805 "key_hashes": filteredhash,
806 806 }
807 807
808 808 def _write_header(self, fp) -> None:
809 809 """write the branch cache header to a file"""
810 810 cachekey = [hex(self.tipnode), b'%d' % self.tiprev]
811 811 if self.key_hashes:
812 812 cachekey.append(hex(self.key_hashes[0]))
813 813 fp.write(b" ".join(cachekey) + b'\n')
814 814
815 815 def _compute_key_hashes(self, repo) -> Tuple[bytes]:
816 816 """return the cache key hashes that match this repoview state"""
817 817 filtered_hash = scmutil.combined_filtered_and_obsolete_hash(
818 818 repo,
819 819 self.tiprev,
820 820 needobsolete=True,
821 821 )
822 822 keys: Tuple[bytes] = cast(Tuple[bytes], ())
823 823 if filtered_hash is not None:
824 824 keys: Tuple[bytes] = (filtered_hash,)
825 825 return keys
826 826
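For illustration, a branch2 cache file written by the class above might read as follows (hashes shortened and hypothetical; the third field on the first line only appears for filtered views):

    f2c8e1d90a... 52865 9b3ad1c47e...
    f2c8e1d90a... o default
    4e7b2a90cc... c stable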
827 827
828 828 class BranchCacheV3(_LocalBranchCache):
829 829 """a branch cache using version 3 of the format on disk
830 830
831 831 This version is still EXPERIMENTAL and the format is subject to changes.
832 832
833 833 The cache is serialized on disk in the following format:
834 834
835 835 <cache-key-xxx>=<xxx-value> <cache-key-yyy>=<yyy-value> […]
836 836 <branch head hex node> <open/closed state> <branch name>
837 837 <branch head hex node> <open/closed state> <branch name>
838 838 ...
839 839
840 840 The first line is used to check if the cache is still valid. It is a
841 841 series of key-value pairs. The following keys are recognized:
842 842
843 843 - tip-rev: the rev-num of the tip-most revision seen by this cache
844 844 - tip-node: the node-id of the tip-most revision seen by this cache
845 845 - filtered-hash: the hash of all filtered revisions (before tip-rev)
846 846 ignored by this cache.
847 847 - obsolete-hash: the hash of all non-filtered obsolete revisions (before
848 848 tip-rev) ignored by this cache.
849 849
850 850 The tip-rev is used to know how far behind the values in the file are
851 851 compared to the current repository state.
852 852
853 853 The tip-node, filtered-hash and obsolete-hash are used to detect if this
854 854 cache can be used for this repository state at all.
855 855
856 856 The open/closed state is represented by a single letter 'o' or 'c'.
857 857 This field can be used to avoid changelog reads when determining if a
858 858 branch head closes a branch or not.
859 859
860 860 Topological heads are not included in the listing and should be dispatched
861 861 on the right branch at read time. Obsolete topological heads should be
862 862 ignored.
863 863 """
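A concrete first line, with hypothetical values, could therefore read (keys are written in sorted order, see _write_header below):

    filtered-hash=9b3ad1c47e... obsolete-hash=5d02c8a6f1... tip-node=dd3ccda3... tip-rev=52865 topo-mode=pure
    default

where the trailing line names the branch announced by topo-mode=pure.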
864 864
865 865 _base_filename = b"branch3-exp"
866 866 _default_key_hashes = (None, None)
867 867
868 868 def __init__(self, *args, pure_topo_branch=None, **kwargs):
869 869 super().__init__(*args, **kwargs)
870 870 self._pure_topo_branch = pure_topo_branch
871 871 self._needs_populate = self._pure_topo_branch is not None
872 872
873 873 def inherit_for(self, repo):
874 874 new = super().inherit_for(repo)
875 875 new._pure_topo_branch = self._pure_topo_branch
876 876 new._needs_populate = self._needs_populate
877 877 return new
878 878
879 879 def _get_topo_heads(self, repo):
880 880 """returns the topological head of a repoview content up to self.tiprev"""
881 881 cl = repo.changelog
882 882 if self.tiprev == nullrev:
883 883 return []
884 884 elif self.tiprev == cl.tiprev():
885 885 return cl.headrevs()
886 886 else:
887 887 # XXX passing tiprev as ceiling of cl.headrevs could be faster
888 888 heads = cl.headrevs(cl.revs(stop=self.tiprev))
889 889 return heads
890 890
891 891 def _write_header(self, fp) -> None:
892 892 cache_keys = {
893 893 b"tip-node": hex(self.tipnode),
894 894 b"tip-rev": b'%d' % self.tiprev,
895 895 }
896 896 if self.key_hashes:
897 897 if self.key_hashes[0] is not None:
898 898 cache_keys[b"filtered-hash"] = hex(self.key_hashes[0])
899 899 if self.key_hashes[1] is not None:
900 900 cache_keys[b"obsolete-hash"] = hex(self.key_hashes[1])
901 901 if self._pure_topo_branch is not None:
902 902 cache_keys[b"topo-mode"] = b"pure"
903 903 pieces = (b"%s=%s" % i for i in sorted(cache_keys.items()))
904 904 fp.write(b" ".join(pieces) + b'\n')
905 905 if self._pure_topo_branch is not None:
906 906 label = encoding.fromlocal(self._pure_topo_branch)
907 907 fp.write(label + b'\n')
908 908
909 909 def _write_heads(self, repo, fp) -> int:
910 910 """write list of heads to a file
911 911
912 912 Return the number of heads written."""
913 913 nodecount = 0
914 914 topo_heads = None
915 915 if self._pure_topo_branch is None:
916 916 topo_heads = set(self._get_topo_heads(repo))
917 917 to_rev = repo.changelog.index.rev
918 918 for label, nodes in sorted(self._entries.items()):
919 919 if label == self._pure_topo_branch:
920 920 # no need to write anything, the header took care of that
921 921 continue
922 922 label = encoding.fromlocal(label)
923 923 for node in nodes:
924 924 if topo_heads is not None:
925 925 rev = to_rev(node)
926 926 if rev in topo_heads:
927 927 continue
928 928 if node in self._closednodes:
929 929 state = b'c'
930 930 else:
931 931 state = b'o'
932 932 nodecount += 1
933 933 fp.write(b"%s %s %s\n" % (hex(node), state, label))
934 934 return nodecount
935 935
936 936 @classmethod
937 937 def _load_header(cls, repo, lineiter):
938 938 header_line = next(lineiter)
939 939 pieces = header_line.rstrip(b'\n').split(b" ")
940 for p in pieces:
941 if b'=' not in p:
942 msg = b"invalid header_line: %r" % header_line
943 raise ValueError(msg)
940 944 cache_keys = dict(p.split(b'=', 1) for p in pieces)
941 945
942 946 args = {}
943 947 filtered_hash = None
944 948 obsolete_hash = None
945 949 has_pure_topo_heads = False
946 950 for k, v in cache_keys.items():
947 951 if k == b"tip-rev":
948 952 args["tiprev"] = int(v)
949 953 elif k == b"tip-node":
950 954 args["tipnode"] = bin(v)
951 955 elif k == b"filtered-hash":
952 956 filtered_hash = bin(v)
953 957 elif k == b"obsolete-hash":
954 958 obsolete_hash = bin(v)
955 959 elif k == b"topo-mode":
956 960 if v == b"pure":
957 961 has_pure_topo_heads = True
958 962 else:
959 963 msg = b"unknown topo-mode: %r" % v
960 964 raise ValueError(msg)
961 965 else:
962 966 msg = b"unknown cache key: %r" % k
963 967 raise ValueError(msg)
964 968 args["key_hashes"] = (filtered_hash, obsolete_hash)
965 969 if has_pure_topo_heads:
966 970 pure_line = next(lineiter).rstrip(b'\n')
967 971 args["pure_topo_branch"] = encoding.tolocal(pure_line)
968 972 return args
969 973
970 974 def _load_heads(self, repo, lineiter):
971 975 """fully loads the branchcache by reading from the file using the line
972 976 iterator passed"""
973 977 super()._load_heads(repo, lineiter)
974 978 if self._pure_topo_branch is not None:
975 979 # no need to read the repository heads, we know their value already.
976 980 return
977 981 cl = repo.changelog
978 982 getbranchinfo = repo.revbranchcache().branchinfo
979 983 obsrevs = obsolete.getrevs(repo, b'obsolete')
980 984 to_node = cl.node
981 985 touched_branch = set()
982 986 for head in self._get_topo_heads(repo):
983 987 if head in obsrevs:
984 988 continue
985 989 node = to_node(head)
986 990 branch, closed = getbranchinfo(head)
987 991 self._entries.setdefault(branch, []).append(node)
988 992 if closed:
989 993 self._closednodes.add(node)
990 994 touched_branch.add(branch)
991 995 to_rev = cl.index.rev
992 996 for branch in touched_branch:
993 997 self._entries[branch].sort(key=to_rev)
994 998
995 999 def _compute_key_hashes(self, repo) -> Tuple[bytes]:
996 1000 """return the cache key hashes that match this repoview state"""
997 1001 return scmutil.filtered_and_obsolete_hash(
998 1002 repo,
999 1003 self.tiprev,
1000 1004 )
1001 1005
1002 1006 def _process_new(
1003 1007 self,
1004 1008 repo,
1005 1009 newbranches,
1006 1010 new_closed,
1007 1011 obs_ignored,
1008 1012 max_rev,
1009 1013 ) -> None:
1010 1014 if (
1011 1015 # note: the check about `obs_ignored` is too strict, as the
1012 1016 # obsolete revisions could be non-topological, but let's keep
1013 1017 # things simple for now
1014 1018 #
1015 1019 # The same applies to `new_closed`: if the closed changesets are
1016 1020 # not heads, we don't care that they are closed, but let's keep
1017 1021 # things simple here too.
1018 1022 not (obs_ignored or new_closed)
1019 1023 and (
1020 1024 not newbranches
1021 1025 or (
1022 1026 len(newbranches) == 1
1023 1027 and (
1024 1028 self.tiprev == nullrev
1025 1029 or self._pure_topo_branch in newbranches
1026 1030 )
1027 1031 )
1028 1032 )
1029 1033 ):
1030 1034 if newbranches:
1031 1035 assert len(newbranches) == 1
1032 1036 self._pure_topo_branch = list(newbranches.keys())[0]
1033 1037 self._needs_populate = True
1034 1038 self._entries.pop(self._pure_topo_branch, None)
1035 1039 return
1036 1040
1037 1041 self._ensure_populated(repo)
1038 1042 self._pure_topo_branch = None
1039 1043 super()._process_new(
1040 1044 repo,
1041 1045 newbranches,
1042 1046 new_closed,
1043 1047 obs_ignored,
1044 1048 max_rev,
1045 1049 )
1046 1050
1047 1051 def _ensure_populated(self, repo):
1048 1052 """make sure any lazily loaded values are fully populated"""
1049 1053 if self._needs_populate:
1050 1054 assert self._pure_topo_branch is not None
1051 1055 cl = repo.changelog
1052 1056 to_node = cl.node
1053 1057 topo_heads = self._get_topo_heads(repo)
1054 1058 heads = [to_node(r) for r in topo_heads]
1055 1059 self._entries[self._pure_topo_branch] = heads
1056 1060 self._needs_populate = False
1057 1061
1058 1062 def _detect_pure_topo(self, repo) -> None:
1059 1063 if self._pure_topo_branch is not None:
1060 1064 # we are pure topological already
1061 1065 return
1062 1066 to_node = repo.changelog.node
1063 1067 topo_heads = [to_node(r) for r in self._get_topo_heads(repo)]
1064 1068 if any(n in self._closednodes for n in topo_heads):
1065 1069 return
1066 1070 for branch, heads in self._entries.items():
1067 1071 if heads == topo_heads:
1068 1072 self._pure_topo_branch = branch
1069 1073 break
1070 1074
1071 1075
1072 1076 class remotebranchcache(_BaseBranchCache):
1073 1077 """Branchmap info for a remote connection, should not write locally"""
1074 1078
1075 1079 def __init__(
1076 1080 self,
1077 1081 repo: "localrepo.localrepository",
1078 1082 entries: Union[
1079 1083 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
1080 1084 ] = (),
1081 1085 closednodes: Optional[Set[bytes]] = None,
1082 1086 ) -> None:
1083 1087 super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
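
As the configbool checks in branch_cache_from_file and new_branch_cache show, the v3 format is still opt-in; a repository could enable it with an hgrc snippet such as:

    [experimental]
    branch-cache-v3 = yes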