##// END OF EJS Templates
branchcache: do not use `__getitem__` in updatecache...
marmoute -
r52387:54f0dd79 default
parent child Browse files
Show More
@@ -1,1072 +1,1073 b''
1 1 # branchmap.py - logic to computes, maintain and stores branchmap for local repo
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import struct
10 10
11 11 from .node import (
12 12 bin,
13 13 hex,
14 14 nullrev,
15 15 )
16 16
17 17 from typing import (
18 18 Any,
19 19 Callable,
20 20 Dict,
21 21 Iterable,
22 22 List,
23 23 Optional,
24 24 Set,
25 25 TYPE_CHECKING,
26 26 Tuple,
27 27 Union,
28 28 )
29 29
30 30 from . import (
31 31 encoding,
32 32 error,
33 33 obsolete,
34 34 scmutil,
35 35 util,
36 36 )
37 37
38 38 from .utils import (
39 39 repoviewutil,
40 40 stringutil,
41 41 )
42 42
43 43 if TYPE_CHECKING:
44 44 from . import localrepo
45 45
46 46 assert [localrepo]
47 47
# mapping of filter level -> next-broader filter level, used to find a subset
# cache to inherit from (re-exported from repoviewutil)
subsettable = repoviewutil.subsettable

# module-level aliases for the struct helpers used by the rev branch cache
calcsize = struct.calcsize
pack_into = struct.pack_into
unpack_from = struct.unpack_from
53 53
54 54
class BranchMapCache:
    """Hold one branchcache per filtered view of a repository.

    Instances map a repoview's filter name to its in-memory branchcache,
    creating, inheriting and refreshing those caches on demand.
    """

    def __init__(self):
        self._per_filter = {}

    def __getitem__(self, repo):
        self.updatecache(repo)
        cache = self._per_filter[repo.filtername]
        assert cache._filtername == repo.filtername, (
            cache._filtername,
            repo.filtername,
        )
        return cache

    def update_disk(self, repo):
        """ensure an up-to-date cache is (or will be) written on disk

        The cache for this repository view is updated if needed and written on
        disk.

        If a transaction is in progress, the writing is schedule to transaction
        close. See the `BranchMapCache.write_dirty` method.

        This method exist independently of __getitem__ as it is sometime useful
        to signal that we have no intend to use the data in memory yet.
        """
        self.updatecache(repo)
        cache = self._per_filter[repo.filtername]
        assert cache._filtername == repo.filtername, (
            cache._filtername,
            repo.filtername,
        )
        cache.sync_disk(repo)

    def updatecache(self, repo):
        """Update the cache for the given filtered view on a repository"""
        # May recursively refresh caches for subsets of this view, e.g. when
        # there is no cache for this filtered view yet or the cache is stale.
        cl = repo.changelog
        filtername = repo.filtername
        cache = self._per_filter.get(filtername)
        if cache is not None and not cache.validfor(repo):
            cache = None
        if cache is None:
            # no usable in-memory object: try reading one from disk
            cache = branchcache.fromfile(repo)

        revs = []
        if cache is None:
            # No (fresh) cache available anymore; perhaps the cache for a
            # subset can be inherited, then extended with info on the
            # revisions it does not cover.
            subsetname = subsettable.get(filtername)
            if subsetname is None:
                # nothing to fall back on, start empty
                cache = branchcache(repo)
            else:
                subset = repo.filtered(subsetname)
                self.updatecache(subset)
                cache = self._per_filter[subset.filtername].inherit_for(repo)
                extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
                revs.extend(r for r in extrarevs if r <= cache.tiprev)

        revs.extend(cl.revs(start=cache.tiprev + 1))
        if revs:
            cache.update(repo, revs)

        assert cache.validfor(repo), filtername
        self._per_filter[repo.filtername] = cache

    def replace(self, repo, remotebranchmap):
        """Replace the branchmap cache for a repo with a branch mapping.

        This is likely only called during clone with a branch map from a
        remote.

        """
        cl = repo.changelog
        clrev = cl.rev
        clbranchinfo = cl.branchinfo
        rbheads = []
        closed = set()
        for bheads in remotebranchmap.values():
            rbheads.extend(bheads)
            for h in bheads:
                r = clrev(h)
                b, c = clbranchinfo(r)
                if c:
                    closed.add(h)

        if not rbheads:
            return
        rtiprev = max(int(clrev(node)) for node in rbheads)
        cache = branchcache(
            repo,
            remotebranchmap,
            repo[rtiprev].node(),
            rtiprev,
            closednodes=closed,
        )

        # Try to stick it as low as possible: filters above 'served' are
        # unlikely to be fetched from a clone.
        for candidate in (b'base', b'immutable', b'served'):
            rview = repo.filtered(candidate)
            if cache.validfor(rview):
                cache._filtername = candidate
                self._per_filter[candidate] = cache
                cache._state = STATE_DIRTY
                cache.write(rview)
                return

    def clear(self):
        self._per_filter.clear()

    def write_dirty(self, repo):
        unfi = repo.unfiltered()
        for filtername in repoviewutil.get_ordered_subset():
            cache = self._per_filter.get(filtername)
            if cache is None:
                continue
            if filtername is None:
                view = unfi
            else:
                view = unfi.filtered(filtername)
            cache.sync_disk(view)
180 181
181 182
182 183 def _unknownnode(node):
183 184 """raises ValueError when branchcache found a node which does not exists"""
184 185 raise ValueError('node %s does not exist' % node.hex())
185 186
186 187
187 188 def _branchcachedesc(repo):
188 189 if repo.filtername is not None:
189 190 return b'branch cache (%s)' % repo.filtername
190 191 else:
191 192 return b'branch cache'
192 193
193 194
class _BaseBranchCache:
    """A dict like object that hold branches heads cache.

    This cache is used to avoid costly computations to determine all the
    branch heads of a repo.

    The cache is serialized on disk in the following format:

    <tip hex node> <tip rev number> [optional filtered repo hex hash]
    <branch head hex node> <open/closed state> <branch name>
    <branch head hex node> <open/closed state> <branch name>
    ...

    The first line is used to check if the cache is still valid. If the
    branch cache is for a filtered repo view, an optional third hash is
    included that hashes the hashes of all filtered and obsolete revisions.

    The open/closed state is represented by a single letter 'o' or 'c'.
    This field can be used to avoid changelog reads when determining if a
    branch head closes a branch or not.
    """

    def __init__(
        self,
        repo: "localrepo.localrepository",
        entries: Union[
            Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
        ] = (),
        closed_nodes: Optional[Set[bytes]] = None,
    ) -> None:
        """Initialize from an optional branch-name -> head-nodes mapping.

        `closed_nodes` is the set of nodes known to close their branch; a
        private copy is taken, so the caller's set is never mutated.
        """
        # closednodes is a set of nodes that close their branch. If the branch
        # cache has been updated, it may contain nodes that are no longer
        # heads.
        if closed_nodes is None:
            closed_nodes = set()
        self._closednodes = set(closed_nodes)
        self._entries = dict(entries)

    def __iter__(self):
        # iterate over branch names, like a dict
        return iter(self._entries)

    def __setitem__(self, key, value):
        self._entries[key] = value

    def __getitem__(self, key):
        return self._entries[key]

    def __contains__(self, key):
        return key in self._entries

    def iteritems(self):
        # (branch name, head nodes) pairs
        return self._entries.items()

    items = iteritems

    def hasbranch(self, label):
        """checks whether a branch of this name exists or not"""
        return label in self._entries

    def _branchtip(self, heads):
        """Return tuple with last open head in heads and false,
        otherwise return last closed head and true."""
        tip = heads[-1]
        closed = True
        for h in reversed(heads):
            if h not in self._closednodes:
                tip = h
                closed = False
                break
        return tip, closed

    def branchtip(self, branch):
        """Return the tipmost open head on branch head, otherwise return the
        tipmost closed head on branch.
        Raise KeyError for unknown branch."""
        return self._branchtip(self[branch])[0]

    def iteropen(self, nodes):
        """Filter `nodes` down to those that do not close their branch."""
        return (n for n in nodes if n not in self._closednodes)

    def branchheads(self, branch, closed=False):
        """Return the head nodes of `branch`, excluding closed heads unless
        `closed` is True. Raise KeyError for an unknown branch."""
        heads = self._entries[branch]
        if not closed:
            heads = list(self.iteropen(heads))
        return heads

    def iterbranches(self):
        # yields (branch name, heads, tip node, closed?) tuples
        for bn, heads in self.items():
            yield (bn, heads) + self._branchtip(heads)

    def iterheads(self):
        """returns all the heads"""
        return self._entries.values()

    def update(self, repo, revgen):
        """Given a branchhead cache, self, that may have extra nodes or be
        missing heads, and a generator of nodes that are strictly a superset of
        heads missing, this function updates self to be correct.

        Returns the largest revision number processed. Raises ProgrammingError
        when `revgen` yields no revision at all.
        """
        starttime = util.timer()
        cl = repo.changelog
        # collect new branch entries
        newbranches = {}
        getbranchinfo = repo.revbranchcache().branchinfo
        max_rev = -1
        for r in revgen:
            branch, closesbranch = getbranchinfo(r)
            newbranches.setdefault(branch, []).append(r)
            if closesbranch:
                self._closednodes.add(cl.node(r))
            max_rev = max(max_rev, r)
        if max_rev < 0:
            msg = "running branchcache.update without revision to update"
            raise error.ProgrammingError(msg)

        # Delay fetching the topological heads until they are needed.
        # A repository without non-continous branches can skip this part.
        topoheads = None

        # If a changeset is visible, its parents must be visible too, so
        # use the faster unfiltered parent accessor.
        parentrevs = repo.unfiltered().changelog.parentrevs

        # Faster than using ctx.obsolete()
        obsrevs = obsolete.getrevs(repo, b'obsolete')

        for branch, newheadrevs in newbranches.items():
            # For every branch, compute the new branchheads.
            # A branchhead is a revision such that no descendant is on
            # the same branch.
            #
            # The branchheads are computed iteratively in revision order.
            # This ensures topological order, i.e. parents are processed
            # before their children. Ancestors are inclusive here, i.e.
            # any revision is an ancestor of itself.
            #
            # Core observations:
            # - The current revision is always a branchhead for the
            #   repository up to that point.
            # - It is the first revision of the branch if and only if
            #   there was no branchhead before. In that case, it is the
            #   only branchhead as there are no possible ancestors on
            #   the same branch.
            # - If a parent is on the same branch, a branchhead can
            #   only be an ancestor of that parent, if it is parent
            #   itself. Otherwise it would have been removed as ancestor
            #   of that parent before.
            # - Therefore, if all parents are on the same branch, they
            #   can just be removed from the branchhead set.
            # - If one parent is on the same branch and the other is not
            #   and there was exactly one branchhead known, the existing
            #   branchhead can only be an ancestor if it is the parent.
            #   Otherwise it would have been removed as ancestor of
            #   the parent before. The other parent therefore can't have
            #   a branchhead as ancestor.
            # - In all other cases, the parents on different branches
            #   could have a branchhead as ancestor. Those parents are
            #   kept in the "uncertain" set. If all branchheads are also
            #   topological heads, they can't have descendants and further
            #   checks can be skipped. Otherwise, the ancestors of the
            #   "uncertain" set are removed from branchheads.
            #   This computation is heavy and avoided if at all possible.
            bheads = self._entries.get(branch, [])
            bheadset = {cl.rev(node) for node in bheads}
            uncertain = set()
            for newrev in sorted(newheadrevs):
                if newrev in obsrevs:
                    # We ignore obsolete changesets as they shouldn't be
                    # considered heads.
                    continue

                if not bheadset:
                    bheadset.add(newrev)
                    continue

                parents = [p for p in parentrevs(newrev) if p != nullrev]
                samebranch = set()
                otherbranch = set()
                obsparents = set()
                for p in parents:
                    if p in obsrevs:
                        # We ignored this obsolete changeset earlier, but now
                        # that it has non-ignored children, we need to make
                        # sure their ancestors are not considered heads. To
                        # achieve that, we will simply treat this obsolete
                        # changeset as a parent from other branch.
                        obsparents.add(p)
                    elif p in bheadset or getbranchinfo(p)[0] == branch:
                        samebranch.add(p)
                    else:
                        otherbranch.add(p)
                if not (len(bheadset) == len(samebranch) == 1):
                    uncertain.update(otherbranch)
                    uncertain.update(obsparents)
                bheadset.difference_update(samebranch)
                bheadset.add(newrev)

            if uncertain:
                if topoheads is None:
                    topoheads = set(cl.headrevs())
                if bheadset - topoheads:
                    floorrev = min(bheadset)
                    if floorrev <= max(uncertain):
                        ancestors = set(cl.ancestors(uncertain, floorrev))
                        bheadset -= ancestors
            if bheadset:
                self[branch] = [cl.node(rev) for rev in sorted(bheadset)]

        duration = util.timer() - starttime
        repo.ui.log(
            b'branchcache',
            b'updated %s in %.4f seconds\n',
            _branchcachedesc(repo),
            duration,
        )
        return max_rev
413 414
414 415
# Lifecycle states of an in-memory branchcache relative to its on-disk file
# (see branchcache.sync_disk):
STATE_CLEAN = 1  # in sync with the on-disk file
STATE_INHERITED = 2  # copied from a subset's cache; on-disk file may be stale
STATE_DIRTY = 3  # modified in memory; needs to be written out
418 419
419 420
class branchcache(_BaseBranchCache):
    """Branchmap info for a local repo or repoview"""

    # base name of the cache file; the filter name is appended for repoviews
    _base_filename = b"branch2"

    def __init__(
        self,
        repo: "localrepo.localrepository",
        entries: Union[
            Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
        ] = (),
        tipnode: Optional[bytes] = None,
        tiprev: Optional[int] = nullrev,
        filteredhash: Optional[bytes] = None,
        closednodes: Optional[Set[bytes]] = None,
        hasnode: Optional[Callable[[bytes], bool]] = None,
        verify_node: bool = False,
        inherited: bool = False,
    ) -> None:
        """Initialize cache state for `repo`'s current filter level.

        When `verify_node` is set, head nodes are lazily checked against the
        changelog (see _verifybranch); note the `hasnode` parameter itself is
        not used by this body — verification uses repo.changelog.hasnode.
        `inherited` marks a cache copied from a subset (see inherit_for).
        """
        self._filtername = repo.filtername
        if tipnode is None:
            self.tipnode = repo.nullid
        else:
            self.tipnode = tipnode
        self.tiprev = tiprev
        self.filteredhash = filteredhash
        self._state = STATE_CLEAN
        if inherited:
            self._state = STATE_INHERITED

        super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
        # closednodes is a set of nodes that close their branch. If the branch
        # cache has been updated, it may contain nodes that are no longer
        # heads.

        # Do we need to verify branch at all ?
        self._verify_node = verify_node
        # branches for which nodes are verified
        self._verifiedbranches = set()
        self._hasnode = None
        if self._verify_node:
            self._hasnode = repo.changelog.hasnode

    def validfor(self, repo):
        """check that cache contents are valid for (a subset of) this repo

        - False when the order of changesets changed or if we detect a strip.
        - True when cache is up-to-date for the current repo or its subset."""
        try:
            node = repo.changelog.node(self.tiprev)
        except IndexError:
            # changesets were stripped and now we don't even have enough to
            # find tiprev
            return False
        if self.tipnode != node:
            # tiprev doesn't correspond to tipnode: repo was stripped, or this
            # repo has a different order of changesets
            return False
        tiphash = scmutil.filteredhash(repo, self.tiprev, needobsolete=True)
        # hashes don't match if this repo view has a different set of filtered
        # revisions (e.g. due to phase changes) or obsolete revisions (e.g.
        # history was rewritten)
        return self.filteredhash == tiphash

    @classmethod
    def fromfile(cls, repo):
        """Load the branch cache for `repo` from disk.

        Return None when the file is missing, unreadable, or no longer valid
        for the repo (in which case the stale content is ignored)."""
        f = None
        try:
            f = repo.cachevfs(cls._filename(repo))
            lineiter = iter(f)
            init_kwargs = cls._load_header(repo, lineiter)
            bcache = cls(
                repo,
                verify_node=True,
                **init_kwargs,
            )
            if not bcache.validfor(repo):
                # invalidate the cache
                raise ValueError('tip differs')
            bcache._load_heads(repo, lineiter)
        except (IOError, OSError):
            return None

        except Exception as inst:
            if repo.ui.debugflag:
                msg = b'invalid %s: %s\n'
                msg %= (
                    _branchcachedesc(repo),
                    stringutil.forcebytestr(inst),
                )
                repo.ui.debug(msg)
            bcache = None

        finally:
            if f:
                f.close()

        return bcache

    @classmethod
    def _load_header(cls, repo, lineiter) -> "dict[str, Any]":
        """parse the head of a branchmap file

        return parameters to pass to a newly created class instance.
        """
        cachekey = next(lineiter).rstrip(b'\n').split(b" ", 2)
        last, lrev = cachekey[:2]
        last, lrev = bin(last), int(lrev)
        filteredhash = None
        if len(cachekey) > 2:
            filteredhash = bin(cachekey[2])
        return {
            "tipnode": last,
            "tiprev": lrev,
            "filteredhash": filteredhash,
        }

    def _load_heads(self, repo, lineiter):
        """fully loads the branchcache by reading from the file using the line
        iterator passed"""
        for line in lineiter:
            line = line.rstrip(b'\n')
            if not line:
                continue
            node, state, label = line.split(b" ", 2)
            if state not in b'oc':
                raise ValueError('invalid branch state')
            label = encoding.tolocal(label.strip())
            node = bin(node)
            self._entries.setdefault(label, []).append(node)
            if state == b'c':
                self._closednodes.add(node)

    @classmethod
    def _filename(cls, repo):
        """name of a branchcache file for a given repo or repoview"""
        filename = cls._base_filename
        if repo.filtername:
            filename = b'%s-%s' % (filename, repo.filtername)
        return filename

    def inherit_for(self, repo):
        """return a copy of this cache adjusted for another filter level

        Note: entries are shared (shallow-copied), closed nodes are copied."""
        assert repo.filtername != self._filtername
        other = type(self)(
            repo=repo,
            # we always do a shallow copy of self._entries, and the values
            # are always replaced, so no need to deepcopy until the above
            # remains true.
            entries=self._entries,
            tipnode=self.tipnode,
            tiprev=self.tiprev,
            filteredhash=self.filteredhash,
            closednodes=set(self._closednodes),
            verify_node=self._verify_node,
            inherited=True,
        )
        # also copy information about the current verification state
        other._verifiedbranches = set(self._verifiedbranches)
        return other

    def sync_disk(self, repo):
        """synchronise the on disk file with the cache state

        If new value specific to this filter level need to be written, the file
        will be updated, if the state of the branchcache is inherited from a
        subset, any stale on disk file will be deleted.

        That method does nothing if there is nothing to do.
        """
        if self._state == STATE_DIRTY:
            self.write(repo)
        elif self._state == STATE_INHERITED:
            filename = self._filename(repo)
            repo.cachevfs.tryunlink(filename)

    def write(self, repo):
        """Write the (dirty) cache to disk and mark it clean.

        Writing is skipped while a transaction is still open; best-effort:
        I/O errors are logged and swallowed."""
        assert self._filtername == repo.filtername, (
            self._filtername,
            repo.filtername,
        )
        assert self._state == STATE_DIRTY, self._state
        tr = repo.currenttransaction()
        if not getattr(tr, 'finalized', True):
            # Avoid premature writing.
            #
            # (The cache warming setup by localrepo will update the file later.)
            return
        try:
            filename = self._filename(repo)
            with repo.cachevfs(filename, b"w", atomictemp=True) as f:
                self._write_header(f)
                nodecount = self._write_heads(f)
                repo.ui.log(
                    b'branchcache',
                    b'wrote %s with %d labels and %d nodes\n',
                    _branchcachedesc(repo),
                    len(self._entries),
                    nodecount,
                )
            self._state = STATE_CLEAN
        except (IOError, OSError, error.Abort) as inst:
            # Abort may be raised by read only opener, so log and continue
            repo.ui.debug(
                b"couldn't write branch cache: %s\n"
                % stringutil.forcebytestr(inst)
            )

    def _write_header(self, fp) -> None:
        """write the branch cache header to a file"""
        cachekey = [hex(self.tipnode), b'%d' % self.tiprev]
        if self.filteredhash is not None:
            cachekey.append(hex(self.filteredhash))
        fp.write(b" ".join(cachekey) + b'\n')

    def _write_heads(self, fp) -> int:
        """write list of heads to a file

        Return the number of heads written."""
        nodecount = 0
        for label, nodes in sorted(self._entries.items()):
            label = encoding.fromlocal(label)
            for node in nodes:
                nodecount += 1
                if node in self._closednodes:
                    state = b'c'
                else:
                    state = b'o'
                fp.write(b"%s %s %s\n" % (hex(node), state, label))
        return nodecount

    def _verifybranch(self, branch):
        """verify head nodes for the given branch.

        Raises ValueError (via _unknownnode) when a cached head is missing
        from the changelog; each branch is only verified once."""
        if not self._verify_node:
            return
        if branch not in self._entries or branch in self._verifiedbranches:
            return
        assert self._hasnode is not None
        for n in self._entries[branch]:
            if not self._hasnode(n):
                _unknownnode(n)

        self._verifiedbranches.add(branch)

    def _verifyall(self):
        """verifies nodes of all the branches"""
        for b in self._entries.keys():
            if b not in self._verifiedbranches:
                self._verifybranch(b)

    def __getitem__(self, key):
        self._verifybranch(key)
        return super().__getitem__(key)

    def __contains__(self, key):
        self._verifybranch(key)
        return super().__contains__(key)

    def iteritems(self):
        self._verifyall()
        return super().iteritems()

    items = iteritems

    def iterheads(self):
        """returns all the heads"""
        self._verifyall()
        return super().iterheads()

    def hasbranch(self, label):
        """checks whether a branch of this name exists or not"""
        self._verifybranch(label)
        return super().hasbranch(label)

    def branchheads(self, branch, closed=False):
        self._verifybranch(branch)
        return super().branchheads(branch, closed=closed)

    def update(self, repo, revgen):
        """Update the cache (see _BaseBranchCache.update), then refresh the
        cache key (tipnode/tiprev/filteredhash) and write the result out."""
        assert self._filtername == repo.filtername, (
            self._filtername,
            repo.filtername,
        )
        cl = repo.changelog
        max_rev = super().update(repo, revgen)
        # new tip revision which we found after iterating items from new
        # branches
        if max_rev is not None and max_rev > self.tiprev:
            self.tiprev = max_rev
            self.tipnode = cl.node(max_rev)

        if not self.validfor(repo):
            # old cache key is now invalid for the repo, but we've just updated
            # the cache and we assume it's valid, so let's make the cache key
            # valid as well by recomputing it from the cached data
            self.tipnode = repo.nullid
            self.tiprev = nullrev
            for heads in self.iterheads():
                if not heads:
                    # all revisions on a branch are obsolete
                    continue
                # note: tiprev is not necessarily the tip revision of repo,
                # because the tip could be obsolete (i.e. not a head)
                tiprev = max(cl.rev(node) for node in heads)
                if tiprev > self.tiprev:
                    self.tipnode = cl.node(tiprev)
                    self.tiprev = tiprev
        self.filteredhash = scmutil.filteredhash(
            repo, self.tiprev, needobsolete=True
        )
        self._state = STATE_DIRTY
        self.write(repo)
735 736
736 737
class remotebranchcache(_BaseBranchCache):
    """Branchmap info for a remote connection, should not write locally"""

    def __init__(
        self,
        repo: "localrepo.localrepository",
        entries: Union[
            Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
        ] = (),
        closednodes: Optional[Set[bytes]] = None,
    ) -> None:
        # Same construction as _BaseBranchCache; the subclass only marks the
        # data as coming from a peer, which per the class contract above is
        # not written to the local cache directory.
        super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
749 750
750 751
751 752 # Revision branch info cache
752 753
# version suffix shared by both revision-branch-cache files
_rbcversion = b'-v1'
_rbcnames = b'rbc-names' + _rbcversion  # append-only list of branch names
_rbcrevs = b'rbc-revs' + _rbcversion  # fixed-size per-revision records
# [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
_rbcrecfmt = b'>4sI'
_rbcrecsize = calcsize(_rbcrecfmt)
_rbcmininc = 64 * _rbcrecsize  # minimum growth increment, in bytes
_rbcnodelen = 4  # bytes of the node hash stored per record
_rbcbranchidxmask = 0x7FFFFFFF  # low 31 bits: index into the names list
_rbccloseflag = 0x80000000  # high bit: revision closes its branch
763 764
764 765
class rbcrevs:
    """a byte string consisting of an immutable prefix followed by a mutable suffix"""

    def __init__(self, revs):
        self._prefix = revs
        self._rest = bytearray()

    def __len__(self):
        return len(self._prefix) + len(self._rest)

    def unpack_record(self, rbcrevidx):
        """Decode the record starting at byte offset `rbcrevidx`."""
        prefix_len = len(self._prefix)
        if rbcrevidx < prefix_len:
            return unpack_from(_rbcrecfmt, util.buffer(self._prefix), rbcrevidx)
        return unpack_from(
            _rbcrecfmt,
            util.buffer(self._rest),
            rbcrevidx - prefix_len,
        )

    def make_mutable(self):
        """Fold the immutable prefix into the mutable suffix, if any."""
        if len(self._prefix) > 0:
            merged = bytearray()
            merged[:] = self._prefix
            merged.extend(self._rest)
            self._rest = merged
            self._prefix = bytearray()

    def truncate(self, pos):
        """Drop everything from byte offset `pos` onward."""
        self.make_mutable()
        del self._rest[pos:]

    def pack_into(self, rbcrevidx, node, branchidx):
        """Encode a record at byte offset `rbcrevidx`, growing as needed."""
        if rbcrevidx < len(self._prefix):
            self.make_mutable()
        buf = self._rest
        start_offset = rbcrevidx - len(self._prefix)
        end_offset = start_offset + _rbcrecsize

        if len(buf) < end_offset:
            # bytearray doesn't allocate extra space at least in Python 3.7.
            # When multiple changesets are added in a row, precise resize would
            # result in quadratic complexity. Overallocate to compensate by
            # using the classic doubling technique for dynamic arrays instead.
            # If there was a gap in the map before, less space will be reserved.
            buf.extend(b'\0' * end_offset)
        return pack_into(
            _rbcrecfmt,
            buf,
            start_offset,
            node,
            branchidx,
        )

    def extend(self, extension):
        return self._rest.extend(extension)

    def slice(self, begin, end):
        """Return the bytes in [begin, end), spanning prefix and suffix."""
        prefix_len = len(self._prefix)
        if begin >= prefix_len:
            return self._rest[begin - prefix_len : end - prefix_len]
        acc = bytearray()
        acc[:] = self._prefix[begin:end]
        acc.extend(self._rest[begin - prefix_len : end - prefix_len])
        return acc
831 832
832 833
833 834 class revbranchcache:
834 835 """Persistent cache, mapping from revision number to branch name and close.
835 836 This is a low level cache, independent of filtering.
836 837
837 838 Branch names are stored in rbc-names in internal encoding separated by 0.
838 839 rbc-names is append-only, and each branch name is only stored once and will
839 840 thus have a unique index.
840 841
841 842 The branch info for each revision is stored in rbc-revs as constant size
842 843 records. The whole file is read into memory, but it is only 'parsed' on
843 844 demand. The file is usually append-only but will be truncated if repo
844 845 modification is detected.
845 846 The record for each revision contains the first 4 bytes of the
846 847 corresponding node hash, and the record is only used if it still matches.
847 848 Even a completely trashed rbc-revs fill thus still give the right result
848 849 while converging towards full recovery ... assuming no incorrectly matching
849 850 node hashes.
850 851 The record also contains 4 bytes where 31 bits contains the index of the
851 852 branch and the last bit indicate that it is a branch close commit.
852 853 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
853 854 and will grow with it but be 1/8th of its size.
854 855 """
855 856
    def __init__(self, repo, readonly=True):
        """Load the rbc-names and rbc-revs cache files for `repo`, if present.

        If rbc-names cannot be read and `readonly` is True, branchinfo is
        rebound to the uncached slow path instead of preparing a rebuild.
        """
        assert repo.filtername is None
        self._repo = repo
        self._names = []  # branch names in local encoding with static index
        self._rbcrevs = rbcrevs(bytearray())
        self._rbcsnameslen = 0  # length of names read at _rbcsnameslen
        try:
            bndata = repo.cachevfs.read(_rbcnames)
            self._rbcsnameslen = len(bndata)  # for verification before writing
            if bndata:
                self._names = [
                    encoding.tolocal(bn) for bn in bndata.split(b'\0')
                ]
        except (IOError, OSError):
            if readonly:
                # don't try to use cache - fall back to the slow path
                self.branchinfo = self._branchinfo

        # only bother reading rbc-revs when there are names to resolve into
        if self._names:
            try:
                if repo.ui.configbool(b'format', b'mmap-revbranchcache'):
                    with repo.cachevfs(_rbcrevs) as fp:
                        data = util.buffer(util.mmapread(fp))
                else:
                    data = repo.cachevfs.read(_rbcrevs)
                self._rbcrevs = rbcrevs(data)
            except (IOError, OSError) as inst:
                repo.ui.debug(
                    b"couldn't read revision branch cache: %s\n"
                    % stringutil.forcebytestr(inst)
                )
        # remember number of good records on disk
        self._rbcrevslen = min(
            len(self._rbcrevs) // _rbcrecsize, len(repo.changelog)
        )
        if self._rbcrevslen == 0:
            self._names = []
        self._rbcnamescount = len(self._names)  # number of names read at
        # _rbcsnameslen
895 896
896 897 def _clear(self):
897 898 self._rbcsnameslen = 0
898 899 del self._names[:]
899 900 self._rbcnamescount = 0
900 901 self._rbcrevslen = len(self._repo.changelog)
901 902 self._rbcrevs = rbcrevs(bytearray(self._rbcrevslen * _rbcrecsize))
902 903 util.clearcachedproperty(self, b'_namesreverse')
903 904
904 905 @util.propertycache
905 906 def _namesreverse(self):
906 907 return {b: r for r, b in enumerate(self._names)}
907 908
    def branchinfo(self, rev):
        """Return branch name and close flag for rev, using and updating
        persistent cache.

        Returns a ``(branchname, closed)`` tuple.  Falls back to reading
        the changelog (and refreshing the cache) whenever the cached
        record is missing, stale, or references an unknown name.
        """
        changelog = self._repo.changelog
        rbcrevidx = rev * _rbcrecsize

        # avoid negative index, changelog.read(nullrev) is fast without cache
        if rev == nullrev:
            return changelog.branchinfo(rev)

        # if requested rev isn't allocated, grow and cache the rev info
        if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
            return self._branchinfo(rev)

        # fast path: extract data from cache, use it if node is matching
        reponode = changelog.node(rev)[:_rbcnodelen]
        cachenode, branchidx = self._rbcrevs.unpack_record(rbcrevidx)
        close = bool(branchidx & _rbccloseflag)
        if close:
            branchidx &= _rbcbranchidxmask
        if cachenode == b'\0\0\0\0':
            # all-zero node means the record was never filled in for this
            # rev; fall through to the slow path below
            pass
        elif cachenode == reponode:
            try:
                return self._names[branchidx], close
            except IndexError:
                # recover from invalid reference to unknown branch
                self._repo.ui.debug(
                    b"referenced branch names not found"
                    b" - rebuilding revision branch cache from scratch\n"
                )
                self._clear()
        else:
            # rev/node map has changed, invalidate the cache from here up
            self._repo.ui.debug(
                b"history modification detected - truncating "
                b"revision branch cache to revision %d\n" % rev
            )
            truncate = rbcrevidx + _rbcrecsize
            self._rbcrevs.truncate(truncate)
            self._rbcrevslen = min(self._rbcrevslen, truncate)

        # fall back to slow path and make sure it will be written to disk
        return self._branchinfo(rev)
952 953
953 954 def _branchinfo(self, rev):
954 955 """Retrieve branch info from changelog and update _rbcrevs"""
955 956 changelog = self._repo.changelog
956 957 b, close = changelog.branchinfo(rev)
957 958 if b in self._namesreverse:
958 959 branchidx = self._namesreverse[b]
959 960 else:
960 961 branchidx = len(self._names)
961 962 self._names.append(b)
962 963 self._namesreverse[b] = branchidx
963 964 reponode = changelog.node(rev)
964 965 if close:
965 966 branchidx |= _rbccloseflag
966 967 self._setcachedata(rev, reponode, branchidx)
967 968 return b, close
968 969
969 970 def setdata(self, rev, changelogrevision):
970 971 """add new data information to the cache"""
971 972 branch, close = changelogrevision.branchinfo
972 973
973 974 if branch in self._namesreverse:
974 975 branchidx = self._namesreverse[branch]
975 976 else:
976 977 branchidx = len(self._names)
977 978 self._names.append(branch)
978 979 self._namesreverse[branch] = branchidx
979 980 if close:
980 981 branchidx |= _rbccloseflag
981 982 self._setcachedata(rev, self._repo.changelog.node(rev), branchidx)
982 983 # If no cache data were readable (non exists, bad permission, etc)
983 984 # the cache was bypassing itself by setting:
984 985 #
985 986 # self.branchinfo = self._branchinfo
986 987 #
987 988 # Since we now have data in the cache, we need to drop this bypassing.
988 989 if 'branchinfo' in vars(self):
989 990 del self.branchinfo
990 991
991 992 def _setcachedata(self, rev, node, branchidx):
992 993 """Writes the node's branch data to the in-memory cache data."""
993 994 if rev == nullrev:
994 995 return
995 996 rbcrevidx = rev * _rbcrecsize
996 997 self._rbcrevs.pack_into(rbcrevidx, node, branchidx)
997 998 self._rbcrevslen = min(self._rbcrevslen, rev)
998 999
999 1000 tr = self._repo.currenttransaction()
1000 1001 if tr:
1001 1002 tr.addfinalize(b'write-revbranchcache', self.write)
1002 1003
    def write(self, tr=None):
        """Save branch cache if it is dirty.

        Writes happen in two steps — names first, then revision records
        (which hold indexes into the names file).  The wlock is taken
        non-blocking; any failure (I/O error, abort, lock contention) is
        only logged, since the cache can always be recomputed.
        """
        repo = self._repo
        wlock = None
        step = b''  # label of the step being attempted, for error reporting
        try:
            # write the new names
            if self._rbcnamescount < len(self._names):
                wlock = repo.wlock(wait=False)
                step = b' names'
                self._writenames(repo)

            # write the new revs
            start = self._rbcrevslen * _rbcrecsize
            if start != len(self._rbcrevs):
                step = b''
                if wlock is None:
                    wlock = repo.wlock(wait=False)
                self._writerevs(repo, start)

        except (IOError, OSError, error.Abort, error.LockError) as inst:
            # best-effort cache: report and move on
            repo.ui.debug(
                b"couldn't write revision branch cache%s: %s\n"
                % (step, stringutil.forcebytestr(inst))
            )
        finally:
            if wlock is not None:
                wlock.release()
1031 1032
1032 1033 def _writenames(self, repo):
1033 1034 """write the new branch names to revbranchcache"""
1034 1035 if self._rbcnamescount != 0:
1035 1036 f = repo.cachevfs.open(_rbcnames, b'ab')
1036 1037 if f.tell() == self._rbcsnameslen:
1037 1038 f.write(b'\0')
1038 1039 else:
1039 1040 f.close()
1040 1041 repo.ui.debug(b"%s changed - rewriting it\n" % _rbcnames)
1041 1042 self._rbcnamescount = 0
1042 1043 self._rbcrevslen = 0
1043 1044 if self._rbcnamescount == 0:
1044 1045 # before rewriting names, make sure references are removed
1045 1046 repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
1046 1047 f = repo.cachevfs.open(_rbcnames, b'wb')
1047 1048 f.write(
1048 1049 b'\0'.join(
1049 1050 encoding.fromlocal(b)
1050 1051 for b in self._names[self._rbcnamescount :]
1051 1052 )
1052 1053 )
1053 1054 self._rbcsnameslen = f.tell()
1054 1055 f.close()
1055 1056 self._rbcnamescount = len(self._names)
1056 1057
    def _writerevs(self, repo, start):
        """write the new revs to revbranchcache

        ``start`` is the byte offset of the first dirty record.  Only
        records for revisions present in both the changelog and the
        in-memory data are written.
        """
        revs = min(len(repo.changelog), len(self._rbcrevs) // _rbcrecsize)
        with repo.cachevfs.open(_rbcrevs, b'ab') as f:
            if f.tell() != start:
                # on-disk file is not the expected length: rewind to the
                # first dirty record before writing
                repo.ui.debug(
                    b"truncating cache/%s to %d\n" % (_rbcrevs, start)
                )
                f.seek(start)
                if f.tell() != start:
                    # seek did not land where requested (e.g. file shorter
                    # than ``start``); rewrite everything from the beginning
                    start = 0
                    f.seek(start)
                f.truncate()
            end = revs * _rbcrecsize
            f.write(self._rbcrevs.slice(start, end))
        self._rbcrevslen = revs
General Comments 0
You need to be logged in to leave comments. Login now