branchcache: stop using `copy(…)` in `replace(…)`...
marmoute
r52382:659f7666 default
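The change in this revision is confined to `BranchMapCache.replace`: rather than storing a `copy(…)` of the freshly built cache under the lowest filter level it validates for, the cache object itself is retargeted to that filter and marked dirty. A minimal before/after sketch, using the names from the diff below:

    # before: a copy of the cache was created for the candidate filter
    cache = self._per_filter[candidate] = cache.copy(rview)
    cache.write(rview)

    # after: the cache object itself is retargeted and flagged for writing
    cache._filtername = candidate
    self._per_filter[candidate] = cache
    cache._dirty = True
    cache.write(rview)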
@@ -1,1048 +1,1050 b''
1 1 # branchmap.py - logic to computes, maintain and stores branchmap for local repo
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import struct
10 10
11 11 from .node import (
12 12 bin,
13 13 hex,
14 14 nullrev,
15 15 )
16 16
17 17 from typing import (
18 18 Any,
19 19 Callable,
20 20 Dict,
21 21 Iterable,
22 22 List,
23 23 Optional,
24 24 Set,
25 25 TYPE_CHECKING,
26 26 Tuple,
27 27 Union,
28 28 )
29 29
30 30 from . import (
31 31 encoding,
32 32 error,
33 33 obsolete,
34 34 scmutil,
35 35 util,
36 36 )
37 37
38 38 from .utils import (
39 39 repoviewutil,
40 40 stringutil,
41 41 )
42 42
43 43 if TYPE_CHECKING:
44 44 from . import localrepo
45 45
46 46 assert [localrepo]
47 47
48 48 subsettable = repoviewutil.subsettable
49 49
50 50 calcsize = struct.calcsize
51 51 pack_into = struct.pack_into
52 52 unpack_from = struct.unpack_from
53 53
54 54
55 55 class BranchMapCache:
56 56 """mapping of filtered views of repo with their branchcache"""
57 57
58 58 def __init__(self):
59 59 self._per_filter = {}
60 60
61 61 def __getitem__(self, repo):
62 62 self.updatecache(repo)
63 63 bcache = self._per_filter[repo.filtername]
64 64 assert bcache._filtername == repo.filtername, (
65 65 bcache._filtername,
66 66 repo.filtername,
67 67 )
68 68 return bcache
69 69
70 70 def update_disk(self, repo):
71 71 """ensure and up-to-date cache is (or will be) written on disk
72 72
73 73 The cache for this repository view is updated if needed and written on
74 74 disk.
75 75
76 76 If a transaction is in progress, the write is scheduled for transaction
77 77 close. See the `BranchMapCache.write_dirty` method.
78 78
79 79 This method exists independently of __getitem__ as it is sometimes useful
80 80 to signal that we have no intent to use the data in memory yet.
81 81 """
82 82 self.updatecache(repo)
83 83 bcache = self._per_filter[repo.filtername]
84 84 assert bcache._filtername == repo.filtername, (
85 85 bcache._filtername,
86 86 repo.filtername,
87 87 )
88 88 bcache.write(repo)
89 89
90 90 def updatecache(self, repo):
91 91 """Update the cache for the given filtered view on a repository"""
92 92 # This can trigger updates for the caches for subsets of the filtered
93 93 # view, e.g. when there is no cache for this filtered view or the cache
94 94 # is stale.
95 95
96 96 cl = repo.changelog
97 97 filtername = repo.filtername
98 98 bcache = self._per_filter.get(filtername)
99 99 if bcache is None or not bcache.validfor(repo):
100 100 # cache object missing or cache object stale? Read from disk
101 101 bcache = branchcache.fromfile(repo)
102 102
103 103 revs = []
104 104 if bcache is None:
105 105 # no (fresh) cache available anymore, perhaps we can re-use
106 106 # the cache for a subset, then extend that to add info on missing
107 107 # revisions.
108 108 subsetname = subsettable.get(filtername)
109 109 if subsetname is not None:
110 110 subset = repo.filtered(subsetname)
111 111 bcache = self[subset].copy(repo)
112 112 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
113 113 revs.extend(r for r in extrarevs if r <= bcache.tiprev)
114 114 else:
115 115 # nothing to fall back on, start empty.
116 116 bcache = branchcache(repo)
117 117
118 118 revs.extend(cl.revs(start=bcache.tiprev + 1))
119 119 if revs:
120 120 bcache.update(repo, revs)
121 121
122 122 assert bcache.validfor(repo), filtername
123 123 self._per_filter[repo.filtername] = bcache
124 124
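# Editorial note (not part of the original file): the subset fallback in
# updatecache() walks repoviewutil.subsettable, which maps each filter
# level to a narrower one whose cache can be reused, along the lines of:
#
#     {None: b'visible', b'visible': b'served',
#      b'served': b'immutable', b'immutable': b'base'}
#
# so a missing cache for, say, b'visible' is seeded from the b'served'
# cache and then extended with the revisions visible only at that level.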
125 125 def replace(self, repo, remotebranchmap):
126 126 """Replace the branchmap cache for a repo with a branch mapping.
127 127
128 128 This is likely only called during clone with a branch map from a
129 129 remote.
130 130
131 131 """
132 132 cl = repo.changelog
133 133 clrev = cl.rev
134 134 clbranchinfo = cl.branchinfo
135 135 rbheads = []
136 136 closed = set()
137 137 for bheads in remotebranchmap.values():
138 138 rbheads += bheads
139 139 for h in bheads:
140 140 r = clrev(h)
141 141 b, c = clbranchinfo(r)
142 142 if c:
143 143 closed.add(h)
144 144
145 145 if rbheads:
146 146 rtiprev = max((int(clrev(node)) for node in rbheads))
147 147 cache = branchcache(
148 148 repo,
149 149 remotebranchmap,
150 150 repo[rtiprev].node(),
151 151 rtiprev,
152 152 closednodes=closed,
153 153 )
154 154
155 155 # Try to stick it as low as possible
156 156 # filters above served are unlikely to be fetched from a clone
157 157 for candidate in (b'base', b'immutable', b'served'):
158 158 rview = repo.filtered(candidate)
159 159 if cache.validfor(rview):
160 cache = self._per_filter[candidate] = cache.copy(rview)
160 cache._filtername = candidate
161 self._per_filter[candidate] = cache
162 cache._dirty = True
161 163 cache.write(rview)
162 164 return
163 165
164 166 def clear(self):
165 167 self._per_filter.clear()
166 168
167 169 def write_dirty(self, repo):
168 170 unfi = repo.unfiltered()
169 171 for filtername in repoviewutil.get_ordered_subset():
170 172 cache = self._per_filter.get(filtername)
171 173 if cache is None:
172 174 continue
173 175 if cache._dirty:
174 176 if filtername is None:
175 177 repo = unfi
176 178 else:
177 179 repo = unfi.filtered(filtername)
178 180 cache.write(repo)
179 181
180 182
181 183 def _unknownnode(node):
182 184 """raises ValueError when branchcache found a node which does not exists"""
183 185 raise ValueError('node %s does not exist' % node.hex())
184 186
185 187
186 188 def _branchcachedesc(repo):
187 189 if repo.filtername is not None:
188 190 return b'branch cache (%s)' % repo.filtername
189 191 else:
190 192 return b'branch cache'
191 193
192 194
193 195 class _BaseBranchCache:
194 196 """A dict like object that hold branches heads cache.
195 197
196 198 This cache is used to avoid costly computations to determine all the
197 199 branch heads of a repo.
198 200
199 201 The cache is serialized on disk in the following format:
200 202
201 203 <tip hex node> <tip rev number> [optional filtered repo hex hash]
202 204 <branch head hex node> <open/closed state> <branch name>
203 205 <branch head hex node> <open/closed state> <branch name>
204 206 ...
205 207
206 208 The first line is used to check if the cache is still valid. If the
207 209 branch cache is for a filtered repo view, an optional third hash is
208 210 included that hashes the hashes of all filtered and obsolete revisions.
209 211
210 212 The open/closed state is represented by a single letter 'o' or 'c'.
211 213 This field can be used to avoid changelog reads when determining if a
212 214 branch head closes a branch or not.
213 215 """
214 216
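# Editorial example (hypothetical values, not part of the original file):
# a cache file in the format described above, for a repo whose tip is
# rev 5, with an open head on default and a closed head on stable:
#
#     e63c23eaa88a 5 f554fc8f1e9f
#     e63c23eaa88a o default
#     1c28f494dae6 c stable
#
# (real files use full 40-character hex nodes; these are shortened)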
215 217 def __init__(
216 218 self,
217 219 repo: "localrepo.localrepository",
218 220 entries: Union[
219 221 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
220 222 ] = (),
221 223 closed_nodes: Optional[Set[bytes]] = None,
222 224 ) -> None:
223 225 """hasnode is a function which can be used to verify whether changelog
224 226 has a given node or not. If it's not provided, we assume that every node
225 227 we have exists in changelog"""
226 228 # closednodes is a set of nodes that close their branch. If the branch
227 229 # cache has been updated, it may contain nodes that are no longer
228 230 # heads.
229 231 if closed_nodes is None:
230 232 closed_nodes = set()
231 233 self._closednodes = set(closed_nodes)
232 234 self._entries = dict(entries)
233 235
234 236 def __iter__(self):
235 237 return iter(self._entries)
236 238
237 239 def __setitem__(self, key, value):
238 240 self._entries[key] = value
239 241
240 242 def __getitem__(self, key):
241 243 return self._entries[key]
242 244
243 245 def __contains__(self, key):
244 246 return key in self._entries
245 247
246 248 def iteritems(self):
247 249 return self._entries.items()
248 250
249 251 items = iteritems
250 252
251 253 def hasbranch(self, label):
252 254 """checks whether a branch of this name exists or not"""
253 255 return label in self._entries
254 256
255 257 def _branchtip(self, heads):
256 258 """Return tuple with last open head in heads and false,
257 259 otherwise return last closed head and true."""
258 260 tip = heads[-1]
259 261 closed = True
260 262 for h in reversed(heads):
261 263 if h not in self._closednodes:
262 264 tip = h
263 265 closed = False
264 266 break
265 267 return tip, closed
266 268
267 269 def branchtip(self, branch):
268 270 """Return the tipmost open head on branch head, otherwise return the
269 271 tipmost closed head on branch.
270 272 Raise KeyError for unknown branch."""
271 273 return self._branchtip(self[branch])[0]
272 274
273 275 def iteropen(self, nodes):
274 276 return (n for n in nodes if n not in self._closednodes)
275 277
276 278 def branchheads(self, branch, closed=False):
277 279 heads = self._entries[branch]
278 280 if not closed:
279 281 heads = list(self.iteropen(heads))
280 282 return heads
281 283
282 284 def iterbranches(self):
283 285 for bn, heads in self.items():
284 286 yield (bn, heads) + self._branchtip(heads)
285 287
286 288 def iterheads(self):
287 289 """returns all the heads"""
288 290 return self._entries.values()
289 291
290 292 def update(self, repo, revgen):
291 293 """Given a branchhead cache, self, that may have extra nodes or be
292 294 missing heads, and a generator of nodes that are strictly a superset of
293 295 heads missing, this function updates self to be correct.
294 296 """
295 297 starttime = util.timer()
296 298 cl = repo.changelog
297 299 # collect new branch entries
298 300 newbranches = {}
299 301 getbranchinfo = repo.revbranchcache().branchinfo
300 302 max_rev = -1
301 303 for r in revgen:
302 304 branch, closesbranch = getbranchinfo(r)
303 305 newbranches.setdefault(branch, []).append(r)
304 306 if closesbranch:
305 307 self._closednodes.add(cl.node(r))
306 308 max_rev = max(max_rev, r)
307 309 if max_rev < 0:
308 310 msg = "running branchcache.update without revision to update"
309 311 raise error.ProgrammingError(msg)
310 312
311 313 # Delay fetching the topological heads until they are needed.
312 314 # A repository without non-continuous branches can skip this part.
313 315 topoheads = None
314 316
315 317 # If a changeset is visible, its parents must be visible too, so
316 318 # use the faster unfiltered parent accessor.
317 319 parentrevs = repo.unfiltered().changelog.parentrevs
318 320
319 321 # Faster than using ctx.obsolete()
320 322 obsrevs = obsolete.getrevs(repo, b'obsolete')
321 323
322 324 for branch, newheadrevs in newbranches.items():
323 325 # For every branch, compute the new branchheads.
324 326 # A branchhead is a revision such that no descendant is on
325 327 # the same branch.
326 328 #
327 329 # The branchheads are computed iteratively in revision order.
328 330 # This ensures topological order, i.e. parents are processed
329 331 # before their children. Ancestors are inclusive here, i.e.
330 332 # any revision is an ancestor of itself.
331 333 #
332 334 # Core observations:
333 335 # - The current revision is always a branchhead for the
334 336 # repository up to that point.
335 337 # - It is the first revision of the branch if and only if
336 338 # there was no branchhead before. In that case, it is the
337 339 # only branchhead as there are no possible ancestors on
338 340 # the same branch.
339 341 # - If a parent is on the same branch, a branchhead can
340 342 # only be an ancestor of that parent if it is the parent
341 343 # itself. Otherwise it would have been removed as an ancestor
342 344 # of that parent before.
343 345 # - Therefore, if all parents are on the same branch, they
344 346 # can just be removed from the branchhead set.
345 347 # - If one parent is on the same branch and the other is not
346 348 # and there was exactly one branchhead known, the existing
347 349 # branchhead can only be an ancestor if it is the parent.
348 350 # Otherwise it would have been removed as an ancestor of
349 351 # the parent before. The other parent therefore can't have
350 352 # a branchhead as ancestor.
351 353 # - In all other cases, the parents on different branches
352 354 # could have a branchhead as ancestor. Those parents are
353 355 # kept in the "uncertain" set. If all branchheads are also
354 356 # topological heads, they can't have descendants and further
355 357 # checks can be skipped. Otherwise, the ancestors of the
356 358 # "uncertain" set are removed from branchheads.
357 359 # This computation is heavy and avoided if at all possible.
358 360 bheads = self._entries.get(branch, [])
359 361 bheadset = {cl.rev(node) for node in bheads}
360 362 uncertain = set()
361 363 for newrev in sorted(newheadrevs):
362 364 if newrev in obsrevs:
363 365 # We ignore obsolete changesets as they shouldn't be
364 366 # considered heads.
365 367 continue
366 368
367 369 if not bheadset:
368 370 bheadset.add(newrev)
369 371 continue
370 372
371 373 parents = [p for p in parentrevs(newrev) if p != nullrev]
372 374 samebranch = set()
373 375 otherbranch = set()
374 376 obsparents = set()
375 377 for p in parents:
376 378 if p in obsrevs:
377 379 # We ignored this obsolete changeset earlier, but now
378 380 # that it has non-ignored children, we need to make
379 381 # sure their ancestors are not considered heads. To
380 382 # achieve that, we will simply treat this obsolete
381 383 # changeset as a parent from other branch.
382 384 obsparents.add(p)
383 385 elif p in bheadset or getbranchinfo(p)[0] == branch:
384 386 samebranch.add(p)
385 387 else:
386 388 otherbranch.add(p)
387 389 if not (len(bheadset) == len(samebranch) == 1):
388 390 uncertain.update(otherbranch)
389 391 uncertain.update(obsparents)
390 392 bheadset.difference_update(samebranch)
391 393 bheadset.add(newrev)
392 394
393 395 if uncertain:
394 396 if topoheads is None:
395 397 topoheads = set(cl.headrevs())
396 398 if bheadset - topoheads:
397 399 floorrev = min(bheadset)
398 400 if floorrev <= max(uncertain):
399 401 ancestors = set(cl.ancestors(uncertain, floorrev))
400 402 bheadset -= ancestors
401 403 if bheadset:
402 404 self[branch] = [cl.node(rev) for rev in sorted(bheadset)]
403 405
404 406 duration = util.timer() - starttime
405 407 repo.ui.log(
406 408 b'branchcache',
407 409 b'updated %s in %.4f seconds\n',
408 410 _branchcachedesc(repo),
409 411 duration,
410 412 )
411 413 return max_rev
412 414
413 415
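# Editorial sketch (not part of the original file): the head computation in
# _BaseBranchCache.update() above, reduced to plain revision numbers. The
# helpers branch_of/parents_of are hypothetical stand-ins for
# revbranchcache().branchinfo and changelog.parentrevs; this naive version
# skips the obsolescence handling and the "uncertain" ancestor pruning.
def _naive_branchheads(revs, branch_of, parents_of):
    heads = {}  # branch -> set of head revisions
    for r in sorted(revs):  # ascending order keeps parents before children
        b = branch_of(r)
        bheads = heads.setdefault(b, set())
        # a parent on the same branch now has a descendant, so drop it
        bheads.difference_update(
            p for p in parents_of(r) if branch_of(p) == b
        )
        bheads.add(r)
    return heads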
414 416 class branchcache(_BaseBranchCache):
415 417 """Branchmap info for a local repo or repoview"""
416 418
417 419 _base_filename = b"branch2"
418 420
419 421 def __init__(
420 422 self,
421 423 repo: "localrepo.localrepository",
422 424 entries: Union[
423 425 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
424 426 ] = (),
425 427 tipnode: Optional[bytes] = None,
426 428 tiprev: Optional[int] = nullrev,
427 429 filteredhash: Optional[bytes] = None,
428 430 closednodes: Optional[Set[bytes]] = None,
429 431 hasnode: Optional[Callable[[bytes], bool]] = None,
430 432 verify_node: bool = False,
431 433 ) -> None:
432 434 """hasnode is a function which can be used to verify whether changelog
433 435 has a given node or not. If it's not provided, we assume that every node
434 436 we have exists in changelog"""
435 437 self._filtername = repo.filtername
436 438 if tipnode is None:
437 439 self.tipnode = repo.nullid
438 440 else:
439 441 self.tipnode = tipnode
440 442 self.tiprev = tiprev
441 443 self.filteredhash = filteredhash
442 444 self._dirty = False
443 445
444 446 super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
445 447 # closednodes is a set of nodes that close their branch. If the branch
446 448 # cache has been updated, it may contain nodes that are no longer
447 449 # heads.
448 450
450 452 # Do we need to verify branches at all?
450 452 self._verify_node = verify_node
451 453 # branches for which nodes are verified
452 454 self._verifiedbranches = set()
453 455 self._hasnode = None
454 456 if self._verify_node:
455 457 self._hasnode = repo.changelog.hasnode
456 458
457 459 def validfor(self, repo):
458 460 """check that cache contents are valid for (a subset of) this repo
459 461
460 462 - False when the order of changesets changed or if we detect a strip.
461 463 - True when cache is up-to-date for the current repo or its subset."""
462 464 try:
463 465 node = repo.changelog.node(self.tiprev)
464 466 except IndexError:
465 467 # changesets were stripped and now we don't even have enough to
466 468 # find tiprev
467 469 return False
468 470 if self.tipnode != node:
469 471 # tiprev doesn't correspond to tipnode: repo was stripped, or this
470 472 # repo has a different order of changesets
471 473 return False
472 474 tiphash = scmutil.filteredhash(repo, self.tiprev, needobsolete=True)
473 475 # hashes don't match if this repo view has a different set of filtered
474 476 # revisions (e.g. due to phase changes) or obsolete revisions (e.g.
475 477 # history was rewritten)
476 478 return self.filteredhash == tiphash
477 479
478 480 @classmethod
479 481 def fromfile(cls, repo):
480 482 f = None
481 483 try:
482 484 f = repo.cachevfs(cls._filename(repo))
483 485 lineiter = iter(f)
484 486 init_kwargs = cls._load_header(repo, lineiter)
485 487 bcache = cls(
486 488 repo,
487 489 verify_node=True,
488 490 **init_kwargs,
489 491 )
490 492 if not bcache.validfor(repo):
491 493 # invalidate the cache
492 494 raise ValueError('tip differs')
493 495 bcache._load_heads(repo, lineiter)
494 496 except (IOError, OSError):
495 497 return None
496 498
497 499 except Exception as inst:
498 500 if repo.ui.debugflag:
499 501 msg = b'invalid %s: %s\n'
500 502 msg %= (
501 503 _branchcachedesc(repo),
502 504 stringutil.forcebytestr(inst),
503 505 )
504 506 repo.ui.debug(msg)
505 507 bcache = None
506 508
507 509 finally:
508 510 if f:
509 511 f.close()
510 512
511 513 return bcache
512 514
513 515 @classmethod
514 516 def _load_header(cls, repo, lineiter) -> "dict[str, Any]":
515 517 """parse the head of a branchmap file
516 518
517 519 return parameters to pass to a newly created class instance.
518 520 """
519 521 cachekey = next(lineiter).rstrip(b'\n').split(b" ", 2)
520 522 last, lrev = cachekey[:2]
521 523 last, lrev = bin(last), int(lrev)
522 524 filteredhash = None
523 525 if len(cachekey) > 2:
524 526 filteredhash = bin(cachekey[2])
525 527 return {
526 528 "tipnode": last,
527 529 "tiprev": lrev,
528 530 "filteredhash": filteredhash,
529 531 }
530 532
531 533 def _load_heads(self, repo, lineiter):
532 534 """fully loads the branchcache by reading from the file using the line
533 535 iterator passed"""
534 536 for line in lineiter:
535 537 line = line.rstrip(b'\n')
536 538 if not line:
537 539 continue
538 540 node, state, label = line.split(b" ", 2)
539 541 if state not in b'oc':
540 542 raise ValueError('invalid branch state')
541 543 label = encoding.tolocal(label.strip())
542 544 node = bin(node)
543 545 self._entries.setdefault(label, []).append(node)
544 546 if state == b'c':
545 547 self._closednodes.add(node)
546 548
547 549 @classmethod
548 550 def _filename(cls, repo):
549 551 """name of a branchcache file for a given repo or repoview"""
550 552 filename = cls._base_filename
551 553 if repo.filtername:
552 554 filename = b'%s-%s' % (filename, repo.filtername)
553 555 return filename
554 556
555 557 def copy(self, repo):
556 558 """return a deep copy of the branchcache object"""
557 559 other = type(self)(
558 560 repo=repo,
559 561 # we always do a shallow copy of self._entries, and the values are
560 562 # always replaced, so there is no need to deepcopy as long as the
561 563 # above remains true.
562 564 entries=self._entries,
563 565 tipnode=self.tipnode,
564 566 tiprev=self.tiprev,
565 567 filteredhash=self.filteredhash,
566 568 closednodes=set(self._closednodes),
567 569 verify_node=self._verify_node,
568 570 )
569 571 # the copy will likely schedule a write anyway, but it does not seem
570 572 # to hurt to overschedule
571 573 other._dirty = self._dirty
572 574 # also copy information about the current verification state
573 575 other._verifiedbranches = set(self._verifiedbranches)
574 576 return other
575 577
576 578 def write(self, repo):
577 579 assert self._filtername == repo.filtername, (
578 580 self._filtername,
579 581 repo.filtername,
580 582 )
581 583 tr = repo.currenttransaction()
582 584 if not getattr(tr, 'finalized', True):
583 585 # Avoid premature writing.
584 586 #
585 587 # (The cache warming setup by localrepo will update the file later.)
586 588 return
587 589 try:
588 590 filename = self._filename(repo)
589 591 with repo.cachevfs(filename, b"w", atomictemp=True) as f:
590 592 self._write_header(f)
591 593 nodecount = self._write_heads(f)
592 594 repo.ui.log(
593 595 b'branchcache',
594 596 b'wrote %s with %d labels and %d nodes\n',
595 597 _branchcachedesc(repo),
596 598 len(self._entries),
597 599 nodecount,
598 600 )
599 601 self._dirty = False
600 602 except (IOError, OSError, error.Abort) as inst:
601 603 # Abort may be raised by read only opener, so log and continue
602 604 repo.ui.debug(
603 605 b"couldn't write branch cache: %s\n"
604 606 % stringutil.forcebytestr(inst)
605 607 )
606 608
607 609 def _write_header(self, fp) -> None:
608 610 """write the branch cache header to a file"""
609 611 cachekey = [hex(self.tipnode), b'%d' % self.tiprev]
610 612 if self.filteredhash is not None:
611 613 cachekey.append(hex(self.filteredhash))
612 614 fp.write(b" ".join(cachekey) + b'\n')
613 615
614 616 def _write_heads(self, fp) -> int:
615 617 """write list of heads to a file
616 618
617 619 Return the number of heads written."""
618 620 nodecount = 0
619 621 for label, nodes in sorted(self._entries.items()):
620 622 label = encoding.fromlocal(label)
621 623 for node in nodes:
622 624 nodecount += 1
623 625 if node in self._closednodes:
624 626 state = b'c'
625 627 else:
626 628 state = b'o'
627 629 fp.write(b"%s %s %s\n" % (hex(node), state, label))
628 630 return nodecount
629 631
630 632 def _verifybranch(self, branch):
631 633 """verify head nodes for the given branch."""
632 634 if not self._verify_node:
633 635 return
634 636 if branch not in self._entries or branch in self._verifiedbranches:
635 637 return
636 638 assert self._hasnode is not None
637 639 for n in self._entries[branch]:
638 640 if not self._hasnode(n):
639 641 _unknownnode(n)
640 642
641 643 self._verifiedbranches.add(branch)
642 644
643 645 def _verifyall(self):
644 646 """verifies nodes of all the branches"""
645 647 for b in self._entries.keys():
646 648 if b not in self._verifiedbranches:
647 649 self._verifybranch(b)
648 650
649 651 def __getitem__(self, key):
650 652 self._verifybranch(key)
651 653 return super().__getitem__(key)
652 654
653 655 def __contains__(self, key):
654 656 self._verifybranch(key)
655 657 return super().__contains__(key)
656 658
657 659 def iteritems(self):
658 660 self._verifyall()
659 661 return super().iteritems()
660 662
661 663 items = iteritems
662 664
663 665 def iterheads(self):
664 666 """returns all the heads"""
665 667 self._verifyall()
666 668 return super().iterheads()
667 669
668 670 def hasbranch(self, label):
669 671 """checks whether a branch of this name exists or not"""
670 672 self._verifybranch(label)
671 673 return super().hasbranch(label)
672 674
673 675 def branchheads(self, branch, closed=False):
674 676 self._verifybranch(branch)
675 677 return super().branchheads(branch, closed=closed)
676 678
677 679 def update(self, repo, revgen):
678 680 assert self._filtername == repo.filtername, (
679 681 self._filtername,
680 682 repo.filtername,
681 683 )
682 684 cl = repo.changelog
683 685 max_rev = super().update(repo, revgen)
684 686 # new tip revision which we found after iterating items from new
685 687 # branches
686 688 if max_rev is not None and max_rev > self.tiprev:
687 689 self.tiprev = max_rev
688 690 self.tipnode = cl.node(max_rev)
689 691
690 692 if not self.validfor(repo):
691 693 # old cache key is now invalid for the repo, but we've just updated
692 694 # the cache and we assume it's valid, so let's make the cache key
693 695 # valid as well by recomputing it from the cached data
694 696 self.tipnode = repo.nullid
695 697 self.tiprev = nullrev
696 698 for heads in self.iterheads():
697 699 if not heads:
698 700 # all revisions on a branch are obsolete
699 701 continue
700 702 # note: tiprev is not necessarily the tip revision of repo,
701 703 # because the tip could be obsolete (i.e. not a head)
702 704 tiprev = max(cl.rev(node) for node in heads)
703 705 if tiprev > self.tiprev:
704 706 self.tipnode = cl.node(tiprev)
705 707 self.tiprev = tiprev
706 708 self.filteredhash = scmutil.filteredhash(
707 709 repo, self.tiprev, needobsolete=True
708 710 )
709 711 self._dirty = True
710 712 self.write(repo)
711 713
712 714
713 715 class remotebranchcache(_BaseBranchCache):
714 716 """Branchmap info for a remote connection, should not write locally"""
715 717
716 718 def __init__(
717 719 self,
718 720 repo: "localrepo.localrepository",
719 721 entries: Union[
720 722 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
721 723 ] = (),
722 724 closednodes: Optional[Set[bytes]] = None,
723 725 ) -> None:
724 726 super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
725 727
726 728
727 729 # Revision branch info cache
728 730
729 731 _rbcversion = b'-v1'
730 732 _rbcnames = b'rbc-names' + _rbcversion
731 733 _rbcrevs = b'rbc-revs' + _rbcversion
732 734 # [4 byte hash prefix][4 byte branch name number with sign bit indicating a branch-closing commit]
733 735 _rbcrecfmt = b'>4sI'
734 736 _rbcrecsize = calcsize(_rbcrecfmt)
735 737 _rbcmininc = 64 * _rbcrecsize
736 738 _rbcnodelen = 4
737 739 _rbcbranchidxmask = 0x7FFFFFFF
738 740 _rbccloseflag = 0x80000000
739 741
740 742
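# Editorial sketch (not part of the original file): packing and unpacking
# one rbc-revs record with the constants above; values are made up.
#
#     import struct
#     rec = struct.pack(_rbcrecfmt, b'\x12\x34\x56\x78', 3 | _rbccloseflag)
#     assert len(rec) == _rbcrecsize            # 8 bytes per revision
#     prefix, field = struct.unpack(_rbcrecfmt, rec)
#     closed = bool(field & _rbccloseflag)      # high bit: branch-closing
#     branchidx = field & _rbcbranchidxmask     # low 31 bits: name index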
741 743 class rbcrevs:
742 744 """a byte string consisting of an immutable prefix followed by a mutable suffix"""
743 745
744 746 def __init__(self, revs):
745 747 self._prefix = revs
746 748 self._rest = bytearray()
747 749
748 750 def __len__(self):
749 751 return len(self._prefix) + len(self._rest)
750 752
751 753 def unpack_record(self, rbcrevidx):
752 754 if rbcrevidx < len(self._prefix):
753 755 return unpack_from(_rbcrecfmt, util.buffer(self._prefix), rbcrevidx)
754 756 else:
755 757 return unpack_from(
756 758 _rbcrecfmt,
757 759 util.buffer(self._rest),
758 760 rbcrevidx - len(self._prefix),
759 761 )
760 762
761 763 def make_mutable(self):
762 764 if len(self._prefix) > 0:
763 765 entirety = bytearray()
764 766 entirety[:] = self._prefix
765 767 entirety.extend(self._rest)
766 768 self._rest = entirety
767 769 self._prefix = bytearray()
768 770
769 771 def truncate(self, pos):
770 772 self.make_mutable()
771 773 del self._rest[pos:]
772 774
773 775 def pack_into(self, rbcrevidx, node, branchidx):
774 776 if rbcrevidx < len(self._prefix):
775 777 self.make_mutable()
776 778 buf = self._rest
777 779 start_offset = rbcrevidx - len(self._prefix)
778 780 end_offset = start_offset + _rbcrecsize
779 781
780 782 if len(self._rest) < end_offset:
781 783 # bytearray doesn't allocate extra space at least in Python 3.7.
782 784 # When multiple changesets are added in a row, precise resize would
783 785 # result in quadratic complexity. Overallocate to compensate by
784 786 # using the classic doubling technique for dynamic arrays instead.
785 787 # If there was a gap in the map before, less space will be reserved.
786 788 self._rest.extend(b'\0' * end_offset)
787 789 return pack_into(
788 790 _rbcrecfmt,
789 791 buf,
790 792 start_offset,
791 793 node,
792 794 branchidx,
793 795 )
794 796
795 797 def extend(self, extension):
796 798 return self._rest.extend(extension)
797 799
798 800 def slice(self, begin, end):
799 801 if begin < len(self._prefix):
800 802 acc = bytearray()
801 803 acc[:] = self._prefix[begin:end]
802 804 acc.extend(
803 805 self._rest[begin - len(self._prefix) : end - len(self._prefix)]
804 806 )
805 807 return acc
806 808 return self._rest[begin - len(self._prefix) : end - len(self._prefix)]
807 809
808 810
809 811 class revbranchcache:
810 812 """Persistent cache, mapping from revision number to branch name and close.
811 813 This is a low level cache, independent of filtering.
812 814
813 815 Branch names are stored in rbc-names in internal encoding separated by 0.
814 816 rbc-names is append-only, and each branch name is only stored once and will
815 817 thus have a unique index.
816 818
817 819 The branch info for each revision is stored in rbc-revs as constant size
818 820 records. The whole file is read into memory, but it is only 'parsed' on
819 821 demand. The file is usually append-only but will be truncated if repo
820 822 modification is detected.
821 823 The record for each revision contains the first 4 bytes of the
822 824 corresponding node hash, and the record is only used if it still matches.
823 825 Even a completely trashed rbc-revs will thus still give the right result
824 826 while converging towards full recovery ... assuming no incorrectly matching
825 827 node hashes.
826 828 The record also contains 4 bytes where 31 bits contain the index of the
827 829 branch and the last bit indicates that it is a branch close commit.
828 830 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
829 831 and will grow with it but be 1/8th of its size.
830 832 """
831 833
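# Editorial note (not part of the original file): the 1/8th figure above
# follows from the record sizes, assuming the classic 64-byte revlog v1
# index entry: calcsize(b'>4sI') == 8 bytes per rbc-revs record, and
# 8 / 64 == 1/8 of 00changelog.i.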
832 834 def __init__(self, repo, readonly=True):
833 835 assert repo.filtername is None
834 836 self._repo = repo
835 837 self._names = [] # branch names in local encoding with static index
836 838 self._rbcrevs = rbcrevs(bytearray())
837 839 self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
838 840 try:
839 841 bndata = repo.cachevfs.read(_rbcnames)
840 842 self._rbcsnameslen = len(bndata) # for verification before writing
841 843 if bndata:
842 844 self._names = [
843 845 encoding.tolocal(bn) for bn in bndata.split(b'\0')
844 846 ]
845 847 except (IOError, OSError):
846 848 if readonly:
847 849 # don't try to use cache - fall back to the slow path
848 850 self.branchinfo = self._branchinfo
849 851
850 852 if self._names:
851 853 try:
852 854 if repo.ui.configbool(b'format', b'mmap-revbranchcache'):
853 855 with repo.cachevfs(_rbcrevs) as fp:
854 856 data = util.buffer(util.mmapread(fp))
855 857 else:
856 858 data = repo.cachevfs.read(_rbcrevs)
857 859 self._rbcrevs = rbcrevs(data)
858 860 except (IOError, OSError) as inst:
859 861 repo.ui.debug(
860 862 b"couldn't read revision branch cache: %s\n"
861 863 % stringutil.forcebytestr(inst)
862 864 )
863 865 # remember number of good records on disk
864 866 self._rbcrevslen = min(
865 867 len(self._rbcrevs) // _rbcrecsize, len(repo.changelog)
866 868 )
867 869 if self._rbcrevslen == 0:
868 870 self._names = []
869 871 self._rbcnamescount = len(self._names) # number of names read at
870 872 # _rbcsnameslen
871 873
872 874 def _clear(self):
873 875 self._rbcsnameslen = 0
874 876 del self._names[:]
875 877 self._rbcnamescount = 0
876 878 self._rbcrevslen = len(self._repo.changelog)
877 879 self._rbcrevs = rbcrevs(bytearray(self._rbcrevslen * _rbcrecsize))
878 880 util.clearcachedproperty(self, b'_namesreverse')
879 881
880 882 @util.propertycache
881 883 def _namesreverse(self):
882 884 return {b: r for r, b in enumerate(self._names)}
883 885
884 886 def branchinfo(self, rev):
885 887 """Return branch name and close flag for rev, using and updating
886 888 persistent cache."""
887 889 changelog = self._repo.changelog
888 890 rbcrevidx = rev * _rbcrecsize
889 891
890 892 # avoid negative index, changelog.read(nullrev) is fast without cache
891 893 if rev == nullrev:
892 894 return changelog.branchinfo(rev)
893 895
894 896 # if requested rev isn't allocated, grow and cache the rev info
895 897 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
896 898 return self._branchinfo(rev)
897 899
898 900 # fast path: extract data from cache, use it if node is matching
899 901 reponode = changelog.node(rev)[:_rbcnodelen]
900 902 cachenode, branchidx = self._rbcrevs.unpack_record(rbcrevidx)
901 903 close = bool(branchidx & _rbccloseflag)
902 904 if close:
903 905 branchidx &= _rbcbranchidxmask
904 906 if cachenode == b'\0\0\0\0':
905 907 pass
906 908 elif cachenode == reponode:
907 909 try:
908 910 return self._names[branchidx], close
909 911 except IndexError:
910 912 # recover from invalid reference to unknown branch
911 913 self._repo.ui.debug(
912 914 b"referenced branch names not found"
913 915 b" - rebuilding revision branch cache from scratch\n"
914 916 )
915 917 self._clear()
916 918 else:
917 919 # rev/node map has changed, invalidate the cache from here up
918 920 self._repo.ui.debug(
919 921 b"history modification detected - truncating "
920 922 b"revision branch cache to revision %d\n" % rev
921 923 )
922 924 truncate = rbcrevidx + _rbcrecsize
923 925 self._rbcrevs.truncate(truncate)
924 926 self._rbcrevslen = min(self._rbcrevslen, truncate)
925 927
926 928 # fall back to slow path and make sure it will be written to disk
927 929 return self._branchinfo(rev)
928 930
929 931 def _branchinfo(self, rev):
930 932 """Retrieve branch info from changelog and update _rbcrevs"""
931 933 changelog = self._repo.changelog
932 934 b, close = changelog.branchinfo(rev)
933 935 if b in self._namesreverse:
934 936 branchidx = self._namesreverse[b]
935 937 else:
936 938 branchidx = len(self._names)
937 939 self._names.append(b)
938 940 self._namesreverse[b] = branchidx
939 941 reponode = changelog.node(rev)
940 942 if close:
941 943 branchidx |= _rbccloseflag
942 944 self._setcachedata(rev, reponode, branchidx)
943 945 return b, close
944 946
945 947 def setdata(self, rev, changelogrevision):
946 948 """add new data information to the cache"""
947 949 branch, close = changelogrevision.branchinfo
948 950
949 951 if branch in self._namesreverse:
950 952 branchidx = self._namesreverse[branch]
951 953 else:
952 954 branchidx = len(self._names)
953 955 self._names.append(branch)
954 956 self._namesreverse[branch] = branchidx
955 957 if close:
956 958 branchidx |= _rbccloseflag
957 959 self._setcachedata(rev, self._repo.changelog.node(rev), branchidx)
958 960 # If no cache data were readable (none exists, bad permissions, etc.)
959 961 # the cache was bypassing itself by setting:
960 962 #
961 963 # self.branchinfo = self._branchinfo
962 964 #
963 965 # Since we now have data in the cache, we need to drop this bypassing.
964 966 if 'branchinfo' in vars(self):
965 967 del self.branchinfo
966 968
967 969 def _setcachedata(self, rev, node, branchidx):
968 970 """Writes the node's branch data to the in-memory cache data."""
969 971 if rev == nullrev:
970 972 return
971 973 rbcrevidx = rev * _rbcrecsize
972 974 self._rbcrevs.pack_into(rbcrevidx, node, branchidx)
973 975 self._rbcrevslen = min(self._rbcrevslen, rev)
974 976
975 977 tr = self._repo.currenttransaction()
976 978 if tr:
977 979 tr.addfinalize(b'write-revbranchcache', self.write)
978 980
979 981 def write(self, tr=None):
980 982 """Save branch cache if it is dirty."""
981 983 repo = self._repo
982 984 wlock = None
983 985 step = b''
984 986 try:
985 987 # write the new names
986 988 if self._rbcnamescount < len(self._names):
987 989 wlock = repo.wlock(wait=False)
988 990 step = b' names'
989 991 self._writenames(repo)
990 992
991 993 # write the new revs
992 994 start = self._rbcrevslen * _rbcrecsize
993 995 if start != len(self._rbcrevs):
994 996 step = b''
995 997 if wlock is None:
996 998 wlock = repo.wlock(wait=False)
997 999 self._writerevs(repo, start)
998 1000
999 1001 except (IOError, OSError, error.Abort, error.LockError) as inst:
1000 1002 repo.ui.debug(
1001 1003 b"couldn't write revision branch cache%s: %s\n"
1002 1004 % (step, stringutil.forcebytestr(inst))
1003 1005 )
1004 1006 finally:
1005 1007 if wlock is not None:
1006 1008 wlock.release()
1007 1009
1008 1010 def _writenames(self, repo):
1009 1011 """write the new branch names to revbranchcache"""
1010 1012 if self._rbcnamescount != 0:
1011 1013 f = repo.cachevfs.open(_rbcnames, b'ab')
1012 1014 if f.tell() == self._rbcsnameslen:
1013 1015 f.write(b'\0')
1014 1016 else:
1015 1017 f.close()
1016 1018 repo.ui.debug(b"%s changed - rewriting it\n" % _rbcnames)
1017 1019 self._rbcnamescount = 0
1018 1020 self._rbcrevslen = 0
1019 1021 if self._rbcnamescount == 0:
1020 1022 # before rewriting names, make sure references are removed
1021 1023 repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
1022 1024 f = repo.cachevfs.open(_rbcnames, b'wb')
1023 1025 f.write(
1024 1026 b'\0'.join(
1025 1027 encoding.fromlocal(b)
1026 1028 for b in self._names[self._rbcnamescount :]
1027 1029 )
1028 1030 )
1029 1031 self._rbcsnameslen = f.tell()
1030 1032 f.close()
1031 1033 self._rbcnamescount = len(self._names)
1032 1034
1033 1035 def _writerevs(self, repo, start):
1034 1036 """write the new revs to revbranchcache"""
1035 1037 revs = min(len(repo.changelog), len(self._rbcrevs) // _rbcrecsize)
1036 1038 with repo.cachevfs.open(_rbcrevs, b'ab') as f:
1037 1039 if f.tell() != start:
1038 1040 repo.ui.debug(
1039 1041 b"truncating cache/%s to %d\n" % (_rbcrevs, start)
1040 1042 )
1041 1043 f.seek(start)
1042 1044 if f.tell() != start:
1043 1045 start = 0
1044 1046 f.seek(start)
1045 1047 f.truncate()
1046 1048 end = revs * _rbcrecsize
1047 1049 f.write(self._rbcrevs.slice(start, end))
1048 1050 self._rbcrevslen = revs