branchcache: avoid creating a `None` filter repoview when writing...
marmoute
r52361:4188a057 default
@@ -1,1042 +1,1045 @@
1 1 # branchmap.py - logic to compute, maintain and store the branchmap for a local repo
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import struct
10 10
11 11 from .node import (
12 12 bin,
13 13 hex,
14 14 nullrev,
15 15 )
16 16
17 17 from typing import (
18 18 Any,
19 19 Callable,
20 20 Dict,
21 21 Iterable,
22 22 List,
23 23 Optional,
24 24 Set,
25 25 TYPE_CHECKING,
26 26 Tuple,
27 27 Union,
28 28 )
29 29
30 30 from . import (
31 31 encoding,
32 32 error,
33 33 obsolete,
34 34 scmutil,
35 35 util,
36 36 )
37 37
38 38 from .utils import (
39 39 repoviewutil,
40 40 stringutil,
41 41 )
42 42
43 43 if TYPE_CHECKING:
44 44 from . import localrepo
45 45
46 46 assert [localrepo]
47 47
48 48 subsettable = repoviewutil.subsettable
49 49
50 50 calcsize = struct.calcsize
51 51 pack_into = struct.pack_into
52 52 unpack_from = struct.unpack_from
53 53
54 54
55 55 class BranchMapCache:
56 56 """mapping of filtered views of repo with their branchcache"""
57 57
58 58 def __init__(self):
59 59 self._per_filter = {}
60 60
61 61 def __getitem__(self, repo):
62 62 self.updatecache(repo)
63 63 bcache = self._per_filter[repo.filtername]
64 64 assert bcache._filtername == repo.filtername, (
65 65 bcache._filtername,
66 66 repo.filtername,
67 67 )
68 68 return bcache
69 69
70 70 def update_disk(self, repo):
71 71 """ensure and up-to-date cache is (or will be) written on disk
72 72
73 73 The cache for this repository view is updated if needed and written on
74 74 disk.
75 75
76 76 If a transaction is in progress, the writing is scheduled at transaction
77 77 close. See the `BranchMapCache.write_delayed` method.
78 78
79 79 This method exists independently of __getitem__ as it is sometimes
80 80 useful to signal that we do not intend to use the data in memory yet.
81 81 """
82 82 self.updatecache(repo)
83 83 bcache = self._per_filter[repo.filtername]
84 84 assert bcache._filtername == repo.filtername, (
85 85 bcache._filtername,
86 86 repo.filtername,
87 87 )
88 88 bcache.write(repo)
89 89
90 90 def updatecache(self, repo):
91 91 """Update the cache for the given filtered view on a repository"""
92 92 # This can trigger updates for the caches for subsets of the filtered
93 93 # view, e.g. when there is no cache for this filtered view or the cache
94 94 # is stale.
95 95
96 96 cl = repo.changelog
97 97 filtername = repo.filtername
98 98 bcache = self._per_filter.get(filtername)
99 99 if bcache is None or not bcache.validfor(repo):
100 100 # cache object missing or cache object stale? Read from disk
101 101 bcache = branchcache.fromfile(repo)
102 102
103 103 revs = []
104 104 if bcache is None:
105 105 # no (fresh) cache available anymore, perhaps we can re-use
106 106 # the cache for a subset, then extend that to add info on missing
107 107 # revisions.
108 108 subsetname = subsettable.get(filtername)
109 109 if subsetname is not None:
110 110 subset = repo.filtered(subsetname)
111 111 bcache = self[subset].copy(repo)
112 112 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
113 113 revs.extend(r for r in extrarevs if r <= bcache.tiprev)
114 114 else:
115 115 # nothing to fall back on, start empty.
116 116 bcache = branchcache(repo)
117 117
118 118 revs.extend(cl.revs(start=bcache.tiprev + 1))
119 119 if revs:
120 120 bcache.update(repo, revs)
121 121
122 122 assert bcache.validfor(repo), filtername
123 123 self._per_filter[repo.filtername] = bcache
124 124
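# For illustration: `subsettable` maps a filter name to the broader view
# whose cache can seed it (in current Mercurial, e.g., b'served' falls
# back on b'immutable', which falls back on b'base'), so updatecache only
# has to extend the copied cache with the revisions the subset had
# filtered out.
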
125 125 def replace(self, repo, remotebranchmap):
126 126 """Replace the branchmap cache for a repo with a branch mapping.
127 127
128 128 This is likely only called during clone with a branch map from a
129 129 remote.
130 130
131 131 """
132 132 cl = repo.changelog
133 133 clrev = cl.rev
134 134 clbranchinfo = cl.branchinfo
135 135 rbheads = []
136 136 closed = set()
137 137 for bheads in remotebranchmap.values():
138 138 rbheads += bheads
139 139 for h in bheads:
140 140 r = clrev(h)
141 141 b, c = clbranchinfo(r)
142 142 if c:
143 143 closed.add(h)
144 144
145 145 if rbheads:
146 146 rtiprev = max((int(clrev(node)) for node in rbheads))
147 147 cache = branchcache(
148 148 repo,
149 149 remotebranchmap,
150 150 repo[rtiprev].node(),
151 151 rtiprev,
152 152 closednodes=closed,
153 153 )
154 154
155 155 # Try to stick it as low as possible
156 156 # filters above served are unlikely to be fetched from a clone
157 157 for candidate in (b'base', b'immutable', b'served'):
158 158 rview = repo.filtered(candidate)
159 159 if cache.validfor(rview):
160 160 cache = self._per_filter[candidate] = cache.copy(rview)
161 161 cache.write(rview)
162 162 return
163 163
164 164 def clear(self):
165 165 self._per_filter.clear()
166 166
167 167 def write_delayed(self, repo):
168 168 unfi = repo.unfiltered()
169 169 for filtername, cache in self._per_filter.items():
170 170 if cache._delayed:
171 repo = unfi.filtered(filtername)
171 if filtername is None:
172 repo = unfi
173 else:
174 repo = unfi.filtered(filtername)
172 175 cache.write(repo)
173 176
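# A sketch (not part of the original file) of why write_delayed now
# special-cases None above: `filtered()` expects a filter name such as
# b'served', while `filtername` is None exactly for the unfiltered view,
# so the old code built a repoview for a `None` filter. The hypothetical
# helper below mimics the corrected dispatch:
def _view_for_sketch(unfi, filtername):
    if filtername is None:
        # None means "no filter": hand back the unfiltered repo itself
        return unfi
    return unfi.filtered(filtername)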
174 177
175 178 def _unknownnode(node):
176 179 """raises ValueError when branchcache found a node which does not exists"""
177 180 raise ValueError('node %s does not exist' % node.hex())
178 181
179 182
180 183 def _branchcachedesc(repo):
181 184 if repo.filtername is not None:
182 185 return b'branch cache (%s)' % repo.filtername
183 186 else:
184 187 return b'branch cache'
185 188
186 189
187 190 class _BaseBranchCache:
188 191 """A dict like object that hold branches heads cache.
189 192
190 193 This cache is used to avoid costly computations to determine all the
191 194 branch heads of a repo.
192 195
193 196 The cache is serialized on disk in the following format:
194 197
195 198 <tip hex node> <tip rev number> [optional filtered repo hex hash]
196 199 <branch head hex node> <open/closed state> <branch name>
197 200 <branch head hex node> <open/closed state> <branch name>
198 201 ...
199 202
200 203 The first line is used to check if the cache is still valid. If the
201 204 branch cache is for a filtered repo view, an optional third hash is
202 205 included that hashes the hashes of all filtered and obsolete revisions.
203 206
204 207 The open/closed state is represented by a single letter 'o' or 'c'.
205 208 This field can be used to avoid changelog reads when determining if a
206 209 branch head closes a branch or not.
207 210 """
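# For illustration only, a serialized cache following the format above
# might look like (hypothetical hashes and revision number):
#
#   f554a7a35b3f... 4023 9c41e8b7d3c2...
#   e83f0a12bd41... o default
#   77c2b9d0aa10... c stable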
208 211
209 212 def __init__(
210 213 self,
211 214 repo: "localrepo.localrepository",
212 215 entries: Union[
213 216 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
214 217 ] = (),
215 218 closed_nodes: Optional[Set[bytes]] = None,
216 219 ) -> None:
217 220 """hasnode is a function which can be used to verify whether changelog
218 221 has a given node or not. If it's not provided, we assume that every node
219 222 we have exists in changelog"""
220 223 # closednodes is a set of nodes that close their branch. If the branch
221 224 # cache has been updated, it may contain nodes that are no longer
222 225 # heads.
223 226 if closed_nodes is None:
224 227 closed_nodes = set()
225 228 self._closednodes = set(closed_nodes)
226 229 self._entries = dict(entries)
227 230
228 231 def __iter__(self):
229 232 return iter(self._entries)
230 233
231 234 def __setitem__(self, key, value):
232 235 self._entries[key] = value
233 236
234 237 def __getitem__(self, key):
235 238 return self._entries[key]
236 239
237 240 def __contains__(self, key):
238 241 return key in self._entries
239 242
240 243 def iteritems(self):
241 244 return self._entries.items()
242 245
243 246 items = iteritems
244 247
245 248 def hasbranch(self, label):
246 249 """checks whether a branch of this name exists or not"""
247 250 return label in self._entries
248 251
249 252 def _branchtip(self, heads):
250 253 """Return tuple with last open head in heads and false,
251 254 otherwise return last closed head and true."""
252 255 tip = heads[-1]
253 256 closed = True
254 257 for h in reversed(heads):
255 258 if h not in self._closednodes:
256 259 tip = h
257 260 closed = False
258 261 break
259 262 return tip, closed
260 263
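# Example with made-up nodes: given heads [n1, n2, n3] where n3 is in
# self._closednodes and n2 is not, _branchtip returns (n2, False); if
# every head were closed, it would return (n3, True).
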
261 264 def branchtip(self, branch):
262 265 """Return the tipmost open head on branch head, otherwise return the
263 266 tipmost closed head on branch.
264 267 Raise KeyError for unknown branch."""
265 268 return self._branchtip(self[branch])[0]
266 269
267 270 def iteropen(self, nodes):
268 271 return (n for n in nodes if n not in self._closednodes)
269 272
270 273 def branchheads(self, branch, closed=False):
271 274 heads = self._entries[branch]
272 275 if not closed:
273 276 heads = list(self.iteropen(heads))
274 277 return heads
275 278
276 279 def iterbranches(self):
277 280 for bn, heads in self.items():
278 281 yield (bn, heads) + self._branchtip(heads)
279 282
280 283 def iterheads(self):
281 284 """returns all the heads"""
282 285 return self._entries.values()
283 286
284 287 def update(self, repo, revgen):
285 288 """Given a branchhead cache, self, that may have extra nodes or be
286 289 missing heads, and a generator of nodes that are strictly a superset of
287 290 heads missing, this function updates self to be correct.
288 291 """
289 292 starttime = util.timer()
290 293 cl = repo.changelog
291 294 # collect new branch entries
292 295 newbranches = {}
293 296 getbranchinfo = repo.revbranchcache().branchinfo
294 297 max_rev = -1
295 298 for r in revgen:
296 299 branch, closesbranch = getbranchinfo(r)
297 300 newbranches.setdefault(branch, []).append(r)
298 301 if closesbranch:
299 302 self._closednodes.add(cl.node(r))
300 303 max_rev = max(max_rev, r)
301 304 if max_rev < 0:
302 305 max_rev = None
303 306
304 307 # Delay fetching the topological heads until they are needed.
305 308 # A repository without non-continuous branches can skip this part.
306 309 topoheads = None
307 310
308 311 # If a changeset is visible, its parents must be visible too, so
309 312 # use the faster unfiltered parent accessor.
310 313 parentrevs = repo.unfiltered().changelog.parentrevs
311 314
312 315 # Faster than using ctx.obsolete()
313 316 obsrevs = obsolete.getrevs(repo, b'obsolete')
314 317
315 318 for branch, newheadrevs in newbranches.items():
316 319 # For every branch, compute the new branchheads.
317 320 # A branchhead is a revision such that no descendant is on
318 321 # the same branch.
319 322 #
320 323 # The branchheads are computed iteratively in revision order.
321 324 # This ensures topological order, i.e. parents are processed
322 325 # before their children. Ancestors are inclusive here, i.e.
323 326 # any revision is an ancestor of itself.
324 327 #
325 328 # Core observations:
326 329 # - The current revision is always a branchhead for the
327 330 # repository up to that point.
328 331 # - It is the first revision of the branch if and only if
329 332 # there was no branchhead before. In that case, it is the
330 333 # only branchhead as there are no possible ancestors on
331 334 # the same branch.
332 335 # - If a parent is on the same branch, a branchhead can
333 336 # only be an ancestor of that parent if it is the parent
334 337 # itself. Otherwise it would have been removed as an
335 338 # ancestor of that parent before.
336 339 # - Therefore, if all parents are on the same branch, they
337 340 # can just be removed from the branchhead set.
338 341 # - If one parent is on the same branch and the other is not
339 342 # and there was exactly one branchhead known, the existing
340 343 # branchhead can only be an ancestor if it is the parent.
341 344 # Otherwise it would have been removed as ancestor of
342 345 # the parent before. The other parent therefore can't have
343 346 # a branchhead as ancestor.
344 347 # - In all other cases, the parents on different branches
345 348 # could have a branchhead as ancestor. Those parents are
346 349 # kept in the "uncertain" set. If all branchheads are also
347 350 # topological heads, they can't have descendants and further
348 351 # checks can be skipped. Otherwise, the ancestors of the
349 352 # "uncertain" set are removed from branchheads.
350 353 # This computation is heavy and avoided if at all possible.
351 354 bheads = self._entries.get(branch, [])
352 355 bheadset = {cl.rev(node) for node in bheads}
353 356 uncertain = set()
354 357 for newrev in sorted(newheadrevs):
355 358 if newrev in obsrevs:
356 359 # We ignore obsolete changesets as they shouldn't be
357 360 # considered heads.
358 361 continue
359 362
360 363 if not bheadset:
361 364 bheadset.add(newrev)
362 365 continue
363 366
364 367 parents = [p for p in parentrevs(newrev) if p != nullrev]
365 368 samebranch = set()
366 369 otherbranch = set()
367 370 obsparents = set()
368 371 for p in parents:
369 372 if p in obsrevs:
370 373 # We ignored this obsolete changeset earlier, but now
371 374 # that it has non-ignored children, we need to make
372 375 # sure their ancestors are not considered heads. To
373 376 # achieve that, we will simply treat this obsolete
374 377 # changeset as a parent from other branch.
375 378 obsparents.add(p)
376 379 elif p in bheadset or getbranchinfo(p)[0] == branch:
377 380 samebranch.add(p)
378 381 else:
379 382 otherbranch.add(p)
380 383 if not (len(bheadset) == len(samebranch) == 1):
381 384 uncertain.update(otherbranch)
382 385 uncertain.update(obsparents)
383 386 bheadset.difference_update(samebranch)
384 387 bheadset.add(newrev)
385 388
386 389 if uncertain:
387 390 if topoheads is None:
388 391 topoheads = set(cl.headrevs())
389 392 if bheadset - topoheads:
390 393 floorrev = min(bheadset)
391 394 if floorrev <= max(uncertain):
392 395 ancestors = set(cl.ancestors(uncertain, floorrev))
393 396 bheadset -= ancestors
394 397 if bheadset:
395 398 self[branch] = [cl.node(rev) for rev in sorted(bheadset)]
396 399
397 400 duration = util.timer() - starttime
398 401 repo.ui.log(
399 402 b'branchcache',
400 403 b'updated %s in %.4f seconds\n',
401 404 _branchcachedesc(repo),
402 405 duration,
403 406 )
404 407 return max_rev
405 408
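# A self-contained toy (an illustration, not part of the original file) of
# the core observation above: processing revisions in topological order
# and evicting same-branch parents leaves exactly the branch heads. It
# ignores the obsolete-revision and "uncertain" refinements handled above.
def _toy_branchheads(parents_of, revs):
    heads = set()
    for r in sorted(revs):
        # parents on the same branch can no longer be heads
        heads.difference_update(parents_of[r])
        heads.add(r)
    return heads

# With a linear chain 0 <- 1 <- 2 plus a fork 1 <- 3, the heads are {2, 3}:
# _toy_branchheads({0: [], 1: [0], 2: [1], 3: [1]}, [0, 1, 2, 3]) == {2, 3}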
406 409
407 410 class branchcache(_BaseBranchCache):
408 411 """Branchmap info for a local repo or repoview"""
409 412
410 413 _base_filename = b"branch2"
411 414
412 415 def __init__(
413 416 self,
414 417 repo: "localrepo.localrepository",
415 418 entries: Union[
416 419 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
417 420 ] = (),
418 421 tipnode: Optional[bytes] = None,
419 422 tiprev: Optional[int] = nullrev,
420 423 filteredhash: Optional[bytes] = None,
421 424 closednodes: Optional[Set[bytes]] = None,
422 425 hasnode: Optional[Callable[[bytes], bool]] = None,
423 426 verify_node: bool = False,
424 427 ) -> None:
425 428 """hasnode is a function which can be used to verify whether changelog
426 429 has a given node or not. If it's not provided, we assume that every node
427 430 we have exists in changelog"""
428 431 self._filtername = repo.filtername
429 432 self._delayed = False
430 433 if tipnode is None:
431 434 self.tipnode = repo.nullid
432 435 else:
433 436 self.tipnode = tipnode
434 437 self.tiprev = tiprev
435 438 self.filteredhash = filteredhash
436 439
437 440 super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
438 441 # closednodes is a set of nodes that close their branch. If the branch
439 442 # cache has been updated, it may contain nodes that are no longer
440 443 # heads.
441 444
442 445 # Do we need to verify branches at all?
443 446 self._verify_node = verify_node
444 447 # branches for which nodes are verified
445 448 self._verifiedbranches = set()
446 449 self._hasnode = None
447 450 if self._verify_node:
448 451 self._hasnode = repo.changelog.hasnode
449 452
450 453 def validfor(self, repo):
451 454 """check that cache contents are valid for (a subset of) this repo
452 455
453 456 - False when the order of changesets changed or if we detect a strip.
454 457 - True when cache is up-to-date for the current repo or its subset."""
455 458 try:
456 459 node = repo.changelog.node(self.tiprev)
457 460 except IndexError:
458 461 # changesets were stripped and now we don't even have enough to
459 462 # find tiprev
460 463 return False
461 464 if self.tipnode != node:
462 465 # tiprev doesn't correspond to tipnode: repo was stripped, or this
463 466 # repo has a different order of changesets
464 467 return False
465 468 tiphash = scmutil.filteredhash(repo, self.tiprev, needobsolete=True)
466 469 # hashes don't match if this repo view has a different set of filtered
467 470 # revisions (e.g. due to phase changes) or obsolete revisions (e.g.
468 471 # history was rewritten)
469 472 return self.filteredhash == tiphash
470 473
471 474 @classmethod
472 475 def fromfile(cls, repo):
473 476 f = None
474 477 try:
475 478 f = repo.cachevfs(cls._filename(repo))
476 479 lineiter = iter(f)
477 480 init_kwargs = cls._load_header(repo, lineiter)
478 481 bcache = cls(
479 482 repo,
480 483 verify_node=True,
481 484 **init_kwargs,
482 485 )
483 486 if not bcache.validfor(repo):
484 487 # invalidate the cache
485 488 raise ValueError('tip differs')
486 489 bcache._load_heads(repo, lineiter)
487 490 except (IOError, OSError):
488 491 return None
489 492
490 493 except Exception as inst:
491 494 if repo.ui.debugflag:
492 495 msg = b'invalid %s: %s\n'
493 496 msg %= (
494 497 _branchcachedesc(repo),
495 498 stringutil.forcebytestr(inst),
496 499 )
497 500 repo.ui.debug(msg)
498 501 bcache = None
499 502
500 503 finally:
501 504 if f:
502 505 f.close()
503 506
504 507 return bcache
505 508
506 509 @classmethod
507 510 def _load_header(cls, repo, lineiter) -> "dict[str, Any]":
508 511 """parse the head of a branchmap file
509 512
510 513 return parameters to pass to a newly created class instance.
511 514 """
512 515 cachekey = next(lineiter).rstrip(b'\n').split(b" ", 2)
513 516 last, lrev = cachekey[:2]
514 517 last, lrev = bin(last), int(lrev)
515 518 filteredhash = None
516 519 if len(cachekey) > 2:
517 520 filteredhash = bin(cachekey[2])
518 521 return {
519 522 "tipnode": last,
520 523 "tiprev": lrev,
521 524 "filteredhash": filteredhash,
522 525 }
523 526
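# A stand-alone sketch of the parse above with made-up values; `bin` from
# .node is essentially binascii.unhexlify:
#
#   from binascii import unhexlify
#   line = b"00e7d9" + b"00" * 17 + b" 4023\n"  # hypothetical tip node/rev
#   cachekey = line.rstrip(b'\n').split(b" ", 2)
#   tipnode, tiprev = unhexlify(cachekey[0]), int(cachekey[1])
#   assert tiprev == 4023 and len(tipnode) == 20
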
524 527 def _load_heads(self, repo, lineiter):
525 528 """fully loads the branchcache by reading from the file using the line
526 529 iterator passed"""
527 530 for line in lineiter:
528 531 line = line.rstrip(b'\n')
529 532 if not line:
530 533 continue
531 534 node, state, label = line.split(b" ", 2)
532 535 if state not in b'oc':
533 536 raise ValueError('invalid branch state')
534 537 label = encoding.tolocal(label.strip())
535 538 node = bin(node)
536 539 self._entries.setdefault(label, []).append(node)
537 540 if state == b'c':
538 541 self._closednodes.add(node)
539 542
540 543 @classmethod
541 544 def _filename(cls, repo):
542 545 """name of a branchcache file for a given repo or repoview"""
543 546 filename = cls._base_filename
544 547 if repo.filtername:
545 548 filename = b'%s-%s' % (filename, repo.filtername)
546 549 return filename
547 550
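# e.g. this yields b'branch2' for the unfiltered repo and b'branch2-served'
# for the "served" repoview.
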
548 551 def copy(self, repo):
549 552 """return a deep copy of the branchcache object"""
550 553 other = type(self)(
551 554 repo=repo,
552 555 # we always do a shallow copy of self._entries, and the values are
553 556 # always replaced, so there is no need to deepcopy as long as the
554 557 # above remains true.
555 558 entries=self._entries,
556 559 tipnode=self.tipnode,
557 560 tiprev=self.tiprev,
558 561 filteredhash=self.filteredhash,
559 562 closednodes=set(self._closednodes),
560 563 verify_node=self._verify_node,
561 564 )
562 565 # the copy will likely schedule a write anyway, but it does not seem
563 566 # to hurt to overschedule
564 567 other._delayed = self._delayed
565 568 # also copy information about the current verification state
566 569 other._verifiedbranches = set(self._verifiedbranches)
567 570 return other
568 571
569 572 def write(self, repo):
570 573 assert self._filtername == repo.filtername, (
571 574 self._filtername,
572 575 repo.filtername,
573 576 )
574 577 tr = repo.currenttransaction()
575 578 if not getattr(tr, 'finalized', True):
576 579 # Avoid premature writing.
577 580 #
578 581 # (The cache warming setup by localrepo will update the file later.)
579 582 self._delayed = True
580 583 return
581 584 try:
582 585 filename = self._filename(repo)
583 586 with repo.cachevfs(filename, b"w", atomictemp=True) as f:
584 587 self._write_header(f)
585 588 nodecount = self._write_heads(f)
586 589 repo.ui.log(
587 590 b'branchcache',
588 591 b'wrote %s with %d labels and %d nodes\n',
589 592 _branchcachedesc(repo),
590 593 len(self._entries),
591 594 nodecount,
592 595 )
593 596 self._delayed = False
594 597 except (IOError, OSError, error.Abort) as inst:
595 598 # Abort may be raised by read only opener, so log and continue
596 599 repo.ui.debug(
597 600 b"couldn't write branch cache: %s\n"
598 601 % stringutil.forcebytestr(inst)
599 602 )
600 603
601 604 def _write_header(self, fp) -> None:
602 605 """write the branch cache header to a file"""
603 606 cachekey = [hex(self.tipnode), b'%d' % self.tiprev]
604 607 if self.filteredhash is not None:
605 608 cachekey.append(hex(self.filteredhash))
606 609 fp.write(b" ".join(cachekey) + b'\n')
607 610
608 611 def _write_heads(self, fp) -> int:
609 612 """write list of heads to a file
610 613
611 614 Return the number of heads written."""
612 615 nodecount = 0
613 616 for label, nodes in sorted(self._entries.items()):
614 617 label = encoding.fromlocal(label)
615 618 for node in nodes:
616 619 nodecount += 1
617 620 if node in self._closednodes:
618 621 state = b'c'
619 622 else:
620 623 state = b'o'
621 624 fp.write(b"%s %s %s\n" % (hex(node), state, label))
622 625 return nodecount
623 626
624 627 def _verifybranch(self, branch):
625 628 """verify head nodes for the given branch."""
626 629 if not self._verify_node:
627 630 return
628 631 if branch not in self._entries or branch in self._verifiedbranches:
629 632 return
630 633 assert self._hasnode is not None
631 634 for n in self._entries[branch]:
632 635 if not self._hasnode(n):
633 636 _unknownnode(n)
634 637
635 638 self._verifiedbranches.add(branch)
636 639
637 640 def _verifyall(self):
638 641 """verifies nodes of all the branches"""
639 642 for b in self._entries.keys():
640 643 if b not in self._verifiedbranches:
641 644 self._verifybranch(b)
642 645
643 646 def __getitem__(self, key):
644 647 self._verifybranch(key)
645 648 return super().__getitem__(key)
646 649
647 650 def __contains__(self, key):
648 651 self._verifybranch(key)
649 652 return super().__contains__(key)
650 653
651 654 def iteritems(self):
652 655 self._verifyall()
653 656 return super().iteritems()
654 657
655 658 items = iteritems
656 659
657 660 def iterheads(self):
658 661 """returns all the heads"""
659 662 self._verifyall()
660 663 return super().iterheads()
661 664
662 665 def hasbranch(self, label):
663 666 """checks whether a branch of this name exists or not"""
664 667 self._verifybranch(label)
665 668 return super().hasbranch(label)
666 669
667 670 def branchheads(self, branch, closed=False):
668 671 self._verifybranch(branch)
669 672 return super().branchheads(branch, closed=closed)
670 673
671 674 def update(self, repo, revgen):
672 675 assert self._filtername == repo.filtername, (
673 676 self._filtername,
674 677 repo.filtername,
675 678 )
676 679 cl = repo.changelog
677 680 max_rev = super().update(repo, revgen)
678 681 # new tip revision which we found after iterating items from new
679 682 # branches
680 683 if max_rev is not None and max_rev > self.tiprev:
681 684 self.tiprev = max_rev
682 685 self.tipnode = cl.node(max_rev)
683 686
684 687 if not self.validfor(repo):
685 688 # old cache key is now invalid for the repo, but we've just updated
686 689 # the cache and we assume it's valid, so let's make the cache key
687 690 # valid as well by recomputing it from the cached data
688 691 self.tipnode = repo.nullid
689 692 self.tiprev = nullrev
690 693 for heads in self.iterheads():
691 694 if not heads:
692 695 # all revisions on a branch are obsolete
693 696 continue
694 697 # note: tiprev is not necessarily the tip revision of repo,
695 698 # because the tip could be obsolete (i.e. not a head)
696 699 tiprev = max(cl.rev(node) for node in heads)
697 700 if tiprev > self.tiprev:
698 701 self.tipnode = cl.node(tiprev)
699 702 self.tiprev = tiprev
700 703 self.filteredhash = scmutil.filteredhash(
701 704 repo, self.tiprev, needobsolete=True
702 705 )
703 706
704 707 self.write(repo)
705 708
706 709
707 710 class remotebranchcache(_BaseBranchCache):
708 711 """Branchmap info for a remote connection, should not write locally"""
709 712
710 713 def __init__(
711 714 self,
712 715 repo: "localrepo.localrepository",
713 716 entries: Union[
714 717 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
715 718 ] = (),
716 719 closednodes: Optional[Set[bytes]] = None,
717 720 ) -> None:
718 721 super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
719 722
720 723
721 724 # Revision branch info cache
722 725
723 726 _rbcversion = b'-v1'
724 727 _rbcnames = b'rbc-names' + _rbcversion
725 728 _rbcrevs = b'rbc-revs' + _rbcversion
726 729 # [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
727 730 _rbcrecfmt = b'>4sI'
728 731 _rbcrecsize = calcsize(_rbcrecfmt)
729 732 _rbcmininc = 64 * _rbcrecsize
730 733 _rbcnodelen = 4
731 734 _rbcbranchidxmask = 0x7FFFFFFF
732 735 _rbccloseflag = 0x80000000
733 736
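# A quick stand-alone illustration (not part of the original file) of one
# rbc-revs record: 4 bytes of node-hash prefix plus a big-endian uint32
# whose top bit flags a branch-closing commit (node bytes made up):
#
#   rec = struct.pack(_rbcrecfmt, b'\xe5\xa1\x07\x42', 5 | _rbccloseflag)
#   node_prefix, field = struct.unpack(_rbcrecfmt, rec)
#   assert field & _rbcbranchidxmask == 5  # branch name index
#   assert bool(field & _rbccloseflag)     # close flag is set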
734 737
735 738 class rbcrevs:
736 739 """a byte string consisting of an immutable prefix followed by a mutable suffix"""
737 740
738 741 def __init__(self, revs):
739 742 self._prefix = revs
740 743 self._rest = bytearray()
741 744
742 745 def __len__(self):
743 746 return len(self._prefix) + len(self._rest)
744 747
745 748 def unpack_record(self, rbcrevidx):
746 749 if rbcrevidx < len(self._prefix):
747 750 return unpack_from(_rbcrecfmt, util.buffer(self._prefix), rbcrevidx)
748 751 else:
749 752 return unpack_from(
750 753 _rbcrecfmt,
751 754 util.buffer(self._rest),
752 755 rbcrevidx - len(self._prefix),
753 756 )
754 757
755 758 def make_mutable(self):
756 759 if len(self._prefix) > 0:
757 760 entirety = bytearray()
758 761 entirety[:] = self._prefix
759 762 entirety.extend(self._rest)
760 763 self._rest = entirety
761 764 self._prefix = bytearray()
762 765
763 766 def truncate(self, pos):
764 767 self.make_mutable()
765 768 del self._rest[pos:]
766 769
767 770 def pack_into(self, rbcrevidx, node, branchidx):
768 771 if rbcrevidx < len(self._prefix):
769 772 self.make_mutable()
770 773 buf = self._rest
771 774 start_offset = rbcrevidx - len(self._prefix)
772 775 end_offset = start_offset + _rbcrecsize
773 776
774 777 if len(self._rest) < end_offset:
775 778 # bytearray doesn't allocate extra space at least in Python 3.7.
776 779 # When multiple changesets are added in a row, precise resize would
777 780 # result in quadratic complexity. Overallocate to compensate by
778 781 # using the classic doubling technique for dynamic arrays instead.
779 782 # If there was a gap in the map before, less space will be reserved.
780 783 self._rest.extend(b'\0' * end_offset)
781 784 return pack_into(
782 785 _rbcrecfmt,
783 786 buf,
784 787 start_offset,
785 788 node,
786 789 branchidx,
787 790 )
788 791
789 792 def extend(self, extension):
790 793 return self._rest.extend(extension)
791 794
792 795 def slice(self, begin, end):
793 796 if begin < len(self._prefix):
794 797 acc = bytearray()
795 798 acc[:] = self._prefix[begin:end]
796 799 acc.extend(
797 800 self._rest[begin - len(self._prefix) : end - len(self._prefix)]
798 801 )
799 802 return acc
800 803 return self._rest[begin - len(self._prefix) : end - len(self._prefix)]
801 804
802 805
803 806 class revbranchcache:
804 807 """Persistent cache, mapping from revision number to branch name and close.
805 808 This is a low level cache, independent of filtering.
806 809
807 810 Branch names are stored in rbc-names in internal encoding separated by 0.
808 811 rbc-names is append-only, and each branch name is only stored once and will
809 812 thus have a unique index.
810 813
811 814 The branch info for each revision is stored in rbc-revs as constant size
812 815 records. The whole file is read into memory, but it is only 'parsed' on
813 816 demand. The file is usually append-only but will be truncated if repo
814 817 modification is detected.
815 818 The record for each revision contains the first 4 bytes of the
816 819 corresponding node hash, and the record is only used if it still matches.
817 820 Even a completely trashed rbc-revs file will thus still give the right result
818 821 while converging towards full recovery ... assuming no incorrectly matching
819 822 node hashes.
820 823 The record also contains 4 bytes where 31 bits contain the index of the
821 824 branch and the last bit indicates that it is a branch-closing commit.
822 825 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
823 826 and will grow with it but be 1/8th of its size.
824 827 """
825 828
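# Size arithmetic behind the "1/8th" claim above: one record is
# calcsize(_rbcrecfmt) == 8 bytes (a 4-byte node prefix plus a 4-byte
# uint32), against 64 bytes per 00changelog.i index entry; a hypothetical
# repository of 1,000,000 revisions therefore needs about 8 MB of rbc-revs.
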
826 829 def __init__(self, repo, readonly=True):
827 830 assert repo.filtername is None
828 831 self._repo = repo
829 832 self._names = [] # branch names in local encoding with static index
830 833 self._rbcrevs = rbcrevs(bytearray())
831 834 self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
832 835 try:
833 836 bndata = repo.cachevfs.read(_rbcnames)
834 837 self._rbcsnameslen = len(bndata) # for verification before writing
835 838 if bndata:
836 839 self._names = [
837 840 encoding.tolocal(bn) for bn in bndata.split(b'\0')
838 841 ]
839 842 except (IOError, OSError):
840 843 if readonly:
841 844 # don't try to use cache - fall back to the slow path
842 845 self.branchinfo = self._branchinfo
843 846
844 847 if self._names:
845 848 try:
846 849 if repo.ui.configbool(b'format', b'mmap-revbranchcache'):
847 850 with repo.cachevfs(_rbcrevs) as fp:
848 851 data = util.buffer(util.mmapread(fp))
849 852 else:
850 853 data = repo.cachevfs.read(_rbcrevs)
851 854 self._rbcrevs = rbcrevs(data)
852 855 except (IOError, OSError) as inst:
853 856 repo.ui.debug(
854 857 b"couldn't read revision branch cache: %s\n"
855 858 % stringutil.forcebytestr(inst)
856 859 )
857 860 # remember number of good records on disk
858 861 self._rbcrevslen = min(
859 862 len(self._rbcrevs) // _rbcrecsize, len(repo.changelog)
860 863 )
861 864 if self._rbcrevslen == 0:
862 865 self._names = []
863 866 self._rbcnamescount = len(self._names) # number of names read at
864 867 # _rbcsnameslen
865 868
866 869 def _clear(self):
867 870 self._rbcsnameslen = 0
868 871 del self._names[:]
869 872 self._rbcnamescount = 0
870 873 self._rbcrevslen = len(self._repo.changelog)
871 874 self._rbcrevs = rbcrevs(bytearray(self._rbcrevslen * _rbcrecsize))
872 875 util.clearcachedproperty(self, b'_namesreverse')
873 876
874 877 @util.propertycache
875 878 def _namesreverse(self):
876 879 return {b: r for r, b in enumerate(self._names)}
877 880
878 881 def branchinfo(self, rev):
879 882 """Return branch name and close flag for rev, using and updating
880 883 persistent cache."""
881 884 changelog = self._repo.changelog
882 885 rbcrevidx = rev * _rbcrecsize
883 886
884 887 # avoid negative index, changelog.read(nullrev) is fast without cache
885 888 if rev == nullrev:
886 889 return changelog.branchinfo(rev)
887 890
888 891 # if requested rev isn't allocated, grow and cache the rev info
889 892 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
890 893 return self._branchinfo(rev)
891 894
892 895 # fast path: extract data from cache, use it if node is matching
893 896 reponode = changelog.node(rev)[:_rbcnodelen]
894 897 cachenode, branchidx = self._rbcrevs.unpack_record(rbcrevidx)
895 898 close = bool(branchidx & _rbccloseflag)
896 899 if close:
897 900 branchidx &= _rbcbranchidxmask
898 901 if cachenode == b'\0\0\0\0':
899 902 pass
900 903 elif cachenode == reponode:
901 904 try:
902 905 return self._names[branchidx], close
903 906 except IndexError:
904 907 # recover from invalid reference to unknown branch
905 908 self._repo.ui.debug(
906 909 b"referenced branch names not found"
907 910 b" - rebuilding revision branch cache from scratch\n"
908 911 )
909 912 self._clear()
910 913 else:
911 914 # rev/node map has changed, invalidate the cache from here up
912 915 self._repo.ui.debug(
913 916 b"history modification detected - truncating "
914 917 b"revision branch cache to revision %d\n" % rev
915 918 )
916 919 truncate = rbcrevidx + _rbcrecsize
917 920 self._rbcrevs.truncate(truncate)
918 921 self._rbcrevslen = min(self._rbcrevslen, truncate)
919 922
920 923 # fall back to slow path and make sure it will be written to disk
921 924 return self._branchinfo(rev)
922 925
923 926 def _branchinfo(self, rev):
924 927 """Retrieve branch info from changelog and update _rbcrevs"""
925 928 changelog = self._repo.changelog
926 929 b, close = changelog.branchinfo(rev)
927 930 if b in self._namesreverse:
928 931 branchidx = self._namesreverse[b]
929 932 else:
930 933 branchidx = len(self._names)
931 934 self._names.append(b)
932 935 self._namesreverse[b] = branchidx
933 936 reponode = changelog.node(rev)
934 937 if close:
935 938 branchidx |= _rbccloseflag
936 939 self._setcachedata(rev, reponode, branchidx)
937 940 return b, close
938 941
939 942 def setdata(self, rev, changelogrevision):
940 943 """add new data information to the cache"""
941 944 branch, close = changelogrevision.branchinfo
942 945
943 946 if branch in self._namesreverse:
944 947 branchidx = self._namesreverse[branch]
945 948 else:
946 949 branchidx = len(self._names)
947 950 self._names.append(branch)
948 951 self._namesreverse[branch] = branchidx
949 952 if close:
950 953 branchidx |= _rbccloseflag
951 954 self._setcachedata(rev, self._repo.changelog.node(rev), branchidx)
952 955 # If no cache data were readable (none exists, bad permissions, etc.)
953 956 # the cache was bypassing itself by setting:
954 957 #
955 958 # self.branchinfo = self._branchinfo
956 959 #
957 960 # Since we now have data in the cache, we need to drop this bypassing.
958 961 if 'branchinfo' in vars(self):
959 962 del self.branchinfo
960 963
961 964 def _setcachedata(self, rev, node, branchidx):
962 965 """Writes the node's branch data to the in-memory cache data."""
963 966 if rev == nullrev:
964 967 return
965 968 rbcrevidx = rev * _rbcrecsize
966 969 self._rbcrevs.pack_into(rbcrevidx, node, branchidx)
967 970 self._rbcrevslen = min(self._rbcrevslen, rev)
968 971
969 972 tr = self._repo.currenttransaction()
970 973 if tr:
971 974 tr.addfinalize(b'write-revbranchcache', self.write)
972 975
973 976 def write(self, tr=None):
974 977 """Save branch cache if it is dirty."""
975 978 repo = self._repo
976 979 wlock = None
977 980 step = b''
978 981 try:
979 982 # write the new names
980 983 if self._rbcnamescount < len(self._names):
981 984 wlock = repo.wlock(wait=False)
982 985 step = b' names'
983 986 self._writenames(repo)
984 987
985 988 # write the new revs
986 989 start = self._rbcrevslen * _rbcrecsize
987 990 if start != len(self._rbcrevs):
988 991 step = b''
989 992 if wlock is None:
990 993 wlock = repo.wlock(wait=False)
991 994 self._writerevs(repo, start)
992 995
993 996 except (IOError, OSError, error.Abort, error.LockError) as inst:
994 997 repo.ui.debug(
995 998 b"couldn't write revision branch cache%s: %s\n"
996 999 % (step, stringutil.forcebytestr(inst))
997 1000 )
998 1001 finally:
999 1002 if wlock is not None:
1000 1003 wlock.release()
1001 1004
1002 1005 def _writenames(self, repo):
1003 1006 """write the new branch names to revbranchcache"""
1004 1007 if self._rbcnamescount != 0:
1005 1008 f = repo.cachevfs.open(_rbcnames, b'ab')
1006 1009 if f.tell() == self._rbcsnameslen:
1007 1010 f.write(b'\0')
1008 1011 else:
1009 1012 f.close()
1010 1013 repo.ui.debug(b"%s changed - rewriting it\n" % _rbcnames)
1011 1014 self._rbcnamescount = 0
1012 1015 self._rbcrevslen = 0
1013 1016 if self._rbcnamescount == 0:
1014 1017 # before rewriting names, make sure references are removed
1015 1018 repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
1016 1019 f = repo.cachevfs.open(_rbcnames, b'wb')
1017 1020 f.write(
1018 1021 b'\0'.join(
1019 1022 encoding.fromlocal(b)
1020 1023 for b in self._names[self._rbcnamescount :]
1021 1024 )
1022 1025 )
1023 1026 self._rbcsnameslen = f.tell()
1024 1027 f.close()
1025 1028 self._rbcnamescount = len(self._names)
1026 1029
1027 1030 def _writerevs(self, repo, start):
1028 1031 """write the new revs to revbranchcache"""
1029 1032 revs = min(len(repo.changelog), len(self._rbcrevs) // _rbcrecsize)
1030 1033 with repo.cachevfs.open(_rbcrevs, b'ab') as f:
1031 1034 if f.tell() != start:
1032 1035 repo.ui.debug(
1033 1036 b"truncating cache/%s to %d\n" % (_rbcrevs, start)
1034 1037 )
1035 1038 f.seek(start)
1036 1039 if f.tell() != start:
1037 1040 start = 0
1038 1041 f.seek(start)
1039 1042 f.truncate()
1040 1043 end = revs * _rbcrecsize
1041 1044 f.write(self._rbcrevs.slice(start, end))
1042 1045 self._rbcrevslen = revs