branchcache: move the filename to a class attribute...
marmoute - r52353:cebd96de default
@@ -1,1019 +1,1021 @@
1 1 # branchmap.py - logic to compute, maintain and store the branchmap for a local repo
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import struct
10 10
11 11 from .node import (
12 12 bin,
13 13 hex,
14 14 nullrev,
15 15 )
16 16
17 17 from typing import (
18 18 Callable,
19 19 Dict,
20 20 Iterable,
21 21 List,
22 22 Optional,
23 23 Set,
24 24 TYPE_CHECKING,
25 25 Tuple,
26 26 Union,
27 27 )
28 28
29 29 from . import (
30 30 encoding,
31 31 error,
32 32 obsolete,
33 33 scmutil,
34 34 util,
35 35 )
36 36
37 37 from .utils import (
38 38 repoviewutil,
39 39 stringutil,
40 40 )
41 41
42 42 if TYPE_CHECKING:
43 43 from . import localrepo
44 44
45 45 assert [localrepo]
46 46
47 47 subsettable = repoviewutil.subsettable
48 48
49 49 calcsize = struct.calcsize
50 50 pack_into = struct.pack_into
51 51 unpack_from = struct.unpack_from
52 52
53 53
54 54 class BranchMapCache:
55 55 """mapping of filtered views of repo with their branchcache"""
56 56
57 57 def __init__(self):
58 58 self._per_filter = {}
59 59
60 60 def __getitem__(self, repo):
61 61 self.updatecache(repo)
62 62 bcache = self._per_filter[repo.filtername]
63 63 assert bcache._filtername == repo.filtername, (
64 64 bcache._filtername,
65 65 repo.filtername,
66 66 )
67 67 return bcache
68 68
69 69 def update_disk(self, repo):
70 70 """ensure and up-to-date cache is (or will be) written on disk
71 71
72 72 The cache for this repository view is updated if needed and written on
73 73 disk.
74 74
75 75 If a transaction is in progress, the write is scheduled at transaction
76 76 close. See the `BranchMapCache.write_delayed` method.
77 77
78 78 This method exists independently of __getitem__ as it is sometimes
79 79 useful to signal that we have no intent to use the data in memory yet.
80 80 """
81 81 self.updatecache(repo)
82 82 bcache = self._per_filter[repo.filtername]
83 83 assert bcache._filtername == repo.filtername, (
84 84 bcache._filtername,
85 85 repo.filtername,
86 86 )
87 87 bcache.write(repo)
88 88
89 89 def updatecache(self, repo):
90 90 """Update the cache for the given filtered view on a repository"""
91 91 # This can trigger updates for the caches for subsets of the filtered
92 92 # view, e.g. when there is no cache for this filtered view or the cache
93 93 # is stale.
94 94
95 95 cl = repo.changelog
96 96 filtername = repo.filtername
97 97 bcache = self._per_filter.get(filtername)
98 98 if bcache is None or not bcache.validfor(repo):
99 99 # cache object missing or cache object stale? Read from disk
100 100 bcache = branchcache.fromfile(repo)
101 101
102 102 revs = []
103 103 if bcache is None:
104 104 # no (fresh) cache available anymore, perhaps we can re-use
105 105 # the cache for a subset, then extend that to add info on missing
106 106 # revisions.
107 107 subsetname = subsettable.get(filtername)
108 108 if subsetname is not None:
109 109 subset = repo.filtered(subsetname)
110 110 bcache = self[subset].copy(repo)
111 111 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
112 112 revs.extend(r for r in extrarevs if r <= bcache.tiprev)
113 113 else:
114 114 # nothing to fall back on, start empty.
115 115 bcache = branchcache(repo)
116 116
117 117 revs.extend(cl.revs(start=bcache.tiprev + 1))
118 118 if revs:
119 119 bcache.update(repo, revs)
120 120
121 121 assert bcache.validfor(repo), filtername
122 122 self._per_filter[repo.filtername] = bcache
123 123
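# Illustration only (not part of the change): a sketch of the subset fallback
# used above. The mapping below mirrors repoviewutil.subsettable but is
# hard-coded for the example; when a view has no usable cache, the cache of
# its subset view is copied and then extended with the missing revisions.
subsettable = {b'visible': b'served', b'served': b'immutable',
               b'immutable': b'base'}

def fallback_chain(filtername):
    chain = [filtername]
    while filtername in subsettable:
        filtername = subsettable[filtername]
        chain.append(filtername)
    return chain

print(fallback_chain(b'visible'))
# [b'visible', b'served', b'immutable', b'base']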
124 124 def replace(self, repo, remotebranchmap):
125 125 """Replace the branchmap cache for a repo with a branch mapping.
126 126
127 127 This is likely only called during clone with a branch map from a
128 128 remote.
129 129
130 130 """
131 131 cl = repo.changelog
132 132 clrev = cl.rev
133 133 clbranchinfo = cl.branchinfo
134 134 rbheads = []
135 135 closed = set()
136 136 for bheads in remotebranchmap.values():
137 137 rbheads += bheads
138 138 for h in bheads:
139 139 r = clrev(h)
140 140 b, c = clbranchinfo(r)
141 141 if c:
142 142 closed.add(h)
143 143
144 144 if rbheads:
145 145 rtiprev = max((int(clrev(node)) for node in rbheads))
146 146 cache = branchcache(
147 147 repo,
148 148 remotebranchmap,
149 149 repo[rtiprev].node(),
150 150 rtiprev,
151 151 closednodes=closed,
152 152 )
153 153
154 154 # Try to stick it as low as possible
155 155 # filters above served are unlikely to be fetched from a clone
156 156 for candidate in (b'base', b'immutable', b'served'):
157 157 rview = repo.filtered(candidate)
158 158 if cache.validfor(rview):
159 159 cache = self._per_filter[candidate] = cache.copy(rview)
160 160 cache.write(rview)
161 161 return
162 162
163 163 def clear(self):
164 164 self._per_filter.clear()
165 165
166 166 def write_delayed(self, repo):
167 167 unfi = repo.unfiltered()
168 168 for filtername, cache in self._per_filter.items():
169 169 if cache._delayed:
170 170 repo = unfi.filtered(filtername)
171 171 cache.write(repo)
172 172
173 173
174 174 def _unknownnode(node):
175 175 """raises ValueError when branchcache found a node which does not exists"""
176 176 raise ValueError('node %s does not exist' % node.hex())
177 177
178 178
179 179 def _branchcachedesc(repo):
180 180 if repo.filtername is not None:
181 181 return b'branch cache (%s)' % repo.filtername
182 182 else:
183 183 return b'branch cache'
184 184
185 185
186 186 class _BaseBranchCache:
187 187 """A dict like object that hold branches heads cache.
188 188
189 189 This cache is used to avoid costly computations to determine all the
190 190 branch heads of a repo.
191 191
192 192 The cache is serialized on disk in the following format:
193 193
194 194 <tip hex node> <tip rev number> [optional filtered repo hex hash]
195 195 <branch head hex node> <open/closed state> <branch name>
196 196 <branch head hex node> <open/closed state> <branch name>
197 197 ...
198 198
199 199 The first line is used to check if the cache is still valid. If the
200 200 branch cache is for a filtered repo view, an optional third hash is
201 201 included that hashes the hashes of all filtered and obsolete revisions.
202 202
203 203 The open/closed state is represented by a single letter 'o' or 'c'.
204 204 This field can be used to avoid changelog reads when determining if a
205 205 branch head closes a branch or not.
206 206 """
207 207
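# Illustration only: a minimal, standalone sketch of parsing the on-disk
# format described in the docstring above. The function name and the sample
# payload are hypothetical; the real loading logic lives in
# branchcache.fromfile() and branchcache.load().
from binascii import unhexlify

def parse_branch_cache(data: bytes):
    lines = data.splitlines()
    # first line: tip node, tip rev, optional filtered-repo hash
    tipnode_hex, tiprev = lines[0].split(b' ', 2)[:2]
    heads = {}
    closed = set()
    for line in lines[1:]:
        if not line:
            continue
        node_hex, state, label = line.split(b' ', 2)
        node = unhexlify(node_hex)
        heads.setdefault(label, []).append(node)
        if state == b'c':  # 'c' marks a head that closes its branch
            closed.add(node)
    return unhexlify(tipnode_hex), int(tiprev), heads, closed

sample = b'0' * 40 + b' 0\n' + b'1' * 40 + b' o default\n'
print(parse_branch_cache(sample))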
208 208 def __init__(
209 209 self,
210 210 repo: "localrepo.localrepository",
211 211 entries: Union[
212 212 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
213 213 ] = (),
214 214 closed_nodes: Optional[Set[bytes]] = None,
215 215 ) -> None:
216 216 """hasnode is a function which can be used to verify whether changelog
217 217 has a given node or not. If it's not provided, we assume that every node
218 218 we have exists in changelog"""
219 219 # closednodes is a set of nodes that close their branch. If the branch
220 220 # cache has been updated, it may contain nodes that are no longer
221 221 # heads.
222 222 if closed_nodes is None:
223 223 closed_nodes = set()
224 224 self._closednodes = set(closed_nodes)
225 225 self._entries = dict(entries)
226 226
227 227 def __iter__(self):
228 228 return iter(self._entries)
229 229
230 230 def __setitem__(self, key, value):
231 231 self._entries[key] = value
232 232
233 233 def __getitem__(self, key):
234 234 return self._entries[key]
235 235
236 236 def __contains__(self, key):
237 237 return key in self._entries
238 238
239 239 def iteritems(self):
240 240 return self._entries.items()
241 241
242 242 items = iteritems
243 243
244 244 def hasbranch(self, label):
245 245 """checks whether a branch of this name exists or not"""
246 246 return label in self._entries
247 247
248 248 def _branchtip(self, heads):
249 249 """Return tuple with last open head in heads and false,
250 250 otherwise return last closed head and true."""
251 251 tip = heads[-1]
252 252 closed = True
253 253 for h in reversed(heads):
254 254 if h not in self._closednodes:
255 255 tip = h
256 256 closed = False
257 257 break
258 258 return tip, closed
259 259
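# Illustration only: the tip-selection rule of _branchtip above, on plain
# lists. Heads are ordered oldest-to-newest; the last open head wins,
# otherwise the last (closed) head does.
def branch_tip(heads, closed_nodes):
    for h in reversed(heads):
        if h not in closed_nodes:
            return h, False
    return heads[-1], True

print(branch_tip([b'a', b'b', b'c'], {b'c'}))  # (b'b', False)
print(branch_tip([b'a', b'b'], {b'a', b'b'}))  # (b'b', True)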
260 260 def branchtip(self, branch):
261 261 """Return the tipmost open head on branch head, otherwise return the
262 262 tipmost closed head on branch.
263 263 Raise KeyError for unknown branch."""
264 264 return self._branchtip(self[branch])[0]
265 265
266 266 def iteropen(self, nodes):
267 267 return (n for n in nodes if n not in self._closednodes)
268 268
269 269 def branchheads(self, branch, closed=False):
270 270 heads = self._entries[branch]
271 271 if not closed:
272 272 heads = list(self.iteropen(heads))
273 273 return heads
274 274
275 275 def iterbranches(self):
276 276 for bn, heads in self.items():
277 277 yield (bn, heads) + self._branchtip(heads)
278 278
279 279 def iterheads(self):
280 280 """returns all the heads"""
281 281 return self._entries.values()
282 282
283 283 def update(self, repo, revgen):
284 284 """Given a branchhead cache, self, that may have extra nodes or be
285 285 missing heads, and a generator of revisions that is strictly a superset
286 286 of the missing heads, this function updates self to be correct.
287 287 """
288 288 starttime = util.timer()
289 289 cl = repo.changelog
290 290 # collect new branch entries
291 291 newbranches = {}
292 292 getbranchinfo = repo.revbranchcache().branchinfo
293 293 max_rev = -1
294 294 for r in revgen:
295 295 branch, closesbranch = getbranchinfo(r)
296 296 newbranches.setdefault(branch, []).append(r)
297 297 if closesbranch:
298 298 self._closednodes.add(cl.node(r))
299 299 max_rev = max(max_rev, r)
300 300 if max_rev < 0:
301 301 max_rev = None
302 302
303 303 # Delay fetching the topological heads until they are needed.
304 304 # A repository without non-continuous branches can skip this part.
305 305 topoheads = None
306 306
307 307 # If a changeset is visible, its parents must be visible too, so
308 308 # use the faster unfiltered parent accessor.
309 309 parentrevs = repo.unfiltered().changelog.parentrevs
310 310
311 311 # Faster than using ctx.obsolete()
312 312 obsrevs = obsolete.getrevs(repo, b'obsolete')
313 313
314 314 for branch, newheadrevs in newbranches.items():
315 315 # For every branch, compute the new branchheads.
316 316 # A branchhead is a revision such that no descendant is on
317 317 # the same branch.
318 318 #
319 319 # The branchheads are computed iteratively in revision order.
320 320 # This ensures topological order, i.e. parents are processed
321 321 # before their children. Ancestors are inclusive here, i.e.
322 322 # any revision is an ancestor of itself.
323 323 #
324 324 # Core observations:
325 325 # - The current revision is always a branchhead for the
326 326 # repository up to that point.
327 327 # - It is the first revision of the branch if and only if
328 328 # there was no branchhead before. In that case, it is the
329 329 # only branchhead as there are no possible ancestors on
330 330 # the same branch.
331 331 # - If a parent is on the same branch, a branchhead can
332 332 # only be an ancestor of that parent if it is the parent
333 333 # itself. Otherwise it would have been removed as an ancestor
334 334 # of that parent before.
335 335 # - Therefore, if all parents are on the same branch, they
336 336 # can just be removed from the branchhead set.
337 337 # - If one parent is on the same branch and the other is not
338 338 # and there was exactly one branchhead known, the existing
339 339 # branchhead can only be an ancestor if it is the parent.
340 340 # Otherwise it would have been removed as ancestor of
341 341 # the parent before. The other parent therefore can't have
342 342 # a branchhead as ancestor.
343 343 # - In all other cases, the parents on different branches
344 344 # could have a branchhead as ancestor. Those parents are
345 345 # kept in the "uncertain" set. If all branchheads are also
346 346 # topological heads, they can't have descendants and further
347 347 # checks can be skipped. Otherwise, the ancestors of the
348 348 # "uncertain" set are removed from branchheads.
349 349 # This computation is heavy and avoided if at all possible.
350 350 bheads = self._entries.get(branch, [])
351 351 bheadset = {cl.rev(node) for node in bheads}
352 352 uncertain = set()
353 353 for newrev in sorted(newheadrevs):
354 354 if newrev in obsrevs:
355 355 # We ignore obsolete changesets as they shouldn't be
356 356 # considered heads.
357 357 continue
358 358
359 359 if not bheadset:
360 360 bheadset.add(newrev)
361 361 continue
362 362
363 363 parents = [p for p in parentrevs(newrev) if p != nullrev]
364 364 samebranch = set()
365 365 otherbranch = set()
366 366 obsparents = set()
367 367 for p in parents:
368 368 if p in obsrevs:
369 369 # We ignored this obsolete changeset earlier, but now
370 370 # that it has non-ignored children, we need to make
371 371 # sure their ancestors are not considered heads. To
372 372 # achieve that, we will simply treat this obsolete
373 373 # changeset as a parent from another branch.
374 374 obsparents.add(p)
375 375 elif p in bheadset or getbranchinfo(p)[0] == branch:
376 376 samebranch.add(p)
377 377 else:
378 378 otherbranch.add(p)
379 379 if not (len(bheadset) == len(samebranch) == 1):
380 380 uncertain.update(otherbranch)
381 381 uncertain.update(obsparents)
382 382 bheadset.difference_update(samebranch)
383 383 bheadset.add(newrev)
384 384
385 385 if uncertain:
386 386 if topoheads is None:
387 387 topoheads = set(cl.headrevs())
388 388 if bheadset - topoheads:
389 389 floorrev = min(bheadset)
390 390 if floorrev <= max(uncertain):
391 391 ancestors = set(cl.ancestors(uncertain, floorrev))
392 392 bheadset -= ancestors
393 393 if bheadset:
394 394 self[branch] = [cl.node(rev) for rev in sorted(bheadset)]
395 395
396 396 duration = util.timer() - starttime
397 397 repo.ui.log(
398 398 b'branchcache',
399 399 b'updated %s in %.4f seconds\n',
400 400 _branchcachedesc(repo),
401 401 duration,
402 402 )
403 403 return max_rev
404 404
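# Illustration only: a self-contained sketch of the head-tracking loop in
# update() above, on a toy DAG given as (rev, branch, parent revs) tuples in
# topological order. All names here are hypothetical, and this toy skips the
# obsolete-revision handling and the "uncertain"/topological-head fast paths.
def toy_branch_heads(dag):
    heads = {}       # branch -> set of revs with no same-branch descendant yet
    branch_of = {}
    for rev, branch, parents in dag:
        branch_of[rev] = branch
        bheads = heads.setdefault(branch, set())
        # parents on the same branch can no longer be heads
        bheads.difference_update(
            p for p in parents if branch_of.get(p) == branch
        )
        bheads.add(rev)  # the new revision is always a head so far
    return heads

# linear default branch plus a 'stable' branch forking off rev 1
dag = [(0, 'default', []), (1, 'default', [0]),
       (2, 'stable', [1]), (3, 'default', [1])]
print(toy_branch_heads(dag))  # {'default': {3}, 'stable': {2}}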
405 405
406 406 class branchcache(_BaseBranchCache):
407 407 """Branchmap info for a local repo or repoview"""
408 408
409 _base_filename = b"branch2"
410
409 411 def __init__(
410 412 self,
411 413 repo: "localrepo.localrepository",
412 414 entries: Union[
413 415 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
414 416 ] = (),
415 417 tipnode: Optional[bytes] = None,
416 418 tiprev: Optional[int] = nullrev,
417 419 filteredhash: Optional[bytes] = None,
418 420 closednodes: Optional[Set[bytes]] = None,
419 421 hasnode: Optional[Callable[[bytes], bool]] = None,
420 422 verify_node: bool = False,
421 423 ) -> None:
422 424 """hasnode is a function which can be used to verify whether changelog
423 425 has a given node or not. If it's not provided, we assume that every node
424 426 we have exists in changelog"""
425 427 self._filtername = repo.filtername
426 428 self._delayed = False
427 429 if tipnode is None:
428 430 self.tipnode = repo.nullid
429 431 else:
430 432 self.tipnode = tipnode
431 433 self.tiprev = tiprev
432 434 self.filteredhash = filteredhash
433 435
434 436 super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
435 437 # closednodes is a set of nodes that close their branch. If the branch
436 438 # cache has been updated, it may contain nodes that are no longer
437 439 # heads.
438 440
439 441 # Do we need to verify branches at all?
440 442 self._verify_node = verify_node
441 443 # branches for which nodes are verified
442 444 self._verifiedbranches = set()
443 445 self._hasnode = None
444 446 if self._verify_node:
445 447 self._hasnode = repo.changelog.hasnode
446 448
447 449 def validfor(self, repo):
448 450 """check that cache contents are valid for (a subset of) this repo
449 451
450 452 - False when the order of changesets changed or if we detect a strip.
451 453 - True when cache is up-to-date for the current repo or its subset."""
452 454 try:
453 455 node = repo.changelog.node(self.tiprev)
454 456 except IndexError:
455 457 # changesets were stripped and now we don't even have enough to
456 458 # find tiprev
457 459 return False
458 460 if self.tipnode != node:
459 461 # tiprev doesn't correspond to tipnode: repo was stripped, or this
460 462 # repo has a different order of changesets
461 463 return False
462 464 tiphash = scmutil.filteredhash(repo, self.tiprev, needobsolete=True)
463 465 # hashes don't match if this repo view has a different set of filtered
464 466 # revisions (e.g. due to phase changes) or obsolete revisions (e.g.
465 467 # history was rewritten)
466 468 return self.filteredhash == tiphash
467 469
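# Illustration only: the validity rule of validfor() above, restated on plain
# data. The helper names are hypothetical; in the real code,
# scmutil.filteredhash() fingerprints the filtered and obsolete revision sets.
def cache_is_valid(cache, changelog_nodes, current_filtered_hash):
    try:
        node_at_tiprev = changelog_nodes[cache['tiprev']]
    except IndexError:
        return False  # stripped below the cached tip
    if node_at_tiprev != cache['tipnode']:
        return False  # same rev number now names another node
    return cache['filteredhash'] == current_filtered_hash

cache = {'tiprev': 1, 'tipnode': b'b', 'filteredhash': b'h'}
print(cache_is_valid(cache, [b'a', b'b'], b'h'))  # True
print(cache_is_valid(cache, [b'a'], b'h'))        # False: stripped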
468 470 @classmethod
469 471 def fromfile(cls, repo):
470 472 f = None
471 473 try:
472 474 f = repo.cachevfs(cls._filename(repo))
473 475 lineiter = iter(f)
474 476 cachekey = next(lineiter).rstrip(b'\n').split(b" ", 2)
475 477 last, lrev = cachekey[:2]
476 478 last, lrev = bin(last), int(lrev)
477 479 filteredhash = None
478 480 if len(cachekey) > 2:
479 481 filteredhash = bin(cachekey[2])
480 482 bcache = cls(
481 483 repo,
482 484 tipnode=last,
483 485 tiprev=lrev,
484 486 filteredhash=filteredhash,
485 487 verify_node=True,
486 488 )
487 489 if not bcache.validfor(repo):
488 490 # invalidate the cache
489 491 raise ValueError('tip differs')
490 492 bcache.load(repo, lineiter)
491 493 except (IOError, OSError):
492 494 return None
493 495
494 496 except Exception as inst:
495 497 if repo.ui.debugflag:
496 498 msg = b'invalid %s: %s\n'
497 499 repo.ui.debug(
498 500 msg
499 501 % (
500 502 _branchcachedesc(repo),
501 503 stringutil.forcebytestr(inst),
502 504 )
503 505 )
504 506 bcache = None
505 507
506 508 finally:
507 509 if f:
508 510 f.close()
509 511
510 512 return bcache
511 513
512 514 def load(self, repo, lineiter):
513 515 """fully loads the branchcache by reading from the file using the line
514 516 iterator passed"""
515 517 for line in lineiter:
516 518 line = line.rstrip(b'\n')
517 519 if not line:
518 520 continue
519 521 node, state, label = line.split(b" ", 2)
520 522 if state not in b'oc':
521 523 raise ValueError('invalid branch state')
522 524 label = encoding.tolocal(label.strip())
523 525 node = bin(node)
524 526 self._entries.setdefault(label, []).append(node)
525 527 if state == b'c':
526 528 self._closednodes.add(node)
527 529
528 @staticmethod
529 def _filename(repo):
530 @classmethod
531 def _filename(cls, repo):
530 532 """name of a branchcache file for a given repo or repoview"""
531 filename = b"branch2"
533 filename = cls._base_filename
532 534 if repo.filtername:
533 535 filename = b'%s-%s' % (filename, repo.filtername)
534 536 return filename
535 537
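# Illustration only: the naming rule of the classmethod above, restated as a
# plain function. With the base name now a class attribute (the point of this
# change), a hypothetical subclass could override _base_filename alone and
# keep the same per-filter suffix logic.
def cache_filename(base: bytes, filtername) -> bytes:
    if filtername:
        return b'%s-%s' % (base, filtername)
    return base

print(cache_filename(b'branch2', None))       # b'branch2'
print(cache_filename(b'branch2', b'served'))  # b'branch2-served'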
536 538 def copy(self, repo):
537 539 """return a deep copy of the branchcache object"""
538 540 other = type(self)(
539 541 repo=repo,
540 542 # we always do a shallow copy of self._entries, and the values are
541 543 # always replaced, so there is no need to deepcopy as long as the
542 544 # above remains true.
543 545 entries=self._entries,
544 546 tipnode=self.tipnode,
545 547 tiprev=self.tiprev,
546 548 filteredhash=self.filteredhash,
547 549 closednodes=set(self._closednodes),
548 550 verify_node=self._verify_node,
549 551 )
550 552 # the copy will likely schedule a write anyway, but it does not seem
551 553 # to hurt to overschedule
552 554 other._delayed = self._delayed
553 555 # also copy information about the current verification state
554 556 other._verifiedbranches = set(self._verifiedbranches)
555 557 return other
556 558
557 559 def write(self, repo):
558 560 assert self._filtername == repo.filtername, (
559 561 self._filtername,
560 562 repo.filtername,
561 563 )
562 564 tr = repo.currenttransaction()
563 565 if not getattr(tr, 'finalized', True):
564 566 # Avoid premature writing.
565 567 #
566 568 # (The cache warming setup by localrepo will update the file later.)
567 569 self._delayed = True
568 570 return
569 571 try:
570 572 filename = self._filename(repo)
571 573 with repo.cachevfs(filename, b"w", atomictemp=True) as f:
572 574 cachekey = [hex(self.tipnode), b'%d' % self.tiprev]
573 575 if self.filteredhash is not None:
574 576 cachekey.append(hex(self.filteredhash))
575 577 f.write(b" ".join(cachekey) + b'\n')
576 578 nodecount = 0
577 579 for label, nodes in sorted(self._entries.items()):
578 580 label = encoding.fromlocal(label)
579 581 for node in nodes:
580 582 nodecount += 1
581 583 if node in self._closednodes:
582 584 state = b'c'
583 585 else:
584 586 state = b'o'
585 587 f.write(b"%s %s %s\n" % (hex(node), state, label))
586 588 repo.ui.log(
587 589 b'branchcache',
588 590 b'wrote %s with %d labels and %d nodes\n',
589 591 _branchcachedesc(repo),
590 592 len(self._entries),
591 593 nodecount,
592 594 )
593 595 self._delayed = False
594 596 except (IOError, OSError, error.Abort) as inst:
595 597 # Abort may be raised by read only opener, so log and continue
596 598 repo.ui.debug(
597 599 b"couldn't write branch cache: %s\n"
598 600 % stringutil.forcebytestr(inst)
599 601 )
600 602
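# Illustration only: the effect of atomictemp=True in write() above, sketched
# with the standard library. Mercurial's vfs writes to a temporary file and
# renames it into place, so readers never observe a half-written cache file.
import os, tempfile

def atomic_write(path: str, data: bytes):
    fd, tmp = tempfile.mkstemp(dir=os.path.dirname(path) or '.')
    try:
        with os.fdopen(fd, 'wb') as f:
            f.write(data)
        os.replace(tmp, path)  # atomic rename on POSIX and Windows
    except BaseException:
        os.unlink(tmp)
        raise

atomic_write('branch2.demo', b'deadbeef 0\n')
print(open('branch2.demo', 'rb').read())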
601 603 def _verifybranch(self, branch):
602 604 """verify head nodes for the given branch."""
603 605 if not self._verify_node:
604 606 return
605 607 if branch not in self._entries or branch in self._verifiedbranches:
606 608 return
607 609 assert self._hasnode is not None
608 610 for n in self._entries[branch]:
609 611 if not self._hasnode(n):
610 612 _unknownnode(n)
611 613
612 614 self._verifiedbranches.add(branch)
613 615
614 616 def _verifyall(self):
615 617 """verifies nodes of all the branches"""
616 618 for b in self._entries.keys():
617 619 if b not in self._verifiedbranches:
618 620 self._verifybranch(b)
619 621
620 622 def __getitem__(self, key):
621 623 self._verifybranch(key)
622 624 return super().__getitem__(key)
623 625
624 626 def __contains__(self, key):
625 627 self._verifybranch(key)
626 628 return super().__contains__(key)
627 629
628 630 def iteritems(self):
629 631 self._verifyall()
630 632 return super().iteritems()
631 633
632 634 items = iteritems
633 635
634 636 def iterheads(self):
635 637 """returns all the heads"""
636 638 self._verifyall()
637 639 return super().iterheads()
638 640
639 641 def hasbranch(self, label):
640 642 """checks whether a branch of this name exists or not"""
641 643 self._verifybranch(label)
642 644 return super().hasbranch(label)
643 645
644 646 def branchheads(self, branch, closed=False):
645 647 self._verifybranch(branch)
646 648 return super().branchheads(branch, closed=closed)
647 649
648 650 def update(self, repo, revgen):
649 651 assert self._filtername == repo.filtername, (
650 652 self._filtername,
651 653 repo.filtername,
652 654 )
653 655 cl = repo.changelog
654 656 max_rev = super().update(repo, revgen)
655 657 # new tip revision which we found after iterating items from new
656 658 # branches
657 659 if max_rev is not None and max_rev > self.tiprev:
658 660 self.tiprev = max_rev
659 661 self.tipnode = cl.node(max_rev)
660 662
661 663 if not self.validfor(repo):
662 664 # old cache key is now invalid for the repo, but we've just updated
663 665 # the cache and we assume it's valid, so let's make the cache key
664 666 # valid as well by recomputing it from the cached data
665 667 self.tipnode = repo.nullid
666 668 self.tiprev = nullrev
667 669 for heads in self.iterheads():
668 670 if not heads:
669 671 # all revisions on a branch are obsolete
670 672 continue
671 673 # note: tiprev is not necessarily the tip revision of repo,
672 674 # because the tip could be obsolete (i.e. not a head)
673 675 tiprev = max(cl.rev(node) for node in heads)
674 676 if tiprev > self.tiprev:
675 677 self.tipnode = cl.node(tiprev)
676 678 self.tiprev = tiprev
677 679 self.filteredhash = scmutil.filteredhash(
678 680 repo, self.tiprev, needobsolete=True
679 681 )
680 682
681 683 self.write(repo)
682 684
683 685
684 686 class remotebranchcache(_BaseBranchCache):
685 687 """Branchmap info for a remote connection, should not write locally"""
686 688
687 689 def __init__(
688 690 self,
689 691 repo: "localrepo.localrepository",
690 692 entries: Union[
691 693 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
692 694 ] = (),
693 695 closednodes: Optional[Set[bytes]] = None,
694 696 ) -> None:
695 697 super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
696 698
697 699
698 700 # Revision branch info cache
699 701
700 702 _rbcversion = b'-v1'
701 703 _rbcnames = b'rbc-names' + _rbcversion
702 704 _rbcrevs = b'rbc-revs' + _rbcversion
703 705 # [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
704 706 _rbcrecfmt = b'>4sI'
705 707 _rbcrecsize = calcsize(_rbcrecfmt)
706 708 _rbcmininc = 64 * _rbcrecsize
707 709 _rbcnodelen = 4
708 710 _rbcbranchidxmask = 0x7FFFFFFF
709 711 _rbccloseflag = 0x80000000
710 712
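# Illustration only: packing and unpacking one rbc-revs record with the
# _rbcrecfmt layout above (4-byte node hash prefix + big-endian uint32 whose
# sign bit is the close flag). The node prefix and index value are made up.
import struct

record = struct.pack(_rbcrecfmt, b'\xde\xad\xbe\xef', 5 | _rbccloseflag)
node_prefix, field = struct.unpack(_rbcrecfmt, record)
print(node_prefix.hex())            # deadbeef
print(field & _rbcbranchidxmask)    # 5  (index into the branch name list)
print(bool(field & _rbccloseflag))  # True (branch-closing commit)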
711 713
712 714 class rbcrevs:
713 715 """a byte string consisting of an immutable prefix followed by a mutable suffix"""
714 716
715 717 def __init__(self, revs):
716 718 self._prefix = revs
717 719 self._rest = bytearray()
718 720
719 721 def __len__(self):
720 722 return len(self._prefix) + len(self._rest)
721 723
722 724 def unpack_record(self, rbcrevidx):
723 725 if rbcrevidx < len(self._prefix):
724 726 return unpack_from(_rbcrecfmt, util.buffer(self._prefix), rbcrevidx)
725 727 else:
726 728 return unpack_from(
727 729 _rbcrecfmt,
728 730 util.buffer(self._rest),
729 731 rbcrevidx - len(self._prefix),
730 732 )
731 733
732 734 def make_mutable(self):
733 735 if len(self._prefix) > 0:
734 736 entirety = bytearray()
735 737 entirety[:] = self._prefix
736 738 entirety.extend(self._rest)
737 739 self._rest = entirety
738 740 self._prefix = bytearray()
739 741
740 742 def truncate(self, pos):
741 743 self.make_mutable()
742 744 del self._rest[pos:]
743 745
744 746 def pack_into(self, rbcrevidx, node, branchidx):
745 747 if rbcrevidx < len(self._prefix):
746 748 self.make_mutable()
747 749 buf = self._rest
748 750 start_offset = rbcrevidx - len(self._prefix)
749 751 end_offset = start_offset + _rbcrecsize
750 752
751 753 if len(self._rest) < end_offset:
752 754 # bytearray doesn't allocate extra space at least in Python 3.7.
753 755 # When multiple changesets are added in a row, precise resize would
754 756 # result in quadratic complexity. Overallocate to compensate by
755 757 # using the classic doubling technique for dynamic arrays instead.
756 758 # If there was a gap in the map before, less space will be reserved.
757 759 self._rest.extend(b'\0' * end_offset)
758 760 return pack_into(
759 761 _rbcrecfmt,
760 762 buf,
761 763 start_offset,
762 764 node,
763 765 branchidx,
764 766 )
765 767
766 768 def extend(self, extension):
767 769 return self._rest.extend(extension)
768 770
769 771 def slice(self, begin, end):
770 772 if begin < len(self._prefix):
771 773 acc = bytearray()
772 774 acc[:] = self._prefix[begin:end]
773 775 acc.extend(
774 776 self._rest[begin - len(self._prefix) : end - len(self._prefix)]
775 777 )
776 778 return acc
777 779 return self._rest[begin - len(self._prefix) : end - len(self._prefix)]
778 780
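# Illustration only: the immutable-prefix / mutable-suffix idea of rbcrevs,
# reduced to plain bytearrays. The real class keeps a possibly mmap-backed
# (read-only) prefix and only copies it into a bytearray once a write into
# the prefix region is actually needed.
class ToyRevs:
    def __init__(self, prefix: bytes):
        self._prefix = prefix     # typically mmap-backed, read-only
        self._rest = bytearray()  # appended records live here

    def __len__(self):
        return len(self._prefix) + len(self._rest)

    def make_mutable(self):
        if self._prefix:
            self._rest = bytearray(self._prefix) + self._rest
            self._prefix = b''

t = ToyRevs(b'\x00' * 16)
t._rest.extend(b'\x01' * 8)  # appending never touches the prefix
t.make_mutable()             # rewriting old records forces one copy
print(len(t))                # 24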
779 781
780 782 class revbranchcache:
781 783 """Persistent cache, mapping from revision number to branch name and close.
782 784 This is a low level cache, independent of filtering.
783 785
784 786 Branch names are stored in rbc-names in internal encoding separated by 0.
785 787 rbc-names is append-only, and each branch name is only stored once and will
786 788 thus have a unique index.
787 789
788 790 The branch info for each revision is stored in rbc-revs as constant size
789 791 records. The whole file is read into memory, but it is only 'parsed' on
790 792 demand. The file is usually append-only but will be truncated if repo
791 793 modification is detected.
792 794 The record for each revision contains the first 4 bytes of the
793 795 corresponding node hash, and the record is only used if it still matches.
794 796 Even a completely trashed rbc-revs file will thus still give the right result
795 797 while converging towards full recovery ... assuming no incorrectly matching
796 798 node hashes.
797 799 The record also contains 4 bytes where 31 bits contain the index of the
798 800 branch and the last bit indicates that it is a branch-closing commit.
799 801 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
800 802 and will grow with it but be 1/8th of its size.
801 803 """
802 804
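# Illustration only: how a revision number maps into rbc-revs given the fixed
# record size described above (8 bytes per revision: 4-byte node prefix plus
# the 4-byte branch field, i.e. 1/8th of a 64-byte 00changelog.i entry).
import struct

rec_size = struct.calcsize(b'>4sI')  # 8

def record_span(rev: int):
    start = rev * rec_size
    return start, start + rec_size

print(record_span(0))   # (0, 8)
print(record_span(10))  # (80, 88)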
803 805 def __init__(self, repo, readonly=True):
804 806 assert repo.filtername is None
805 807 self._repo = repo
806 808 self._names = [] # branch names in local encoding with static index
807 809 self._rbcrevs = rbcrevs(bytearray())
808 810 self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
809 811 try:
810 812 bndata = repo.cachevfs.read(_rbcnames)
811 813 self._rbcsnameslen = len(bndata) # for verification before writing
812 814 if bndata:
813 815 self._names = [
814 816 encoding.tolocal(bn) for bn in bndata.split(b'\0')
815 817 ]
816 818 except (IOError, OSError):
817 819 if readonly:
818 820 # don't try to use cache - fall back to the slow path
819 821 self.branchinfo = self._branchinfo
820 822
821 823 if self._names:
822 824 try:
823 825 if repo.ui.configbool(b'format', b'mmap-revbranchcache'):
824 826 with repo.cachevfs(_rbcrevs) as fp:
825 827 data = util.buffer(util.mmapread(fp))
826 828 else:
827 829 data = repo.cachevfs.read(_rbcrevs)
828 830 self._rbcrevs = rbcrevs(data)
829 831 except (IOError, OSError) as inst:
830 832 repo.ui.debug(
831 833 b"couldn't read revision branch cache: %s\n"
832 834 % stringutil.forcebytestr(inst)
833 835 )
834 836 # remember number of good records on disk
835 837 self._rbcrevslen = min(
836 838 len(self._rbcrevs) // _rbcrecsize, len(repo.changelog)
837 839 )
838 840 if self._rbcrevslen == 0:
839 841 self._names = []
840 842 self._rbcnamescount = len(self._names) # number of names read at
841 843 # _rbcsnameslen
842 844
843 845 def _clear(self):
844 846 self._rbcsnameslen = 0
845 847 del self._names[:]
846 848 self._rbcnamescount = 0
847 849 self._rbcrevslen = len(self._repo.changelog)
848 850 self._rbcrevs = rbcrevs(bytearray(self._rbcrevslen * _rbcrecsize))
849 851 util.clearcachedproperty(self, b'_namesreverse')
850 852
851 853 @util.propertycache
852 854 def _namesreverse(self):
853 855 return {b: r for r, b in enumerate(self._names)}
854 856
855 857 def branchinfo(self, rev):
856 858 """Return branch name and close flag for rev, using and updating
857 859 persistent cache."""
858 860 changelog = self._repo.changelog
859 861 rbcrevidx = rev * _rbcrecsize
860 862
861 863 # avoid negative index, changelog.read(nullrev) is fast without cache
862 864 if rev == nullrev:
863 865 return changelog.branchinfo(rev)
864 866
865 867 # if requested rev isn't allocated, grow and cache the rev info
866 868 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
867 869 return self._branchinfo(rev)
868 870
869 871 # fast path: extract data from cache, use it if node is matching
870 872 reponode = changelog.node(rev)[:_rbcnodelen]
871 873 cachenode, branchidx = self._rbcrevs.unpack_record(rbcrevidx)
872 874 close = bool(branchidx & _rbccloseflag)
873 875 if close:
874 876 branchidx &= _rbcbranchidxmask
875 877 if cachenode == b'\0\0\0\0':
876 878 pass
877 879 elif cachenode == reponode:
878 880 try:
879 881 return self._names[branchidx], close
880 882 except IndexError:
881 883 # recover from invalid reference to unknown branch
882 884 self._repo.ui.debug(
883 885 b"referenced branch names not found"
884 886 b" - rebuilding revision branch cache from scratch\n"
885 887 )
886 888 self._clear()
887 889 else:
888 890 # rev/node map has changed, invalidate the cache from here up
889 891 self._repo.ui.debug(
890 892 b"history modification detected - truncating "
891 893 b"revision branch cache to revision %d\n" % rev
892 894 )
893 895 truncate = rbcrevidx + _rbcrecsize
894 896 self._rbcrevs.truncate(truncate)
895 897 self._rbcrevslen = min(self._rbcrevslen, truncate)
896 898
897 899 # fall back to slow path and make sure it will be written to disk
898 900 return self._branchinfo(rev)
899 901
900 902 def _branchinfo(self, rev):
901 903 """Retrieve branch info from changelog and update _rbcrevs"""
902 904 changelog = self._repo.changelog
903 905 b, close = changelog.branchinfo(rev)
904 906 if b in self._namesreverse:
905 907 branchidx = self._namesreverse[b]
906 908 else:
907 909 branchidx = len(self._names)
908 910 self._names.append(b)
909 911 self._namesreverse[b] = branchidx
910 912 reponode = changelog.node(rev)
911 913 if close:
912 914 branchidx |= _rbccloseflag
913 915 self._setcachedata(rev, reponode, branchidx)
914 916 return b, close
915 917
916 918 def setdata(self, rev, changelogrevision):
917 919 """add new data information to the cache"""
918 920 branch, close = changelogrevision.branchinfo
919 921
920 922 if branch in self._namesreverse:
921 923 branchidx = self._namesreverse[branch]
922 924 else:
923 925 branchidx = len(self._names)
924 926 self._names.append(branch)
925 927 self._namesreverse[branch] = branchidx
926 928 if close:
927 929 branchidx |= _rbccloseflag
928 930 self._setcachedata(rev, self._repo.changelog.node(rev), branchidx)
929 931 # If no cache data were readable (none exists, bad permissions, etc.)
930 932 # the cache was bypassing itself by setting:
931 933 #
932 934 # self.branchinfo = self._branchinfo
933 935 #
934 936 # Since we now have data in the cache, we need to drop this bypassing.
935 937 if 'branchinfo' in vars(self):
936 938 del self.branchinfo
937 939
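# Illustration only: the instance-attribute bypass dropped in setdata() above.
# Assigning self.branchinfo shadows the class method for that instance;
# deleting the instance attribute restores normal method lookup.
class Demo:
    def branchinfo(self):
        return 'from cache'

d = Demo()
d.branchinfo = lambda: 'slow path'  # bypass installed on the instance
print(d.branchinfo())               # slow path
if 'branchinfo' in vars(d):
    del d.branchinfo                # drop the bypass
print(d.branchinfo())               # from cache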
938 940 def _setcachedata(self, rev, node, branchidx):
939 941 """Writes the node's branch data to the in-memory cache data."""
940 942 if rev == nullrev:
941 943 return
942 944 rbcrevidx = rev * _rbcrecsize
943 945 self._rbcrevs.pack_into(rbcrevidx, node, branchidx)
944 946 self._rbcrevslen = min(self._rbcrevslen, rev)
945 947
946 948 tr = self._repo.currenttransaction()
947 949 if tr:
948 950 tr.addfinalize(b'write-revbranchcache', self.write)
949 951
950 952 def write(self, tr=None):
951 953 """Save branch cache if it is dirty."""
952 954 repo = self._repo
953 955 wlock = None
954 956 step = b''
955 957 try:
956 958 # write the new names
957 959 if self._rbcnamescount < len(self._names):
958 960 wlock = repo.wlock(wait=False)
959 961 step = b' names'
960 962 self._writenames(repo)
961 963
962 964 # write the new revs
963 965 start = self._rbcrevslen * _rbcrecsize
964 966 if start != len(self._rbcrevs):
965 967 step = b''
966 968 if wlock is None:
967 969 wlock = repo.wlock(wait=False)
968 970 self._writerevs(repo, start)
969 971
970 972 except (IOError, OSError, error.Abort, error.LockError) as inst:
971 973 repo.ui.debug(
972 974 b"couldn't write revision branch cache%s: %s\n"
973 975 % (step, stringutil.forcebytestr(inst))
974 976 )
975 977 finally:
976 978 if wlock is not None:
977 979 wlock.release()
978 980
979 981 def _writenames(self, repo):
980 982 """write the new branch names to revbranchcache"""
981 983 if self._rbcnamescount != 0:
982 984 f = repo.cachevfs.open(_rbcnames, b'ab')
983 985 if f.tell() == self._rbcsnameslen:
984 986 f.write(b'\0')
985 987 else:
986 988 f.close()
987 989 repo.ui.debug(b"%s changed - rewriting it\n" % _rbcnames)
988 990 self._rbcnamescount = 0
989 991 self._rbcrevslen = 0
990 992 if self._rbcnamescount == 0:
991 993 # before rewriting names, make sure references are removed
992 994 repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
993 995 f = repo.cachevfs.open(_rbcnames, b'wb')
994 996 f.write(
995 997 b'\0'.join(
996 998 encoding.fromlocal(b)
997 999 for b in self._names[self._rbcnamescount :]
998 1000 )
999 1001 )
1000 1002 self._rbcsnameslen = f.tell()
1001 1003 f.close()
1002 1004 self._rbcnamescount = len(self._names)
1003 1005
1004 1006 def _writerevs(self, repo, start):
1005 1007 """write the new revs to revbranchcache"""
1006 1008 revs = min(len(repo.changelog), len(self._rbcrevs) // _rbcrecsize)
1007 1009 with repo.cachevfs.open(_rbcrevs, b'ab') as f:
1008 1010 if f.tell() != start:
1009 1011 repo.ui.debug(
1010 1012 b"truncating cache/%s to %d\n" % (_rbcrevs, start)
1011 1013 )
1012 1014 f.seek(start)
1013 1015 if f.tell() != start:
1014 1016 start = 0
1015 1017 f.seek(start)
1016 1018 f.truncate()
1017 1019 end = revs * _rbcrecsize
1018 1020 f.write(self._rbcrevs.slice(start, end))
1019 1021 self._rbcrevslen = revs