branchcache: move the header loading in a `_load_header` class method...
marmoute -
r52356:87b830e4 default
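The refactor extracts the cache-key parsing out of `fromfile` so the header handling can be overridden independently. A minimal sketch of the pattern this enables, assuming a hypothetical subclass with a different header layout (the class name and format below are illustrative, not part of this change):

    class branchcacheV3(branchcache):
        # hypothetical format storing "<tiprev> <tipnode hex> [hash]"
        _base_filename = b"branch3"

        @classmethod
        def _load_header(cls, repo, lineiter):
            cachekey = next(lineiter).rstrip(b'\n').split(b" ", 2)
            lrev, last = cachekey[:2]
            filteredhash = bin(cachekey[2]) if len(cachekey) > 2 else None
            return {
                "tipnode": bin(last),
                "tiprev": int(lrev),
                "filteredhash": filteredhash,
            }

`fromfile` itself needs no further change: it forwards whatever dictionary `_load_header` returns as keyword arguments to the constructor.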
@@ -1,1019 +1,1031 b''
1 1 # branchmap.py - logic to computes, maintain and stores branchmap for local repo
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import struct
10 10
11 11 from .node import (
12 12 bin,
13 13 hex,
14 14 nullrev,
15 15 )
16 16
17 17 from typing import (
18 Any,
18 19 Callable,
19 20 Dict,
20 21 Iterable,
21 22 List,
22 23 Optional,
23 24 Set,
24 25 TYPE_CHECKING,
25 26 Tuple,
26 27 Union,
27 28 )
28 29
29 30 from . import (
30 31 encoding,
31 32 error,
32 33 obsolete,
33 34 scmutil,
34 35 util,
35 36 )
36 37
37 38 from .utils import (
38 39 repoviewutil,
39 40 stringutil,
40 41 )
41 42
42 43 if TYPE_CHECKING:
43 44 from . import localrepo
44 45
45 46 assert [localrepo]
46 47
47 48 subsettable = repoviewutil.subsettable
48 49
49 50 calcsize = struct.calcsize
50 51 pack_into = struct.pack_into
51 52 unpack_from = struct.unpack_from
52 53
53 54
54 55 class BranchMapCache:
55 56 """mapping of filtered views of repo with their branchcache"""
56 57
57 58 def __init__(self):
58 59 self._per_filter = {}
59 60
60 61 def __getitem__(self, repo):
61 62 self.updatecache(repo)
62 63 bcache = self._per_filter[repo.filtername]
63 64 assert bcache._filtername == repo.filtername, (
64 65 bcache._filtername,
65 66 repo.filtername,
66 67 )
67 68 return bcache
68 69
69 70 def update_disk(self, repo):
70 71 """ensure and up-to-date cache is (or will be) written on disk
71 72
72 73 The cache for this repository view is updated if needed and written on
73 74 disk.
74 75
75 76 If a transaction is in progress, the write is scheduled to happen at
76 77 transaction close. See the `BranchMapCache.write_delayed` method.
77 78
78 79 This method exists independently of __getitem__ as it is sometimes
79 80 useful to signal that we have no intent to use the data in memory yet.
80 81 """
81 82 self.updatecache(repo)
82 83 bcache = self._per_filter[repo.filtername]
83 84 assert bcache._filtername == repo.filtername, (
84 85 bcache._filtername,
85 86 repo.filtername,
86 87 )
87 88 bcache.write(repo)
88 89
89 90 def updatecache(self, repo):
90 91 """Update the cache for the given filtered view on a repository"""
91 92 # This can trigger updates for the caches for subsets of the filtered
92 93 # view, e.g. when there is no cache for this filtered view or the cache
93 94 # is stale.
94 95
95 96 cl = repo.changelog
96 97 filtername = repo.filtername
97 98 bcache = self._per_filter.get(filtername)
98 99 if bcache is None or not bcache.validfor(repo):
99 100 # cache object missing or cache object stale? Read from disk
100 101 bcache = branchcache.fromfile(repo)
101 102
102 103 revs = []
103 104 if bcache is None:
104 105 # no (fresh) cache available anymore, perhaps we can re-use
105 106 # the cache for a subset, then extend that to add info on missing
106 107 # revisions.
107 108 subsetname = subsettable.get(filtername)
108 109 if subsetname is not None:
109 110 subset = repo.filtered(subsetname)
110 111 bcache = self[subset].copy(repo)
111 112 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
112 113 revs.extend(r for r in extrarevs if r <= bcache.tiprev)
113 114 else:
114 115 # nothing to fall back on, start empty.
115 116 bcache = branchcache(repo)
116 117
117 118 revs.extend(cl.revs(start=bcache.tiprev + 1))
118 119 if revs:
119 120 bcache.update(repo, revs)
120 121
121 122 assert bcache.validfor(repo), filtername
122 123 self._per_filter[repo.filtername] = bcache
123 124
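Note that `updatecache` only consults `subsettable` one level at a time; deeper fallbacks happen through the recursion in `self[subset]`. A standalone sketch of the resulting chain, using the mapping from `repoviewutil` as of this writing (exact entries may differ across versions):

    subsettable = {
        None: b'visible',
        b'visible': b'served',
        b'served': b'immutable',
        b'immutable': b'base',
    }

    def fallback_chain(filtername):
        # yield progressively smaller repo views whose caches can seed this one
        while filtername in subsettable:
            filtername = subsettable[filtername]
            yield filtername

    print(list(fallback_chain(b'visible')))  # [b'served', b'immutable', b'base']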
124 125 def replace(self, repo, remotebranchmap):
125 126 """Replace the branchmap cache for a repo with a branch mapping.
126 127
127 128 This is likely only called during clone with a branch map from a
128 129 remote.
129 130
130 131 """
131 132 cl = repo.changelog
132 133 clrev = cl.rev
133 134 clbranchinfo = cl.branchinfo
134 135 rbheads = []
135 136 closed = set()
136 137 for bheads in remotebranchmap.values():
137 138 rbheads += bheads
138 139 for h in bheads:
139 140 r = clrev(h)
140 141 b, c = clbranchinfo(r)
141 142 if c:
142 143 closed.add(h)
143 144
144 145 if rbheads:
145 146 rtiprev = max((int(clrev(node)) for node in rbheads))
146 147 cache = branchcache(
147 148 repo,
148 149 remotebranchmap,
149 150 repo[rtiprev].node(),
150 151 rtiprev,
151 152 closednodes=closed,
152 153 )
153 154
154 155 # Try to stick it as low as possible
155 156 # filters above served are unlikely to be fetched from a clone
156 157 for candidate in (b'base', b'immutable', b'served'):
157 158 rview = repo.filtered(candidate)
158 159 if cache.validfor(rview):
159 160 cache = self._per_filter[candidate] = cache.copy(rview)
160 161 cache.write(rview)
161 162 return
162 163
163 164 def clear(self):
164 165 self._per_filter.clear()
165 166
166 167 def write_delayed(self, repo):
167 168 unfi = repo.unfiltered()
168 169 for filtername, cache in self._per_filter.items():
169 170 if cache._delayed:
170 171 repo = unfi.filtered(filtername)
171 172 cache.write(repo)
172 173
173 174
174 175 def _unknownnode(node):
175 176 """raises ValueError when branchcache found a node which does not exists"""
176 177 raise ValueError('node %s does not exist' % node.hex())
177 178
178 179
179 180 def _branchcachedesc(repo):
180 181 if repo.filtername is not None:
181 182 return b'branch cache (%s)' % repo.filtername
182 183 else:
183 184 return b'branch cache'
184 185
185 186
186 187 class _BaseBranchCache:
187 188 """A dict like object that hold branches heads cache.
188 189
189 190 This cache is used to avoid costly computations to determine all the
190 191 branch heads of a repo.
191 192
192 193 The cache is serialized on disk in the following format:
193 194
194 195 <tip hex node> <tip rev number> [optional filtered repo hex hash]
195 196 <branch head hex node> <open/closed state> <branch name>
196 197 <branch head hex node> <open/closed state> <branch name>
197 198 ...
198 199
199 200 The first line is used to check if the cache is still valid. If the
200 201 branch cache is for a filtered repo view, an optional third hash is
201 202 included that hashes the hashes of all filtered and obsolete revisions.
202 203
203 204 The open/closed state is represented by a single letter 'o' or 'c'.
204 205 This field can be used to avoid changelog reads when determining if a
205 206 branch head closes a branch or not.
206 207 """
207 208
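For illustration, a `branch2` file following this format could look like the following (nodes shortened, values hypothetical):

    dcbb326fdec2dc6ee4cbe... 2520 f495d1fdd5e...
    77f249ca0f312e12fc5... o default
    dcbb326fdec2dc6ee4c... c stable

The first line is the cache key (tip node, tip rev, optional filtered hash); every other line records one branch head with its open/closed state.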
208 209 def __init__(
209 210 self,
210 211 repo: "localrepo.localrepository",
211 212 entries: Union[
212 213 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
213 214 ] = (),
214 215 closed_nodes: Optional[Set[bytes]] = None,
215 216 ) -> None:
216 217 """hasnode is a function which can be used to verify whether changelog
217 218 has a given node or not. If it's not provided, we assume that every node
218 219 we have exists in changelog"""
219 220 # closednodes is a set of nodes that close their branch. If the branch
220 221 # cache has been updated, it may contain nodes that are no longer
221 222 # heads.
222 223 if closed_nodes is None:
223 224 closed_nodes = set()
224 225 self._closednodes = set(closed_nodes)
225 226 self._entries = dict(entries)
226 227
227 228 def __iter__(self):
228 229 return iter(self._entries)
229 230
230 231 def __setitem__(self, key, value):
231 232 self._entries[key] = value
232 233
233 234 def __getitem__(self, key):
234 235 return self._entries[key]
235 236
236 237 def __contains__(self, key):
237 238 return key in self._entries
238 239
239 240 def iteritems(self):
240 241 return self._entries.items()
241 242
242 243 items = iteritems
243 244
244 245 def hasbranch(self, label):
245 246 """checks whether a branch of this name exists or not"""
246 247 return label in self._entries
247 248
248 249 def _branchtip(self, heads):
249 250 """Return tuple with last open head in heads and false,
250 251 otherwise return last closed head and true."""
251 252 tip = heads[-1]
252 253 closed = True
253 254 for h in reversed(heads):
254 255 if h not in self._closednodes:
255 256 tip = h
256 257 closed = False
257 258 break
258 259 return tip, closed
259 260
260 261 def branchtip(self, branch):
261 262 """Return the tipmost open head on branch head, otherwise return the
262 263 tipmost closed head on branch.
263 264 Raise KeyError for unknown branch."""
264 265 return self._branchtip(self[branch])[0]
265 266
266 267 def iteropen(self, nodes):
267 268 return (n for n in nodes if n not in self._closednodes)
268 269
269 270 def branchheads(self, branch, closed=False):
270 271 heads = self._entries[branch]
271 272 if not closed:
272 273 heads = list(self.iteropen(heads))
273 274 return heads
274 275
275 276 def iterbranches(self):
276 277 for bn, heads in self.items():
277 278 yield (bn, heads) + self._branchtip(heads)
278 279
279 280 def iterheads(self):
280 281 """returns all the heads"""
281 282 return self._entries.values()
282 283
283 284 def update(self, repo, revgen):
284 285 """Given a branchhead cache, self, that may have extra nodes or be
285 286 missing heads, and a generator of nodes that are strictly a superset of
286 287 heads missing, this function updates self to be correct.
287 288 """
288 289 starttime = util.timer()
289 290 cl = repo.changelog
290 291 # collect new branch entries
291 292 newbranches = {}
292 293 getbranchinfo = repo.revbranchcache().branchinfo
293 294 max_rev = -1
294 295 for r in revgen:
295 296 branch, closesbranch = getbranchinfo(r)
296 297 newbranches.setdefault(branch, []).append(r)
297 298 if closesbranch:
298 299 self._closednodes.add(cl.node(r))
299 300 max_rev = max(max_rev, r)
300 301 if max_rev < 0:
301 302 max_rev = None
302 303
303 304 # Delay fetching the topological heads until they are needed.
304 305 # A repository without non-continuous branches can skip this part.
305 306 topoheads = None
306 307
307 308 # If a changeset is visible, its parents must be visible too, so
308 309 # use the faster unfiltered parent accessor.
309 310 parentrevs = repo.unfiltered().changelog.parentrevs
310 311
311 312 # Faster than using ctx.obsolete()
312 313 obsrevs = obsolete.getrevs(repo, b'obsolete')
313 314
314 315 for branch, newheadrevs in newbranches.items():
315 316 # For every branch, compute the new branchheads.
316 317 # A branchhead is a revision such that no descendant is on
317 318 # the same branch.
318 319 #
319 320 # The branchheads are computed iteratively in revision order.
320 321 # This ensures topological order, i.e. parents are processed
321 322 # before their children. Ancestors are inclusive here, i.e.
322 323 # any revision is an ancestor of itself.
323 324 #
324 325 # Core observations:
325 326 # - The current revision is always a branchhead for the
326 327 # repository up to that point.
327 328 # - It is the first revision of the branch if and only if
328 329 # there was no branchhead before. In that case, it is the
329 330 # only branchhead as there are no possible ancestors on
330 331 # the same branch.
331 332 # - If a parent is on the same branch, a branchhead can
332 333 # only be an ancestor of that parent if it is the parent
333 334 # itself. Otherwise it would have been removed as an ancestor
334 335 # of that parent before.
335 336 # - Therefore, if all parents are on the same branch, they
336 337 # can just be removed from the branchhead set.
337 338 # - If one parent is on the same branch and the other is not
338 339 # and there was exactly one branchhead known, the existing
339 340 # branchhead can only be an ancestor if it is the parent.
340 341 # Otherwise it would have been removed as ancestor of
341 342 # the parent before. The other parent therefore can't have
342 343 # a branchhead as ancestor.
343 344 # - In all other cases, the parents on different branches
344 345 # could have a branchhead as ancestor. Those parents are
345 346 # kept in the "uncertain" set. If all branchheads are also
346 347 # topological heads, they can't have descendants and further
347 348 # checks can be skipped. Otherwise, the ancestors of the
348 349 # "uncertain" set are removed from branchheads.
349 350 # This computation is heavy and avoided if at all possible.
350 351 bheads = self._entries.get(branch, [])
351 352 bheadset = {cl.rev(node) for node in bheads}
352 353 uncertain = set()
353 354 for newrev in sorted(newheadrevs):
354 355 if newrev in obsrevs:
355 356 # We ignore obsolete changesets as they shouldn't be
356 357 # considered heads.
357 358 continue
358 359
359 360 if not bheadset:
360 361 bheadset.add(newrev)
361 362 continue
362 363
363 364 parents = [p for p in parentrevs(newrev) if p != nullrev]
364 365 samebranch = set()
365 366 otherbranch = set()
366 367 obsparents = set()
367 368 for p in parents:
368 369 if p in obsrevs:
369 370 # We ignored this obsolete changeset earlier, but now
370 371 # that it has non-ignored children, we need to make
371 372 # sure their ancestors are not considered heads. To
372 373 # achieve that, we will simply treat this obsolete
373 374 # changeset as a parent from other branch.
374 375 obsparents.add(p)
375 376 elif p in bheadset or getbranchinfo(p)[0] == branch:
376 377 samebranch.add(p)
377 378 else:
378 379 otherbranch.add(p)
379 380 if not (len(bheadset) == len(samebranch) == 1):
380 381 uncertain.update(otherbranch)
381 382 uncertain.update(obsparents)
382 383 bheadset.difference_update(samebranch)
383 384 bheadset.add(newrev)
384 385
385 386 if uncertain:
386 387 if topoheads is None:
387 388 topoheads = set(cl.headrevs())
388 389 if bheadset - topoheads:
389 390 floorrev = min(bheadset)
390 391 if floorrev <= max(uncertain):
391 392 ancestors = set(cl.ancestors(uncertain, floorrev))
392 393 bheadset -= ancestors
393 394 if bheadset:
394 395 self[branch] = [cl.node(rev) for rev in sorted(bheadset)]
395 396
396 397 duration = util.timer() - starttime
397 398 repo.ui.log(
398 399 b'branchcache',
399 400 b'updated %s in %.4f seconds\n',
400 401 _branchcachedesc(repo),
401 402 duration,
402 403 )
403 404 return max_rev
404 405
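A toy, standalone walkthrough of the core observations above (the parent data is made up; no Mercurial objects involved):

    # rev -> parents on the same branch; revs 2 and 3 are children of 1
    parents = {2: [1], 3: [1]}
    bheadset = {1}                    # current head set for the branch
    for newrev in sorted(parents):    # process in revision order
        # a parent on the same branch cannot stay a head
        bheadset -= {p for p in parents[newrev] if p in bheadset}
        bheadset.add(newrev)
    assert bheadset == {2, 3}         # rev 1 was displaced by its children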
405 406
406 407 class branchcache(_BaseBranchCache):
407 408 """Branchmap info for a local repo or repoview"""
408 409
409 410 _base_filename = b"branch2"
410 411
411 412 def __init__(
412 413 self,
413 414 repo: "localrepo.localrepository",
414 415 entries: Union[
415 416 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
416 417 ] = (),
417 418 tipnode: Optional[bytes] = None,
418 419 tiprev: Optional[int] = nullrev,
419 420 filteredhash: Optional[bytes] = None,
420 421 closednodes: Optional[Set[bytes]] = None,
421 422 hasnode: Optional[Callable[[bytes], bool]] = None,
422 423 verify_node: bool = False,
423 424 ) -> None:
424 425 """hasnode is a function which can be used to verify whether changelog
425 426 has a given node or not. If it's not provided, we assume that every node
426 427 we have exists in changelog"""
427 428 self._filtername = repo.filtername
428 429 self._delayed = False
429 430 if tipnode is None:
430 431 self.tipnode = repo.nullid
431 432 else:
432 433 self.tipnode = tipnode
433 434 self.tiprev = tiprev
434 435 self.filteredhash = filteredhash
435 436
436 437 super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
437 438 # closednodes is a set of nodes that close their branch. If the branch
438 439 # cache has been updated, it may contain nodes that are no longer
439 440 # heads.
440 441
441 442 # Do we need to verify branches at all?
442 443 self._verify_node = verify_node
443 444 # branches for which nodes are verified
444 445 self._verifiedbranches = set()
445 446 self._hasnode = None
446 447 if self._verify_node:
447 448 self._hasnode = repo.changelog.hasnode
448 449
449 450 def validfor(self, repo):
450 451 """check that cache contents are valid for (a subset of) this repo
451 452
452 453 - False when the order of changesets changed or if we detect a strip.
453 454 - True when cache is up-to-date for the current repo or its subset."""
454 455 try:
455 456 node = repo.changelog.node(self.tiprev)
456 457 except IndexError:
457 458 # changesets were stripped and now we don't even have enough to
458 459 # find tiprev
459 460 return False
460 461 if self.tipnode != node:
461 462 # tiprev doesn't correspond to tipnode: repo was stripped, or this
462 463 # repo has a different order of changesets
463 464 return False
464 465 tiphash = scmutil.filteredhash(repo, self.tiprev, needobsolete=True)
465 466 # hashes don't match if this repo view has a different set of filtered
466 467 # revisions (e.g. due to phase changes) or obsolete revisions (e.g.
467 468 # history was rewritten)
468 469 return self.filteredhash == tiphash
469 470
470 471 @classmethod
471 472 def fromfile(cls, repo):
472 473 f = None
473 474 try:
474 475 f = repo.cachevfs(cls._filename(repo))
475 476 lineiter = iter(f)
476 cachekey = next(lineiter).rstrip(b'\n').split(b" ", 2)
477 last, lrev = cachekey[:2]
478 last, lrev = bin(last), int(lrev)
479 filteredhash = None
480 if len(cachekey) > 2:
481 filteredhash = bin(cachekey[2])
477 init_kwargs = cls._load_header(repo, lineiter)
482 478 bcache = cls(
483 479 repo,
484 tipnode=last,
485 tiprev=lrev,
486 filteredhash=filteredhash,
487 480 verify_node=True,
481 **init_kwargs,
488 482 )
489 483 if not bcache.validfor(repo):
490 484 # invalidate the cache
491 485 raise ValueError('tip differs')
492 486 bcache._load_heads(repo, lineiter)
493 487 except (IOError, OSError):
494 488 return None
495 489
496 490 except Exception as inst:
497 491 if repo.ui.debugflag:
498 492 msg = b'invalid %s: %s\n'
499 493 msg %= (
500 494 _branchcachedesc(repo),
501 495 stringutil.forcebytestr(inst),
502 496 )
503 497 repo.ui.debug(msg)
504 498 bcache = None
505 499
506 500 finally:
507 501 if f:
508 502 f.close()
509 503
510 504 return bcache
511 505
506 @classmethod
507 def _load_header(cls, repo, lineiter) -> "dict[str, Any]":
508 """parse the head of a branchmap file
509
510 return parameters to pass to a newly created class instance.
511 """
512 cachekey = next(lineiter).rstrip(b'\n').split(b" ", 2)
513 last, lrev = cachekey[:2]
514 last, lrev = bin(last), int(lrev)
515 filteredhash = None
516 if len(cachekey) > 2:
517 filteredhash = bin(cachekey[2])
518 return {
519 "tipnode": last,
520 "tiprev": lrev,
521 "filteredhash": filteredhash,
522 }
523
512 524 def _load_heads(self, repo, lineiter):
513 525 """fully loads the branchcache by reading from the file using the line
514 526 iterator passed"""
515 527 for line in lineiter:
516 528 line = line.rstrip(b'\n')
517 529 if not line:
518 530 continue
519 531 node, state, label = line.split(b" ", 2)
520 532 if state not in b'oc':
521 533 raise ValueError('invalid branch state')
522 534 label = encoding.tolocal(label.strip())
523 535 node = bin(node)
524 536 self._entries.setdefault(label, []).append(node)
525 537 if state == b'c':
526 538 self._closednodes.add(node)
527 539
528 540 @classmethod
529 541 def _filename(cls, repo):
530 542 """name of a branchcache file for a given repo or repoview"""
531 543 filename = cls._base_filename
532 544 if repo.filtername:
533 545 filename = b'%s-%s' % (filename, repo.filtername)
534 546 return filename
535 547
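An illustrative consequence (values depend on which views have been computed): the unfiltered repo is cached as `branch2` in `.hg/cache/`, while filtered views get a suffix, e.g.:

    >>> branchcache._filename(repo)                       # doctest sketch
    b'branch2'
    >>> branchcache._filename(repo.filtered(b'served'))
    b'branch2-served'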
536 548 def copy(self, repo):
537 549 """return a deep copy of the branchcache object"""
538 550 other = type(self)(
539 551 repo=repo,
540 552 # we always do a shallow copy of self._entries, and the values are
541 553 # always replaced, so there is no need to deepcopy as long as the
542 554 # above remains true.
543 555 entries=self._entries,
544 556 tipnode=self.tipnode,
545 557 tiprev=self.tiprev,
546 558 filteredhash=self.filteredhash,
547 559 closednodes=set(self._closednodes),
548 560 verify_node=self._verify_node,
549 561 )
550 562 # the copy will likely schedule a write anyway, but it does not seem
551 563 # to hurt to overschedule
552 564 other._delayed = self._delayed
553 565 # also copy information about the current verification state
554 566 other._verifiedbranches = set(self._verifiedbranches)
555 567 return other
556 568
557 569 def write(self, repo):
558 570 assert self._filtername == repo.filtername, (
559 571 self._filtername,
560 572 repo.filtername,
561 573 )
562 574 tr = repo.currenttransaction()
563 575 if not getattr(tr, 'finalized', True):
564 576 # Avoid premature writing.
565 577 #
566 578 # (The cache warming setup by localrepo will update the file later.)
567 579 self._delayed = True
568 580 return
569 581 try:
570 582 filename = self._filename(repo)
571 583 with repo.cachevfs(filename, b"w", atomictemp=True) as f:
572 584 cachekey = [hex(self.tipnode), b'%d' % self.tiprev]
573 585 if self.filteredhash is not None:
574 586 cachekey.append(hex(self.filteredhash))
575 587 f.write(b" ".join(cachekey) + b'\n')
576 588 nodecount = 0
577 589 for label, nodes in sorted(self._entries.items()):
578 590 label = encoding.fromlocal(label)
579 591 for node in nodes:
580 592 nodecount += 1
581 593 if node in self._closednodes:
582 594 state = b'c'
583 595 else:
584 596 state = b'o'
585 597 f.write(b"%s %s %s\n" % (hex(node), state, label))
586 598 repo.ui.log(
587 599 b'branchcache',
588 600 b'wrote %s with %d labels and %d nodes\n',
589 601 _branchcachedesc(repo),
590 602 len(self._entries),
591 603 nodecount,
592 604 )
593 605 self._delayed = False
594 606 except (IOError, OSError, error.Abort) as inst:
595 607 # Abort may be raised by a read-only opener, so log and continue
596 608 repo.ui.debug(
597 609 b"couldn't write branch cache: %s\n"
598 610 % stringutil.forcebytestr(inst)
599 611 )
600 612
601 613 def _verifybranch(self, branch):
602 614 """verify head nodes for the given branch."""
603 615 if not self._verify_node:
604 616 return
605 617 if branch not in self._entries or branch in self._verifiedbranches:
606 618 return
607 619 assert self._hasnode is not None
608 620 for n in self._entries[branch]:
609 621 if not self._hasnode(n):
610 622 _unknownnode(n)
611 623
612 624 self._verifiedbranches.add(branch)
613 625
614 626 def _verifyall(self):
615 627 """verifies nodes of all the branches"""
616 628 for b in self._entries.keys():
617 629 if b not in self._verifiedbranches:
618 630 self._verifybranch(b)
619 631
620 632 def __getitem__(self, key):
621 633 self._verifybranch(key)
622 634 return super().__getitem__(key)
623 635
624 636 def __contains__(self, key):
625 637 self._verifybranch(key)
626 638 return super().__contains__(key)
627 639
628 640 def iteritems(self):
629 641 self._verifyall()
630 642 return super().iteritems()
631 643
632 644 items = iteritems
633 645
634 646 def iterheads(self):
635 647 """returns all the heads"""
636 648 self._verifyall()
637 649 return super().iterheads()
638 650
639 651 def hasbranch(self, label):
640 652 """checks whether a branch of this name exists or not"""
641 653 self._verifybranch(label)
642 654 return super().hasbranch(label)
643 655
644 656 def branchheads(self, branch, closed=False):
645 657 self._verifybranch(branch)
646 658 return super().branchheads(branch, closed=closed)
647 659
648 660 def update(self, repo, revgen):
649 661 assert self._filtername == repo.filtername, (
650 662 self._filtername,
651 663 repo.filtername,
652 664 )
653 665 cl = repo.changelog
654 666 max_rev = super().update(repo, revgen)
655 667 # new tip revision which we found after iterating items from new
656 668 # branches
657 669 if max_rev is not None and max_rev > self.tiprev:
658 670 self.tiprev = max_rev
659 671 self.tipnode = cl.node(max_rev)
660 672
661 673 if not self.validfor(repo):
662 674 # old cache key is now invalid for the repo, but we've just updated
663 675 # the cache and we assume it's valid, so let's make the cache key
664 676 # valid as well by recomputing it from the cached data
665 677 self.tipnode = repo.nullid
666 678 self.tiprev = nullrev
667 679 for heads in self.iterheads():
668 680 if not heads:
669 681 # all revisions on a branch are obsolete
670 682 continue
671 683 # note: tiprev is not necessarily the tip revision of repo,
672 684 # because the tip could be obsolete (i.e. not a head)
673 685 tiprev = max(cl.rev(node) for node in heads)
674 686 if tiprev > self.tiprev:
675 687 self.tipnode = cl.node(tiprev)
676 688 self.tiprev = tiprev
677 689 self.filteredhash = scmutil.filteredhash(
678 690 repo, self.tiprev, needobsolete=True
679 691 )
680 692
681 693 self.write(repo)
682 694
683 695
684 696 class remotebranchcache(_BaseBranchCache):
685 697 """Branchmap info for a remote connection, should not write locally"""
686 698
687 699 def __init__(
688 700 self,
689 701 repo: "localrepo.localrepository",
690 702 entries: Union[
691 703 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
692 704 ] = (),
693 705 closednodes: Optional[Set[bytes]] = None,
694 706 ) -> None:
695 707 super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
696 708
697 709
698 710 # Revision branch info cache
699 711
700 712 _rbcversion = b'-v1'
701 713 _rbcnames = b'rbc-names' + _rbcversion
702 714 _rbcrevs = b'rbc-revs' + _rbcversion
703 715 # [4 byte hash prefix][4 byte branch name number with sign bit indicating close]
704 716 _rbcrecfmt = b'>4sI'
705 717 _rbcrecsize = calcsize(_rbcrecfmt)
706 718 _rbcmininc = 64 * _rbcrecsize
707 719 _rbcnodelen = 4
708 720 _rbcbranchidxmask = 0x7FFFFFFF
709 721 _rbccloseflag = 0x80000000
710 722
711 723
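A minimal self-contained sketch of one rbc-revs record under this format (node prefix and branch index are made up):

    import struct

    fmt = b'>4sI'                        # mirrors _rbcrecfmt
    record = struct.pack(fmt, b'\xde\xad\xbe\xef', 5 | 0x80000000)
    assert struct.calcsize(fmt) == len(record) == 8

    prefix, value = struct.unpack(fmt, record)
    closed = bool(value & 0x80000000)    # True: this head closes its branch
    branchidx = value & 0x7FFFFFFF       # 5: index into the rbc-names list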
712 724 class rbcrevs:
713 725 """a byte string consisting of an immutable prefix followed by a mutable suffix"""
714 726
715 727 def __init__(self, revs):
716 728 self._prefix = revs
717 729 self._rest = bytearray()
718 730
719 731 def __len__(self):
720 732 return len(self._prefix) + len(self._rest)
721 733
722 734 def unpack_record(self, rbcrevidx):
723 735 if rbcrevidx < len(self._prefix):
724 736 return unpack_from(_rbcrecfmt, util.buffer(self._prefix), rbcrevidx)
725 737 else:
726 738 return unpack_from(
727 739 _rbcrecfmt,
728 740 util.buffer(self._rest),
729 741 rbcrevidx - len(self._prefix),
730 742 )
731 743
732 744 def make_mutable(self):
733 745 if len(self._prefix) > 0:
734 746 entirety = bytearray()
735 747 entirety[:] = self._prefix
736 748 entirety.extend(self._rest)
737 749 self._rest = entirety
738 750 self._prefix = bytearray()
739 751
740 752 def truncate(self, pos):
741 753 self.make_mutable()
742 754 del self._rest[pos:]
743 755
744 756 def pack_into(self, rbcrevidx, node, branchidx):
745 757 if rbcrevidx < len(self._prefix):
746 758 self.make_mutable()
747 759 buf = self._rest
748 760 start_offset = rbcrevidx - len(self._prefix)
749 761 end_offset = start_offset + _rbcrecsize
750 762
751 763 if len(self._rest) < end_offset:
752 764 # bytearray doesn't allocate extra space at least in Python 3.7.
753 765 # When multiple changesets are added in a row, precise resize would
754 766 # result in quadratic complexity. Overallocate to compensate by
755 767 # using the classic doubling technique for dynamic arrays instead.
756 768 # If there was a gap in the map before, less space will be reserved.
757 769 self._rest.extend(b'\0' * end_offset)
758 770 return pack_into(
759 771 _rbcrecfmt,
760 772 buf,
761 773 start_offset,
762 774 node,
763 775 branchidx,
764 776 )
765 777
766 778 def extend(self, extension):
767 779 return self._rest.extend(extension)
768 780
769 781 def slice(self, begin, end):
770 782 if begin < len(self._prefix):
771 783 acc = bytearray()
772 784 acc[:] = self._prefix[begin:end]
773 785 acc.extend(
774 786 self._rest[begin - len(self._prefix) : end - len(self._prefix)]
775 787 )
776 788 return acc
777 789 return self._rest[begin - len(self._prefix) : end - len(self._prefix)]
778 790
779 791
780 792 class revbranchcache:
781 793 """Persistent cache, mapping from revision number to branch name and close.
782 794 This is a low level cache, independent of filtering.
783 795
784 796 Branch names are stored in rbc-names in internal encoding separated by 0.
785 797 rbc-names is append-only, and each branch name is only stored once and will
786 798 thus have a unique index.
787 799
788 800 The branch info for each revision is stored in rbc-revs as constant size
789 801 records. The whole file is read into memory, but it is only 'parsed' on
790 802 demand. The file is usually append-only but will be truncated if repo
791 803 modification is detected.
792 804 The record for each revision contains the first 4 bytes of the
793 805 corresponding node hash, and the record is only used if it still matches.
794 806 Even a completely trashed rbc-revs file will thus still give the right result
795 807 while converging towards full recovery ... assuming no incorrectly matching
796 808 node hashes.
797 809 The record also contains 4 bytes where 31 bits contain the index of the
798 810 branch and the top bit indicates that it is a branch close commit.
799 811 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
800 812 and will grow with it but be 1/8th of its size.
801 813 """
802 814
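The 1/8th figure follows from the record size: each rbc-revs record is 8 bytes (a 4-byte node prefix plus a 4-byte branch index/close flag), whereas each 00changelog.i index entry is 64 bytes.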
803 815 def __init__(self, repo, readonly=True):
804 816 assert repo.filtername is None
805 817 self._repo = repo
806 818 self._names = [] # branch names in local encoding with static index
807 819 self._rbcrevs = rbcrevs(bytearray())
808 820 self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
809 821 try:
810 822 bndata = repo.cachevfs.read(_rbcnames)
811 823 self._rbcsnameslen = len(bndata) # for verification before writing
812 824 if bndata:
813 825 self._names = [
814 826 encoding.tolocal(bn) for bn in bndata.split(b'\0')
815 827 ]
816 828 except (IOError, OSError):
817 829 if readonly:
818 830 # don't try to use cache - fall back to the slow path
819 831 self.branchinfo = self._branchinfo
820 832
821 833 if self._names:
822 834 try:
823 835 if repo.ui.configbool(b'format', b'mmap-revbranchcache'):
824 836 with repo.cachevfs(_rbcrevs) as fp:
825 837 data = util.buffer(util.mmapread(fp))
826 838 else:
827 839 data = repo.cachevfs.read(_rbcrevs)
828 840 self._rbcrevs = rbcrevs(data)
829 841 except (IOError, OSError) as inst:
830 842 repo.ui.debug(
831 843 b"couldn't read revision branch cache: %s\n"
832 844 % stringutil.forcebytestr(inst)
833 845 )
834 846 # remember number of good records on disk
835 847 self._rbcrevslen = min(
836 848 len(self._rbcrevs) // _rbcrecsize, len(repo.changelog)
837 849 )
838 850 if self._rbcrevslen == 0:
839 851 self._names = []
840 852 self._rbcnamescount = len(self._names) # number of names read at
841 853 # _rbcsnameslen
842 854
843 855 def _clear(self):
844 856 self._rbcsnameslen = 0
845 857 del self._names[:]
846 858 self._rbcnamescount = 0
847 859 self._rbcrevslen = len(self._repo.changelog)
848 860 self._rbcrevs = rbcrevs(bytearray(self._rbcrevslen * _rbcrecsize))
849 861 util.clearcachedproperty(self, b'_namesreverse')
850 862
851 863 @util.propertycache
852 864 def _namesreverse(self):
853 865 return {b: r for r, b in enumerate(self._names)}
854 866
855 867 def branchinfo(self, rev):
856 868 """Return branch name and close flag for rev, using and updating
857 869 persistent cache."""
858 870 changelog = self._repo.changelog
859 871 rbcrevidx = rev * _rbcrecsize
860 872
861 873 # avoid negative index, changelog.read(nullrev) is fast without cache
862 874 if rev == nullrev:
863 875 return changelog.branchinfo(rev)
864 876
865 877 # if requested rev isn't allocated, grow and cache the rev info
866 878 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
867 879 return self._branchinfo(rev)
868 880
869 881 # fast path: extract data from cache, use it if node is matching
870 882 reponode = changelog.node(rev)[:_rbcnodelen]
871 883 cachenode, branchidx = self._rbcrevs.unpack_record(rbcrevidx)
872 884 close = bool(branchidx & _rbccloseflag)
873 885 if close:
874 886 branchidx &= _rbcbranchidxmask
875 887 if cachenode == b'\0\0\0\0':
876 888 pass
877 889 elif cachenode == reponode:
878 890 try:
879 891 return self._names[branchidx], close
880 892 except IndexError:
881 893 # recover from invalid reference to unknown branch
882 894 self._repo.ui.debug(
883 895 b"referenced branch names not found"
884 896 b" - rebuilding revision branch cache from scratch\n"
885 897 )
886 898 self._clear()
887 899 else:
888 900 # rev/node map has changed, invalidate the cache from here up
889 901 self._repo.ui.debug(
890 902 b"history modification detected - truncating "
891 903 b"revision branch cache to revision %d\n" % rev
892 904 )
893 905 truncate = rbcrevidx + _rbcrecsize
894 906 self._rbcrevs.truncate(truncate)
895 907 self._rbcrevslen = min(self._rbcrevslen, truncate)
896 908
897 909 # fall back to slow path and make sure it will be written to disk
898 910 return self._branchinfo(rev)
899 911
900 912 def _branchinfo(self, rev):
901 913 """Retrieve branch info from changelog and update _rbcrevs"""
902 914 changelog = self._repo.changelog
903 915 b, close = changelog.branchinfo(rev)
904 916 if b in self._namesreverse:
905 917 branchidx = self._namesreverse[b]
906 918 else:
907 919 branchidx = len(self._names)
908 920 self._names.append(b)
909 921 self._namesreverse[b] = branchidx
910 922 reponode = changelog.node(rev)
911 923 if close:
912 924 branchidx |= _rbccloseflag
913 925 self._setcachedata(rev, reponode, branchidx)
914 926 return b, close
915 927
916 928 def setdata(self, rev, changelogrevision):
917 929 """add new data information to the cache"""
918 930 branch, close = changelogrevision.branchinfo
919 931
920 932 if branch in self._namesreverse:
921 933 branchidx = self._namesreverse[branch]
922 934 else:
923 935 branchidx = len(self._names)
924 936 self._names.append(branch)
925 937 self._namesreverse[branch] = branchidx
926 938 if close:
927 939 branchidx |= _rbccloseflag
928 940 self._setcachedata(rev, self._repo.changelog.node(rev), branchidx)
929 941 # If no cache data were readable (none exists, bad permissions, etc.)
930 942 # the cache was bypassing itself by setting:
931 943 #
932 944 # self.branchinfo = self._branchinfo
933 945 #
934 946 # Since we now have data in the cache, we need to drop this bypassing.
935 947 if 'branchinfo' in vars(self):
936 948 del self.branchinfo
937 949
938 950 def _setcachedata(self, rev, node, branchidx):
939 951 """Writes the node's branch data to the in-memory cache data."""
940 952 if rev == nullrev:
941 953 return
942 954 rbcrevidx = rev * _rbcrecsize
943 955 self._rbcrevs.pack_into(rbcrevidx, node, branchidx)
944 956 self._rbcrevslen = min(self._rbcrevslen, rev)
945 957
946 958 tr = self._repo.currenttransaction()
947 959 if tr:
948 960 tr.addfinalize(b'write-revbranchcache', self.write)
949 961
950 962 def write(self, tr=None):
951 963 """Save branch cache if it is dirty."""
952 964 repo = self._repo
953 965 wlock = None
954 966 step = b''
955 967 try:
956 968 # write the new names
957 969 if self._rbcnamescount < len(self._names):
958 970 wlock = repo.wlock(wait=False)
959 971 step = b' names'
960 972 self._writenames(repo)
961 973
962 974 # write the new revs
963 975 start = self._rbcrevslen * _rbcrecsize
964 976 if start != len(self._rbcrevs):
965 977 step = b''
966 978 if wlock is None:
967 979 wlock = repo.wlock(wait=False)
968 980 self._writerevs(repo, start)
969 981
970 982 except (IOError, OSError, error.Abort, error.LockError) as inst:
971 983 repo.ui.debug(
972 984 b"couldn't write revision branch cache%s: %s\n"
973 985 % (step, stringutil.forcebytestr(inst))
974 986 )
975 987 finally:
976 988 if wlock is not None:
977 989 wlock.release()
978 990
979 991 def _writenames(self, repo):
980 992 """write the new branch names to revbranchcache"""
981 993 if self._rbcnamescount != 0:
982 994 f = repo.cachevfs.open(_rbcnames, b'ab')
983 995 if f.tell() == self._rbcsnameslen:
984 996 f.write(b'\0')
985 997 else:
986 998 f.close()
987 999 repo.ui.debug(b"%s changed - rewriting it\n" % _rbcnames)
988 1000 self._rbcnamescount = 0
989 1001 self._rbcrevslen = 0
990 1002 if self._rbcnamescount == 0:
991 1003 # before rewriting names, make sure references are removed
992 1004 repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
993 1005 f = repo.cachevfs.open(_rbcnames, b'wb')
994 1006 f.write(
995 1007 b'\0'.join(
996 1008 encoding.fromlocal(b)
997 1009 for b in self._names[self._rbcnamescount :]
998 1010 )
999 1011 )
1000 1012 self._rbcsnameslen = f.tell()
1001 1013 f.close()
1002 1014 self._rbcnamescount = len(self._names)
1003 1015
1004 1016 def _writerevs(self, repo, start):
1005 1017 """write the new revs to revbranchcache"""
1006 1018 revs = min(len(repo.changelog), len(self._rbcrevs) // _rbcrecsize)
1007 1019 with repo.cachevfs.open(_rbcrevs, b'ab') as f:
1008 1020 if f.tell() != start:
1009 1021 repo.ui.debug(
1010 1022 b"truncating cache/%s to %d\n" % (_rbcrevs, start)
1011 1023 )
1012 1024 f.seek(start)
1013 1025 if f.tell() != start:
1014 1026 start = 0
1015 1027 f.seek(start)
1016 1028 f.truncate()
1017 1029 end = revs * _rbcrecsize
1018 1030 f.write(self._rbcrevs.slice(start, end))
1019 1031 self._rbcrevslen = revs