branchcache: move header writing into a `_write_header` method...
marmoute
r52358:9007387a default
@@ -1,1038 +1,1042 @@
1 1 # branchmap.py - logic to compute, maintain and store the branchmap for a local repo
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import struct
10 10
11 11 from .node import (
12 12 bin,
13 13 hex,
14 14 nullrev,
15 15 )
16 16
17 17 from typing import (
18 18 Any,
19 19 Callable,
20 20 Dict,
21 21 Iterable,
22 22 List,
23 23 Optional,
24 24 Set,
25 25 TYPE_CHECKING,
26 26 Tuple,
27 27 Union,
28 28 )
29 29
30 30 from . import (
31 31 encoding,
32 32 error,
33 33 obsolete,
34 34 scmutil,
35 35 util,
36 36 )
37 37
38 38 from .utils import (
39 39 repoviewutil,
40 40 stringutil,
41 41 )
42 42
43 43 if TYPE_CHECKING:
44 44 from . import localrepo
45 45
46 46 assert [localrepo]
47 47
48 48 subsettable = repoviewutil.subsettable
49 49
50 50 calcsize = struct.calcsize
51 51 pack_into = struct.pack_into
52 52 unpack_from = struct.unpack_from
53 53
54 54
55 55 class BranchMapCache:
56 56 """mapping of filtered views of repo with their branchcache"""
57 57
58 58 def __init__(self):
59 59 self._per_filter = {}
60 60
61 61 def __getitem__(self, repo):
62 62 self.updatecache(repo)
63 63 bcache = self._per_filter[repo.filtername]
64 64 assert bcache._filtername == repo.filtername, (
65 65 bcache._filtername,
66 66 repo.filtername,
67 67 )
68 68 return bcache
69 69
70 70 def update_disk(self, repo):
71 71 """ensure and up-to-date cache is (or will be) written on disk
72 72
73 73 The cache for this repository view is updated if needed and written on
74 74 disk.
75 75
76 76 If a transaction is in progress, the writing is schedule to transaction
77 77 close. See the `BranchMapCache.write_delayed` method.
78 78
79 79 This method exist independently of __getitem__ as it is sometime useful
80 80 to signal that we have no intend to use the data in memory yet.
81 81 """
82 82 self.updatecache(repo)
83 83 bcache = self._per_filter[repo.filtername]
84 84 assert bcache._filtername == repo.filtername, (
85 85 bcache._filtername,
86 86 repo.filtername,
87 87 )
88 88 bcache.write(repo)
89 89
90 90 def updatecache(self, repo):
91 91 """Update the cache for the given filtered view on a repository"""
92 92 # This can trigger updates for the caches for subsets of the filtered
93 93 # view, e.g. when there is no cache for this filtered view or the cache
94 94 # is stale.
95 95
96 96 cl = repo.changelog
97 97 filtername = repo.filtername
98 98 bcache = self._per_filter.get(filtername)
99 99 if bcache is None or not bcache.validfor(repo):
100 100 # cache object missing or cache object stale? Read from disk
101 101 bcache = branchcache.fromfile(repo)
102 102
103 103 revs = []
104 104 if bcache is None:
105 105 # no (fresh) cache available anymore, perhaps we can re-use
106 106 # the cache for a subset, then extend that to add info on missing
107 107 # revisions.
108 108 subsetname = subsettable.get(filtername)
109 109 if subsetname is not None:
110 110 subset = repo.filtered(subsetname)
111 111 bcache = self[subset].copy(repo)
112 112 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
113 113 revs.extend(r for r in extrarevs if r <= bcache.tiprev)
114 114 else:
115 115 # nothing to fall back on, start empty.
116 116 bcache = branchcache(repo)
117 117
118 118 revs.extend(cl.revs(start=bcache.tiprev + 1))
119 119 if revs:
120 120 bcache.update(repo, revs)
121 121
122 122 assert bcache.validfor(repo), filtername
123 123 self._per_filter[repo.filtername] = bcache
124 124
125 125 def replace(self, repo, remotebranchmap):
126 126 """Replace the branchmap cache for a repo with a branch mapping.
127 127
128 128 This is likely only called during clone with a branch map from a
129 129 remote.
130 130
131 131 """
132 132 cl = repo.changelog
133 133 clrev = cl.rev
134 134 clbranchinfo = cl.branchinfo
135 135 rbheads = []
136 136 closed = set()
137 137 for bheads in remotebranchmap.values():
138 138 rbheads += bheads
139 139 for h in bheads:
140 140 r = clrev(h)
141 141 b, c = clbranchinfo(r)
142 142 if c:
143 143 closed.add(h)
144 144
145 145 if rbheads:
146 146 rtiprev = max((int(clrev(node)) for node in rbheads))
147 147 cache = branchcache(
148 148 repo,
149 149 remotebranchmap,
150 150 repo[rtiprev].node(),
151 151 rtiprev,
152 152 closednodes=closed,
153 153 )
154 154
155 155 # Try to stick it as low as possible
156 156 # filters above served are unlikely to be fetched from a clone
157 157 for candidate in (b'base', b'immutable', b'served'):
158 158 rview = repo.filtered(candidate)
159 159 if cache.validfor(rview):
160 160 cache = self._per_filter[candidate] = cache.copy(rview)
161 161 cache.write(rview)
162 162 return
163 163
164 164 def clear(self):
165 165 self._per_filter.clear()
166 166
167 167 def write_delayed(self, repo):
168 168 unfi = repo.unfiltered()
169 169 for filtername, cache in self._per_filter.items():
170 170 if cache._delayed:
171 171 repo = unfi.filtered(filtername)
172 172 cache.write(repo)
173 173
174 174
175 175 def _unknownnode(node):
176 176 """raises ValueError when branchcache found a node which does not exists"""
177 177 raise ValueError('node %s does not exist' % node.hex())
178 178
179 179
180 180 def _branchcachedesc(repo):
181 181 if repo.filtername is not None:
182 182 return b'branch cache (%s)' % repo.filtername
183 183 else:
184 184 return b'branch cache'
185 185
186 186
187 187 class _BaseBranchCache:
188 188 """A dict like object that hold branches heads cache.
189 189
190 190 This cache is used to avoid costly computations to determine all the
191 191 branch heads of a repo.
192 192
193 193 The cache is serialized on disk in the following format:
194 194
195 195 <tip hex node> <tip rev number> [optional filtered repo hex hash]
196 196 <branch head hex node> <open/closed state> <branch name>
197 197 <branch head hex node> <open/closed state> <branch name>
198 198 ...
199 199
200 200 The first line is used to check if the cache is still valid. If the
201 201 branch cache is for a filtered repo view, an optional third hash is
202 202 included that hashes the hashes of all filtered and obsolete revisions.
203 203
204 204 The open/closed state is represented by a single letter 'o' or 'c'.
205 205 This field can be used to avoid changelog reads when determining if a
206 206 branch head closes a branch or not.
207 207 """
208 208
209 209 def __init__(
210 210 self,
211 211 repo: "localrepo.localrepository",
212 212 entries: Union[
213 213 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
214 214 ] = (),
215 215 closed_nodes: Optional[Set[bytes]] = None,
216 216 ) -> None:
217 217 """hasnode is a function which can be used to verify whether changelog
218 218 has a given node or not. If it's not provided, we assume that every node
219 219 we have exists in changelog"""
220 220 # closednodes is a set of nodes that close their branch. If the branch
221 221 # cache has been updated, it may contain nodes that are no longer
222 222 # heads.
223 223 if closed_nodes is None:
224 224 closed_nodes = set()
225 225 self._closednodes = set(closed_nodes)
226 226 self._entries = dict(entries)
227 227
228 228 def __iter__(self):
229 229 return iter(self._entries)
230 230
231 231 def __setitem__(self, key, value):
232 232 self._entries[key] = value
233 233
234 234 def __getitem__(self, key):
235 235 return self._entries[key]
236 236
237 237 def __contains__(self, key):
238 238 return key in self._entries
239 239
240 240 def iteritems(self):
241 241 return self._entries.items()
242 242
243 243 items = iteritems
244 244
245 245 def hasbranch(self, label):
246 246 """checks whether a branch of this name exists or not"""
247 247 return label in self._entries
248 248
249 249 def _branchtip(self, heads):
250 250 """Return tuple with last open head in heads and false,
251 251 otherwise return last closed head and true."""
252 252 tip = heads[-1]
253 253 closed = True
254 254 for h in reversed(heads):
255 255 if h not in self._closednodes:
256 256 tip = h
257 257 closed = False
258 258 break
259 259 return tip, closed
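# Example (illustrative): with heads == [n1, n2, n3] and only n3 in
# self._closednodes, this returns (n2, False); if every head is closed,
# it falls back to (n3, True), i.e. the tipmost closed head.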
260 260
261 261 def branchtip(self, branch):
262 262 """Return the tipmost open head on branch head, otherwise return the
263 263 tipmost closed head on branch.
264 264 Raise KeyError for unknown branch."""
265 265 return self._branchtip(self[branch])[0]
266 266
267 267 def iteropen(self, nodes):
268 268 return (n for n in nodes if n not in self._closednodes)
269 269
270 270 def branchheads(self, branch, closed=False):
271 271 heads = self._entries[branch]
272 272 if not closed:
273 273 heads = list(self.iteropen(heads))
274 274 return heads
275 275
276 276 def iterbranches(self):
277 277 for bn, heads in self.items():
278 278 yield (bn, heads) + self._branchtip(heads)
279 279
280 280 def iterheads(self):
281 281 """returns all the heads"""
282 282 return self._entries.values()
283 283
284 284 def update(self, repo, revgen):
285 285 """Given a branchhead cache, self, that may have extra nodes or be
286 286 missing heads, and a generator of nodes that are strictly a superset of
287 287 the missing heads, this function updates self to be correct.
288 288 """
289 289 starttime = util.timer()
290 290 cl = repo.changelog
291 291 # collect new branch entries
292 292 newbranches = {}
293 293 getbranchinfo = repo.revbranchcache().branchinfo
294 294 max_rev = -1
295 295 for r in revgen:
296 296 branch, closesbranch = getbranchinfo(r)
297 297 newbranches.setdefault(branch, []).append(r)
298 298 if closesbranch:
299 299 self._closednodes.add(cl.node(r))
300 300 max_rev = max(max_rev, r)
301 301 if max_rev < 0:
302 302 max_rev = None
303 303
304 304 # Delay fetching the topological heads until they are needed.
305 305 # A repository without non-continuous branches can skip this part.
306 306 topoheads = None
307 307
308 308 # If a changeset is visible, its parents must be visible too, so
309 309 # use the faster unfiltered parent accessor.
310 310 parentrevs = repo.unfiltered().changelog.parentrevs
311 311
312 312 # Faster than using ctx.obsolete()
313 313 obsrevs = obsolete.getrevs(repo, b'obsolete')
314 314
315 315 for branch, newheadrevs in newbranches.items():
316 316 # For every branch, compute the new branchheads.
317 317 # A branchhead is a revision such that no descendant is on
318 318 # the same branch.
319 319 #
320 320 # The branchheads are computed iteratively in revision order.
321 321 # This ensures topological order, i.e. parents are processed
322 322 # before their children. Ancestors are inclusive here, i.e.
323 323 # any revision is an ancestor of itself.
324 324 #
325 325 # Core observations:
326 326 # - The current revision is always a branchhead for the
327 327 # repository up to that point.
328 328 # - It is the first revision of the branch if and only if
329 329 # there was no branchhead before. In that case, it is the
330 330 # only branchhead as there are no possible ancestors on
331 331 # the same branch.
332 332 # - If a parent is on the same branch, a branchhead can
333 333 # only be an ancestor of that parent if it is the parent
334 334 # itself. Otherwise it would have been removed as an ancestor
335 335 # of that parent before.
336 336 # - Therefore, if all parents are on the same branch, they
337 337 # can just be removed from the branchhead set.
338 338 # - If one parent is on the same branch and the other is not
339 339 # and there was exactly one branchhead known, the existing
340 340 # branchhead can only be an ancestor if it is the parent.
341 341 # Otherwise it would have been removed as ancestor of
342 342 # the parent before. The other parent therefore can't have
343 343 # a branchhead as ancestor.
344 344 # - In all other cases, the parents on different branches
345 345 # could have a branchhead as ancestor. Those parents are
346 346 # kept in the "uncertain" set. If all branchheads are also
347 347 # topological heads, they can't have descendants and further
348 348 # checks can be skipped. Otherwise, the ancestors of the
349 349 # "uncertain" set are removed from branchheads.
350 350 # This computation is heavy and avoided if at all possible.
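# Worked example (hypothetical revs, following the observations above):
# suppose branch "default" has head set {3} and newrev 5 arrives with
# parents 3 (same branch) and 4 (other branch). Since len(bheadset) ==
# len(samebranch) == 1, parent 4 cannot shadow any head: 3 is dropped,
# 5 is added, and no "uncertain" ancestry walk is needed. If instead
# the head set had been {2, 3}, parent 4 would go into "uncertain" and
# could only be discharged via the topoheads / cl.ancestors() check.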
351 351 bheads = self._entries.get(branch, [])
352 352 bheadset = {cl.rev(node) for node in bheads}
353 353 uncertain = set()
354 354 for newrev in sorted(newheadrevs):
355 355 if newrev in obsrevs:
356 356 # We ignore obsolete changesets as they shouldn't be
357 357 # considered heads.
358 358 continue
359 359
360 360 if not bheadset:
361 361 bheadset.add(newrev)
362 362 continue
363 363
364 364 parents = [p for p in parentrevs(newrev) if p != nullrev]
365 365 samebranch = set()
366 366 otherbranch = set()
367 367 obsparents = set()
368 368 for p in parents:
369 369 if p in obsrevs:
370 370 # We ignored this obsolete changeset earlier, but now
371 371 # that it has non-ignored children, we need to make
372 372 # sure their ancestors are not considered heads. To
373 373 # achieve that, we will simply treat this obsolete
374 374 # changeset as a parent from other branch.
375 375 obsparents.add(p)
376 376 elif p in bheadset or getbranchinfo(p)[0] == branch:
377 377 samebranch.add(p)
378 378 else:
379 379 otherbranch.add(p)
380 380 if not (len(bheadset) == len(samebranch) == 1):
381 381 uncertain.update(otherbranch)
382 382 uncertain.update(obsparents)
383 383 bheadset.difference_update(samebranch)
384 384 bheadset.add(newrev)
385 385
386 386 if uncertain:
387 387 if topoheads is None:
388 388 topoheads = set(cl.headrevs())
389 389 if bheadset - topoheads:
390 390 floorrev = min(bheadset)
391 391 if floorrev <= max(uncertain):
392 392 ancestors = set(cl.ancestors(uncertain, floorrev))
393 393 bheadset -= ancestors
394 394 if bheadset:
395 395 self[branch] = [cl.node(rev) for rev in sorted(bheadset)]
396 396
397 397 duration = util.timer() - starttime
398 398 repo.ui.log(
399 399 b'branchcache',
400 400 b'updated %s in %.4f seconds\n',
401 401 _branchcachedesc(repo),
402 402 duration,
403 403 )
404 404 return max_rev
405 405
406 406
407 407 class branchcache(_BaseBranchCache):
408 408 """Branchmap info for a local repo or repoview"""
409 409
410 410 _base_filename = b"branch2"
411 411
412 412 def __init__(
413 413 self,
414 414 repo: "localrepo.localrepository",
415 415 entries: Union[
416 416 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
417 417 ] = (),
418 418 tipnode: Optional[bytes] = None,
419 419 tiprev: Optional[int] = nullrev,
420 420 filteredhash: Optional[bytes] = None,
421 421 closednodes: Optional[Set[bytes]] = None,
422 422 hasnode: Optional[Callable[[bytes], bool]] = None,
423 423 verify_node: bool = False,
424 424 ) -> None:
425 425 """hasnode is a function which can be used to verify whether changelog
426 426 has a given node or not. If it's not provided, we assume that every node
427 427 we have exists in changelog"""
428 428 self._filtername = repo.filtername
429 429 self._delayed = False
430 430 if tipnode is None:
431 431 self.tipnode = repo.nullid
432 432 else:
433 433 self.tipnode = tipnode
434 434 self.tiprev = tiprev
435 435 self.filteredhash = filteredhash
436 436
437 437 super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
438 438 # closednodes is a set of nodes that close their branch. If the branch
439 439 # cache has been updated, it may contain nodes that are no longer
440 440 # heads.
441 441
442 442 # Do we need to verify branch nodes at all?
443 443 self._verify_node = verify_node
444 444 # branches for which nodes are verified
445 445 self._verifiedbranches = set()
446 446 self._hasnode = None
447 447 if self._verify_node:
448 448 self._hasnode = repo.changelog.hasnode
449 449
450 450 def validfor(self, repo):
451 451 """check that cache contents are valid for (a subset of) this repo
452 452
453 453 - False when the order of changesets changed or if we detect a strip.
454 454 - True when cache is up-to-date for the current repo or its subset."""
455 455 try:
456 456 node = repo.changelog.node(self.tiprev)
457 457 except IndexError:
458 458 # changesets were stripped and now we don't even have enough to
459 459 # find tiprev
460 460 return False
461 461 if self.tipnode != node:
462 462 # tiprev doesn't correspond to tipnode: repo was stripped, or this
463 463 # repo has a different order of changesets
464 464 return False
465 465 tiphash = scmutil.filteredhash(repo, self.tiprev, needobsolete=True)
466 466 # hashes don't match if this repo view has a different set of filtered
467 467 # revisions (e.g. due to phase changes) or obsolete revisions (e.g.
468 468 # history was rewritten)
469 469 return self.filteredhash == tiphash
470 470
471 471 @classmethod
472 472 def fromfile(cls, repo):
473 473 f = None
474 474 try:
475 475 f = repo.cachevfs(cls._filename(repo))
476 476 lineiter = iter(f)
477 477 init_kwargs = cls._load_header(repo, lineiter)
478 478 bcache = cls(
479 479 repo,
480 480 verify_node=True,
481 481 **init_kwargs,
482 482 )
483 483 if not bcache.validfor(repo):
484 484 # invalidate the cache
485 485 raise ValueError('tip differs')
486 486 bcache._load_heads(repo, lineiter)
487 487 except (IOError, OSError):
488 488 return None
489 489
490 490 except Exception as inst:
491 491 if repo.ui.debugflag:
492 492 msg = b'invalid %s: %s\n'
493 493 msg %= (
494 494 _branchcachedesc(repo),
495 495 stringutil.forcebytestr(inst),
496 496 )
497 497 repo.ui.debug(msg)
498 498 bcache = None
499 499
500 500 finally:
501 501 if f:
502 502 f.close()
503 503
504 504 return bcache
505 505
506 506 @classmethod
507 507 def _load_header(cls, repo, lineiter) -> "dict[str, Any]":
508 508 """parse the head of a branchmap file
509 509
510 510 return parameters to pass to a newly created class instance.
511 511 """
512 512 cachekey = next(lineiter).rstrip(b'\n').split(b" ", 2)
513 513 last, lrev = cachekey[:2]
514 514 last, lrev = bin(last), int(lrev)
515 515 filteredhash = None
516 516 if len(cachekey) > 2:
517 517 filteredhash = bin(cachekey[2])
518 518 return {
519 519 "tipnode": last,
520 520 "tiprev": lrev,
521 521 "filteredhash": filteredhash,
522 522 }
523 523
524 524 def _load_heads(self, repo, lineiter):
525 525 """fully loads the branchcache by reading from the file using the line
526 526 iterator passed"""
527 527 for line in lineiter:
528 528 line = line.rstrip(b'\n')
529 529 if not line:
530 530 continue
531 531 node, state, label = line.split(b" ", 2)
532 532 if state not in b'oc':
533 533 raise ValueError('invalid branch state')
534 534 label = encoding.tolocal(label.strip())
535 535 node = bin(node)
536 536 self._entries.setdefault(label, []).append(node)
537 537 if state == b'c':
538 538 self._closednodes.add(node)
539 539
540 540 @classmethod
541 541 def _filename(cls, repo):
542 542 """name of a branchcache file for a given repo or repoview"""
543 543 filename = cls._base_filename
544 544 if repo.filtername:
545 545 filename = b'%s-%s' % (filename, repo.filtername)
546 546 return filename
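# e.g. this yields b"branch2" for an unfiltered repo and b"branch2-served"
# for the "served" repoview (other filter names follow the same pattern).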
547 547
548 548 def copy(self, repo):
549 549 """return a deep copy of the branchcache object"""
550 550 other = type(self)(
551 551 repo=repo,
552 552 # we always do a shallow copy of self._entries, and the values are
553 553 # always replaced, so there is no need to deepcopy as long as the
554 554 # above remains true.
555 555 entries=self._entries,
556 556 tipnode=self.tipnode,
557 557 tiprev=self.tiprev,
558 558 filteredhash=self.filteredhash,
559 559 closednodes=set(self._closednodes),
560 560 verify_node=self._verify_node,
561 561 )
562 562 # the copy will likely schedule a write anyway, but it does not seem
563 563 # to hurt to overschedule
564 564 other._delayed = self._delayed
565 565 # also copy information about the current verification state
566 566 other._verifiedbranches = set(self._verifiedbranches)
567 567 return other
568 568
569 569 def write(self, repo):
570 570 assert self._filtername == repo.filtername, (
571 571 self._filtername,
572 572 repo.filtername,
573 573 )
574 574 tr = repo.currenttransaction()
575 575 if not getattr(tr, 'finalized', True):
576 576 # Avoid premature writing.
577 577 #
578 578 # (The cache warming setup by localrepo will update the file later.)
579 579 self._delayed = True
580 580 return
581 581 try:
582 582 filename = self._filename(repo)
583 583 with repo.cachevfs(filename, b"w", atomictemp=True) as f:
584 cachekey = [hex(self.tipnode), b'%d' % self.tiprev]
585 if self.filteredhash is not None:
586 cachekey.append(hex(self.filteredhash))
587 f.write(b" ".join(cachekey) + b'\n')
584 self._write_header(f)
588 585 nodecount = self._write_heads(f)
589 586 repo.ui.log(
590 587 b'branchcache',
591 588 b'wrote %s with %d labels and %d nodes\n',
592 589 _branchcachedesc(repo),
593 590 len(self._entries),
594 591 nodecount,
595 592 )
596 593 self._delayed = False
597 594 except (IOError, OSError, error.Abort) as inst:
598 595 # Abort may be raised by a read-only opener, so log and continue
599 596 repo.ui.debug(
600 597 b"couldn't write branch cache: %s\n"
601 598 % stringutil.forcebytestr(inst)
602 599 )
603 600
601 def _write_header(self, fp) -> None:
602 """write the branch cache header to a file"""
603 cachekey = [hex(self.tipnode), b'%d' % self.tiprev]
604 if self.filteredhash is not None:
605 cachekey.append(hex(self.filteredhash))
606 fp.write(b" ".join(cachekey) + b'\n')
607
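# Round-trip sketch (illustrative, not part of this change): the line
# written by _write_header() is exactly what _load_header() parses back:
#
#   import io
#   buf = io.BytesIO()
#   cache._write_header(buf)        # b"<tipnode hex> <tiprev> [hash]\n"
#   buf.seek(0)
#   branchcache._load_header(repo, iter(buf))
#   # -> {"tipnode": ..., "tiprev": ..., "filteredhash": ...}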
604 608 def _write_heads(self, fp) -> int:
605 609 """write list of heads to a file
606 610
607 611 Return the number of heads written."""
608 612 nodecount = 0
609 613 for label, nodes in sorted(self._entries.items()):
610 614 label = encoding.fromlocal(label)
611 615 for node in nodes:
612 616 nodecount += 1
613 617 if node in self._closednodes:
614 618 state = b'c'
615 619 else:
616 620 state = b'o'
617 621 fp.write(b"%s %s %s\n" % (hex(node), state, label))
618 622 return nodecount
619 623
620 624 def _verifybranch(self, branch):
621 625 """verify head nodes for the given branch."""
622 626 if not self._verify_node:
623 627 return
624 628 if branch not in self._entries or branch in self._verifiedbranches:
625 629 return
626 630 assert self._hasnode is not None
627 631 for n in self._entries[branch]:
628 632 if not self._hasnode(n):
629 633 _unknownnode(n)
630 634
631 635 self._verifiedbranches.add(branch)
632 636
633 637 def _verifyall(self):
634 638 """verifies nodes of all the branches"""
635 639 for b in self._entries.keys():
636 640 if b not in self._verifiedbranches:
637 641 self._verifybranch(b)
638 642
639 643 def __getitem__(self, key):
640 644 self._verifybranch(key)
641 645 return super().__getitem__(key)
642 646
643 647 def __contains__(self, key):
644 648 self._verifybranch(key)
645 649 return super().__contains__(key)
646 650
647 651 def iteritems(self):
648 652 self._verifyall()
649 653 return super().iteritems()
650 654
651 655 items = iteritems
652 656
653 657 def iterheads(self):
654 658 """returns all the heads"""
655 659 self._verifyall()
656 660 return super().iterheads()
657 661
658 662 def hasbranch(self, label):
659 663 """checks whether a branch of this name exists or not"""
660 664 self._verifybranch(label)
661 665 return super().hasbranch(label)
662 666
663 667 def branchheads(self, branch, closed=False):
664 668 self._verifybranch(branch)
665 669 return super().branchheads(branch, closed=closed)
666 670
667 671 def update(self, repo, revgen):
668 672 assert self._filtername == repo.filtername, (
669 673 self._filtername,
670 674 repo.filtername,
671 675 )
672 676 cl = repo.changelog
673 677 max_rev = super().update(repo, revgen)
674 678 # new tip revision which we found after iterating items from new
675 679 # branches
676 680 if max_rev is not None and max_rev > self.tiprev:
677 681 self.tiprev = max_rev
678 682 self.tipnode = cl.node(max_rev)
679 683
680 684 if not self.validfor(repo):
681 685 # old cache key is now invalid for the repo, but we've just updated
682 686 # the cache and we assume it's valid, so let's make the cache key
683 687 # valid as well by recomputing it from the cached data
684 688 self.tipnode = repo.nullid
685 689 self.tiprev = nullrev
686 690 for heads in self.iterheads():
687 691 if not heads:
688 692 # all revisions on a branch are obsolete
689 693 continue
690 694 # note: tiprev is not necessarily the tip revision of repo,
691 695 # because the tip could be obsolete (i.e. not a head)
692 696 tiprev = max(cl.rev(node) for node in heads)
693 697 if tiprev > self.tiprev:
694 698 self.tipnode = cl.node(tiprev)
695 699 self.tiprev = tiprev
696 700 self.filteredhash = scmutil.filteredhash(
697 701 repo, self.tiprev, needobsolete=True
698 702 )
699 703
700 704 self.write(repo)
701 705
702 706
703 707 class remotebranchcache(_BaseBranchCache):
704 708 """Branchmap info for a remote connection, should not write locally"""
705 709
706 710 def __init__(
707 711 self,
708 712 repo: "localrepo.localrepository",
709 713 entries: Union[
710 714 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
711 715 ] = (),
712 716 closednodes: Optional[Set[bytes]] = None,
713 717 ) -> None:
714 718 super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
715 719
716 720
717 721 # Revision branch info cache
718 722
719 723 _rbcversion = b'-v1'
720 724 _rbcnames = b'rbc-names' + _rbcversion
721 725 _rbcrevs = b'rbc-revs' + _rbcversion
722 726 # [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
723 727 _rbcrecfmt = b'>4sI'
724 728 _rbcrecsize = calcsize(_rbcrecfmt)
725 729 _rbcmininc = 64 * _rbcrecsize
726 730 _rbcnodelen = 4
727 731 _rbcbranchidxmask = 0x7FFFFFFF
728 732 _rbccloseflag = 0x80000000
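# Size sketch for the constants above (doctest-style, illustrative):
#
#   >>> import struct
#   >>> struct.calcsize(b'>4sI')   # 4-byte node prefix + 4-byte branch field
#   8
#   >>> rec = struct.pack(b'>4sI', b'\xde\xad\xbe\xef', 5 | 0x80000000)
#   >>> len(rec)                   # branch index 5, close flag set
#   8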
729 733
730 734
731 735 class rbcrevs:
732 736 """a byte string consisting of an immutable prefix followed by a mutable suffix"""
733 737
734 738 def __init__(self, revs):
735 739 self._prefix = revs
736 740 self._rest = bytearray()
737 741
738 742 def __len__(self):
739 743 return len(self._prefix) + len(self._rest)
740 744
741 745 def unpack_record(self, rbcrevidx):
742 746 if rbcrevidx < len(self._prefix):
743 747 return unpack_from(_rbcrecfmt, util.buffer(self._prefix), rbcrevidx)
744 748 else:
745 749 return unpack_from(
746 750 _rbcrecfmt,
747 751 util.buffer(self._rest),
748 752 rbcrevidx - len(self._prefix),
749 753 )
750 754
751 755 def make_mutable(self):
752 756 if len(self._prefix) > 0:
753 757 entirety = bytearray()
754 758 entirety[:] = self._prefix
755 759 entirety.extend(self._rest)
756 760 self._rest = entirety
757 761 self._prefix = bytearray()
758 762
759 763 def truncate(self, pos):
760 764 self.make_mutable()
761 765 del self._rest[pos:]
762 766
763 767 def pack_into(self, rbcrevidx, node, branchidx):
764 768 if rbcrevidx < len(self._prefix):
765 769 self.make_mutable()
766 770 buf = self._rest
767 771 start_offset = rbcrevidx - len(self._prefix)
768 772 end_offset = start_offset + _rbcrecsize
769 773
770 774 if len(self._rest) < end_offset:
771 775 # bytearray doesn't allocate extra space at least in Python 3.7.
772 776 # When multiple changesets are added in a row, precise resize would
773 777 # result in quadratic complexity. Overallocate to compensate by
774 778 # using the classic doubling technique for dynamic arrays instead.
775 779 # If there was a gap in the map before, less space will be reserved.
776 780 self._rest.extend(b'\0' * end_offset)
777 781 return pack_into(
778 782 _rbcrecfmt,
779 783 buf,
780 784 start_offset,
781 785 node,
782 786 branchidx,
783 787 )
784 788
785 789 def extend(self, extension):
786 790 return self._rest.extend(extension)
787 791
788 792 def slice(self, begin, end):
789 793 if begin < len(self._prefix):
790 794 acc = bytearray()
791 795 acc[:] = self._prefix[begin:end]
792 796 acc.extend(
793 797 self._rest[begin - len(self._prefix) : end - len(self._prefix)]
794 798 )
795 799 return acc
796 800 return self._rest[begin - len(self._prefix) : end - len(self._prefix)]
797 801
798 802
799 803 class revbranchcache:
800 804 """Persistent cache, mapping from revision number to branch name and close.
801 805 This is a low level cache, independent of filtering.
802 806
803 807 Branch names are stored in rbc-names in internal encoding separated by 0.
804 808 rbc-names is append-only, and each branch name is only stored once and will
805 809 thus have a unique index.
806 810
807 811 The branch info for each revision is stored in rbc-revs as constant size
808 812 records. The whole file is read into memory, but it is only 'parsed' on
809 813 demand. The file is usually append-only but will be truncated if repo
810 814 modification is detected.
811 815 The record for each revision contains the first 4 bytes of the
812 816 corresponding node hash, and the record is only used if it still matches.
813 817 Even a completely trashed rbc-revs file will thus still give the right
814 818 result while converging towards full recovery ... assuming no incorrectly
815 819 matching node hashes.
816 820 The record also contains 4 bytes where 31 bits contain the index of the
817 821 branch and the last bit indicates that it is a branch close commit.
818 822 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
819 823 and will grow with it but be 1/8th of its size.
820 824 """
821 825
822 826 def __init__(self, repo, readonly=True):
823 827 assert repo.filtername is None
824 828 self._repo = repo
825 829 self._names = [] # branch names in local encoding with static index
826 830 self._rbcrevs = rbcrevs(bytearray())
827 831 self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
828 832 try:
829 833 bndata = repo.cachevfs.read(_rbcnames)
830 834 self._rbcsnameslen = len(bndata) # for verification before writing
831 835 if bndata:
832 836 self._names = [
833 837 encoding.tolocal(bn) for bn in bndata.split(b'\0')
834 838 ]
835 839 except (IOError, OSError):
836 840 if readonly:
837 841 # don't try to use cache - fall back to the slow path
838 842 self.branchinfo = self._branchinfo
839 843
840 844 if self._names:
841 845 try:
842 846 if repo.ui.configbool(b'format', b'mmap-revbranchcache'):
843 847 with repo.cachevfs(_rbcrevs) as fp:
844 848 data = util.buffer(util.mmapread(fp))
845 849 else:
846 850 data = repo.cachevfs.read(_rbcrevs)
847 851 self._rbcrevs = rbcrevs(data)
848 852 except (IOError, OSError) as inst:
849 853 repo.ui.debug(
850 854 b"couldn't read revision branch cache: %s\n"
851 855 % stringutil.forcebytestr(inst)
852 856 )
853 857 # remember number of good records on disk
854 858 self._rbcrevslen = min(
855 859 len(self._rbcrevs) // _rbcrecsize, len(repo.changelog)
856 860 )
857 861 if self._rbcrevslen == 0:
858 862 self._names = []
859 863 self._rbcnamescount = len(self._names) # number of names read at
860 864 # _rbcsnameslen
861 865
862 866 def _clear(self):
863 867 self._rbcsnameslen = 0
864 868 del self._names[:]
865 869 self._rbcnamescount = 0
866 870 self._rbcrevslen = len(self._repo.changelog)
867 871 self._rbcrevs = rbcrevs(bytearray(self._rbcrevslen * _rbcrecsize))
868 872 util.clearcachedproperty(self, b'_namesreverse')
869 873
870 874 @util.propertycache
871 875 def _namesreverse(self):
872 876 return {b: r for r, b in enumerate(self._names)}
873 877
874 878 def branchinfo(self, rev):
875 879 """Return branch name and close flag for rev, using and updating
876 880 persistent cache."""
877 881 changelog = self._repo.changelog
878 882 rbcrevidx = rev * _rbcrecsize
879 883
880 884 # avoid negative index, changelog.read(nullrev) is fast without cache
881 885 if rev == nullrev:
882 886 return changelog.branchinfo(rev)
883 887
884 888 # if requested rev isn't allocated, grow and cache the rev info
885 889 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
886 890 return self._branchinfo(rev)
887 891
888 892 # fast path: extract data from cache, use it if node is matching
889 893 reponode = changelog.node(rev)[:_rbcnodelen]
890 894 cachenode, branchidx = self._rbcrevs.unpack_record(rbcrevidx)
891 895 close = bool(branchidx & _rbccloseflag)
892 896 if close:
893 897 branchidx &= _rbcbranchidxmask
894 898 if cachenode == b'\0\0\0\0':
895 899 pass
896 900 elif cachenode == reponode:
897 901 try:
898 902 return self._names[branchidx], close
899 903 except IndexError:
900 904 # recover from invalid reference to unknown branch
901 905 self._repo.ui.debug(
902 906 b"referenced branch names not found"
903 907 b" - rebuilding revision branch cache from scratch\n"
904 908 )
905 909 self._clear()
906 910 else:
907 911 # rev/node map has changed, invalidate the cache from here up
908 912 self._repo.ui.debug(
909 913 b"history modification detected - truncating "
910 914 b"revision branch cache to revision %d\n" % rev
911 915 )
912 916 truncate = rbcrevidx + _rbcrecsize
913 917 self._rbcrevs.truncate(truncate)
914 918 self._rbcrevslen = min(self._rbcrevslen, truncate)
915 919
916 920 # fall back to slow path and make sure it will be written to disk
917 921 return self._branchinfo(rev)
918 922
919 923 def _branchinfo(self, rev):
920 924 """Retrieve branch info from changelog and update _rbcrevs"""
921 925 changelog = self._repo.changelog
922 926 b, close = changelog.branchinfo(rev)
923 927 if b in self._namesreverse:
924 928 branchidx = self._namesreverse[b]
925 929 else:
926 930 branchidx = len(self._names)
927 931 self._names.append(b)
928 932 self._namesreverse[b] = branchidx
929 933 reponode = changelog.node(rev)
930 934 if close:
931 935 branchidx |= _rbccloseflag
932 936 self._setcachedata(rev, reponode, branchidx)
933 937 return b, close
934 938
935 939 def setdata(self, rev, changelogrevision):
936 940 """add new data information to the cache"""
937 941 branch, close = changelogrevision.branchinfo
938 942
939 943 if branch in self._namesreverse:
940 944 branchidx = self._namesreverse[branch]
941 945 else:
942 946 branchidx = len(self._names)
943 947 self._names.append(branch)
944 948 self._namesreverse[branch] = branchidx
945 949 if close:
946 950 branchidx |= _rbccloseflag
947 951 self._setcachedata(rev, self._repo.changelog.node(rev), branchidx)
948 952 # If no cache data were readable (none exists, bad permissions, etc.)
949 953 # the cache was bypassing itself by setting:
950 954 #
951 955 # self.branchinfo = self._branchinfo
952 956 #
953 957 # Since we now have data in the cache, we need to drop this bypassing.
954 958 if 'branchinfo' in vars(self):
955 959 del self.branchinfo
956 960
957 961 def _setcachedata(self, rev, node, branchidx):
958 962 """Writes the node's branch data to the in-memory cache data."""
959 963 if rev == nullrev:
960 964 return
961 965 rbcrevidx = rev * _rbcrecsize
962 966 self._rbcrevs.pack_into(rbcrevidx, node, branchidx)
963 967 self._rbcrevslen = min(self._rbcrevslen, rev)
964 968
965 969 tr = self._repo.currenttransaction()
966 970 if tr:
967 971 tr.addfinalize(b'write-revbranchcache', self.write)
968 972
969 973 def write(self, tr=None):
970 974 """Save branch cache if it is dirty."""
971 975 repo = self._repo
972 976 wlock = None
973 977 step = b''
974 978 try:
975 979 # write the new names
976 980 if self._rbcnamescount < len(self._names):
977 981 wlock = repo.wlock(wait=False)
978 982 step = b' names'
979 983 self._writenames(repo)
980 984
981 985 # write the new revs
982 986 start = self._rbcrevslen * _rbcrecsize
983 987 if start != len(self._rbcrevs):
984 988 step = b''
985 989 if wlock is None:
986 990 wlock = repo.wlock(wait=False)
987 991 self._writerevs(repo, start)
988 992
989 993 except (IOError, OSError, error.Abort, error.LockError) as inst:
990 994 repo.ui.debug(
991 995 b"couldn't write revision branch cache%s: %s\n"
992 996 % (step, stringutil.forcebytestr(inst))
993 997 )
994 998 finally:
995 999 if wlock is not None:
996 1000 wlock.release()
997 1001
998 1002 def _writenames(self, repo):
999 1003 """write the new branch names to revbranchcache"""
1000 1004 if self._rbcnamescount != 0:
1001 1005 f = repo.cachevfs.open(_rbcnames, b'ab')
1002 1006 if f.tell() == self._rbcsnameslen:
1003 1007 f.write(b'\0')
1004 1008 else:
1005 1009 f.close()
1006 1010 repo.ui.debug(b"%s changed - rewriting it\n" % _rbcnames)
1007 1011 self._rbcnamescount = 0
1008 1012 self._rbcrevslen = 0
1009 1013 if self._rbcnamescount == 0:
1010 1014 # before rewriting names, make sure references are removed
1011 1015 repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
1012 1016 f = repo.cachevfs.open(_rbcnames, b'wb')
1013 1017 f.write(
1014 1018 b'\0'.join(
1015 1019 encoding.fromlocal(b)
1016 1020 for b in self._names[self._rbcnamescount :]
1017 1021 )
1018 1022 )
1019 1023 self._rbcsnameslen = f.tell()
1020 1024 f.close()
1021 1025 self._rbcnamescount = len(self._names)
1022 1026
1023 1027 def _writerevs(self, repo, start):
1024 1028 """write the new revs to revbranchcache"""
1025 1029 revs = min(len(repo.changelog), len(self._rbcrevs) // _rbcrecsize)
1026 1030 with repo.cachevfs.open(_rbcrevs, b'ab') as f:
1027 1031 if f.tell() != start:
1028 1032 repo.ui.debug(
1029 1033 b"truncating cache/%s to %d\n" % (_rbcrevs, start)
1030 1034 )
1031 1035 f.seek(start)
1032 1036 if f.tell() != start:
1033 1037 start = 0
1034 1038 f.seek(start)
1035 1039 f.truncate()
1036 1040 end = revs * _rbcrecsize
1037 1041 f.write(self._rbcrevs.slice(start, end))
1038 1042 self._rbcrevslen = revs