branchcache: do not copy the `_dirty` flag...
marmoute
r52384:dd5b5f99 default
@@ -1,1051 +1,1048 @@
1 1 # branchmap.py - logic to computes, maintain and stores branchmap for local repo
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import struct
10 10
11 11 from .node import (
12 12 bin,
13 13 hex,
14 14 nullrev,
15 15 )
16 16
17 17 from typing import (
18 18 Any,
19 19 Callable,
20 20 Dict,
21 21 Iterable,
22 22 List,
23 23 Optional,
24 24 Set,
25 25 TYPE_CHECKING,
26 26 Tuple,
27 27 Union,
28 28 )
29 29
30 30 from . import (
31 31 encoding,
32 32 error,
33 33 obsolete,
34 34 scmutil,
35 35 util,
36 36 )
37 37
38 38 from .utils import (
39 39 repoviewutil,
40 40 stringutil,
41 41 )
42 42
43 43 if TYPE_CHECKING:
44 44 from . import localrepo
45 45
46 46 assert [localrepo]
47 47
48 48 subsettable = repoviewutil.subsettable
49 49
50 50 calcsize = struct.calcsize
51 51 pack_into = struct.pack_into
52 52 unpack_from = struct.unpack_from
53 53
54 54
55 55 class BranchMapCache:
56 56 """mapping of filtered views of repo with their branchcache"""
57 57
58 58 def __init__(self):
59 59 self._per_filter = {}
60 60
61 61 def __getitem__(self, repo):
62 62 self.updatecache(repo)
63 63 bcache = self._per_filter[repo.filtername]
64 64 assert bcache._filtername == repo.filtername, (
65 65 bcache._filtername,
66 66 repo.filtername,
67 67 )
68 68 return bcache
69 69
70 70 def update_disk(self, repo):
71 71 """ensure and up-to-date cache is (or will be) written on disk
72 72
73 73 The cache for this repository view is updated if needed and written on
74 74 disk.
75 75
76 76 If a transaction is in progress, the write is scheduled at transaction
77 77 close. See the `BranchMapCache.write_dirty` method.
78 78
79 79 This method exists independently of __getitem__ as it is sometimes
80 80 useful to signal that we have no intent to use the data in memory yet.
81 81 """
82 82 self.updatecache(repo)
83 83 bcache = self._per_filter[repo.filtername]
84 84 assert bcache._filtername == repo.filtername, (
85 85 bcache._filtername,
86 86 repo.filtername,
87 87 )
88 88 bcache.write(repo)
89 89
90 90 def updatecache(self, repo):
91 91 """Update the cache for the given filtered view on a repository"""
92 92 # This can trigger updates for the caches for subsets of the filtered
93 93 # view, e.g. when there is no cache for this filtered view or the cache
94 94 # is stale.
95 95
96 96 cl = repo.changelog
97 97 filtername = repo.filtername
98 98 bcache = self._per_filter.get(filtername)
99 99 if bcache is None or not bcache.validfor(repo):
100 100 # cache object missing or cache object stale? Read from disk
101 101 bcache = branchcache.fromfile(repo)
102 102
103 103 revs = []
104 104 if bcache is None:
105 105 # no (fresh) cache available anymore, perhaps we can re-use
106 106 # the cache for a subset, then extend that to add info on missing
107 107 # revisions.
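# (e.g. per repoviewutil.subsettable, the "visible" view can typically be
# seeded from "served", and "served" from "immutable", so only the extra
# revisions need to be recomputed)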
108 108 subsetname = subsettable.get(filtername)
109 109 if subsetname is not None:
110 110 subset = repo.filtered(subsetname)
111 111 bcache = self[subset].copy(repo)
112 112 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
113 113 revs.extend(r for r in extrarevs if r <= bcache.tiprev)
114 114 else:
115 115 # nothing to fall back on, start empty.
116 116 bcache = branchcache(repo)
117 117
118 118 revs.extend(cl.revs(start=bcache.tiprev + 1))
119 119 if revs:
120 120 bcache.update(repo, revs)
121 121
122 122 assert bcache.validfor(repo), filtername
123 123 self._per_filter[repo.filtername] = bcache
124 124
125 125 def replace(self, repo, remotebranchmap):
126 126 """Replace the branchmap cache for a repo with a branch mapping.
127 127
128 128 This is likely only called during clone with a branch map from a
129 129 remote.
130 130
131 131 """
132 132 cl = repo.changelog
133 133 clrev = cl.rev
134 134 clbranchinfo = cl.branchinfo
135 135 rbheads = []
136 136 closed = set()
137 137 for bheads in remotebranchmap.values():
138 138 rbheads += bheads
139 139 for h in bheads:
140 140 r = clrev(h)
141 141 b, c = clbranchinfo(r)
142 142 if c:
143 143 closed.add(h)
144 144
145 145 if rbheads:
146 146 rtiprev = max((int(clrev(node)) for node in rbheads))
147 147 cache = branchcache(
148 148 repo,
149 149 remotebranchmap,
150 150 repo[rtiprev].node(),
151 151 rtiprev,
152 152 closednodes=closed,
153 153 )
154 154
155 155 # Try to stick it as low as possible
156 156 # filters above served are unlikely to be fetched from a clone
157 157 for candidate in (b'base', b'immutable', b'served'):
158 158 rview = repo.filtered(candidate)
159 159 if cache.validfor(rview):
160 160 cache._filtername = candidate
161 161 self._per_filter[candidate] = cache
162 162 cache._dirty = True
163 163 cache.write(rview)
164 164 return
165 165
166 166 def clear(self):
167 167 self._per_filter.clear()
168 168
169 169 def write_dirty(self, repo):
170 170 unfi = repo.unfiltered()
171 171 for filtername in repoviewutil.get_ordered_subset():
172 172 cache = self._per_filter.get(filtername)
173 173 if cache is None:
174 174 continue
175 175 if cache._dirty:
176 176 if filtername is None:
177 177 repo = unfi
178 178 else:
179 179 repo = unfi.filtered(filtername)
180 180 cache.write(repo)
181 181
182 182
183 183 def _unknownnode(node):
184 184 """raises ValueError when branchcache found a node which does not exists"""
185 185 raise ValueError('node %s does not exist' % node.hex())
186 186
187 187
188 188 def _branchcachedesc(repo):
189 189 if repo.filtername is not None:
190 190 return b'branch cache (%s)' % repo.filtername
191 191 else:
192 192 return b'branch cache'
193 193
194 194
195 195 class _BaseBranchCache:
196 196 """A dict like object that hold branches heads cache.
197 197
198 198 This cache is used to avoid costly computations to determine all the
199 199 branch heads of a repo.
200 200
201 201 The cache is serialized on disk in the following format:
202 202
203 203 <tip hex node> <tip rev number> [optional filtered repo hex hash]
204 204 <branch head hex node> <open/closed state> <branch name>
205 205 <branch head hex node> <open/closed state> <branch name>
206 206 ...
207 207
208 208 The first line is used to check if the cache is still valid. If the
209 209 branch cache is for a filtered repo view, an optional third hash is
210 210 included that hashes the hashes of all filtered and obsolete revisions.
211 211
212 212 The open/closed state is represented by a single letter 'o' or 'c'.
213 213 This field can be used to avoid changelog reads when determining if a
214 214 branch head closes a branch or not.
215 215 """
216 216
217 217 def __init__(
218 218 self,
219 219 repo: "localrepo.localrepository",
220 220 entries: Union[
221 221 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
222 222 ] = (),
223 223 closed_nodes: Optional[Set[bytes]] = None,
224 224 ) -> None:
225 225 """hasnode is a function which can be used to verify whether changelog
226 226 has a given node or not. If it's not provided, we assume that every node
227 227 we have exists in changelog"""
228 228 # closednodes is a set of nodes that close their branch. If the branch
229 229 # cache has been updated, it may contain nodes that are no longer
230 230 # heads.
231 231 if closed_nodes is None:
232 232 closed_nodes = set()
233 233 self._closednodes = set(closed_nodes)
234 234 self._entries = dict(entries)
235 235
236 236 def __iter__(self):
237 237 return iter(self._entries)
238 238
239 239 def __setitem__(self, key, value):
240 240 self._entries[key] = value
241 241
242 242 def __getitem__(self, key):
243 243 return self._entries[key]
244 244
245 245 def __contains__(self, key):
246 246 return key in self._entries
247 247
248 248 def iteritems(self):
249 249 return self._entries.items()
250 250
251 251 items = iteritems
252 252
253 253 def hasbranch(self, label):
254 254 """checks whether a branch of this name exists or not"""
255 255 return label in self._entries
256 256
257 257 def _branchtip(self, heads):
258 258 """Return tuple with last open head in heads and false,
259 259 otherwise return last closed head and true."""
260 260 tip = heads[-1]
261 261 closed = True
262 262 for h in reversed(heads):
263 263 if h not in self._closednodes:
264 264 tip = h
265 265 closed = False
266 266 break
267 267 return tip, closed
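# Hypothetical example: for heads [a, b, c] where only c is closed,
# _branchtip(heads) returns (b, False); if a, b and c were all closed, it
# would return (c, True).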
268 268
269 269 def branchtip(self, branch):
270 270 """Return the tipmost open head on branch head, otherwise return the
271 271 tipmost closed head on branch.
272 272 Raise KeyError for unknown branch."""
273 273 return self._branchtip(self[branch])[0]
274 274
275 275 def iteropen(self, nodes):
276 276 return (n for n in nodes if n not in self._closednodes)
277 277
278 278 def branchheads(self, branch, closed=False):
279 279 heads = self._entries[branch]
280 280 if not closed:
281 281 heads = list(self.iteropen(heads))
282 282 return heads
283 283
284 284 def iterbranches(self):
285 285 for bn, heads in self.items():
286 286 yield (bn, heads) + self._branchtip(heads)
287 287
288 288 def iterheads(self):
289 289 """returns all the heads"""
290 290 return self._entries.values()
291 291
292 292 def update(self, repo, revgen):
293 293 """Given a branchhead cache, self, that may have extra nodes or be
294 294 missing heads, and a generator of nodes that are strictly a superset of
295 295 heads missing, this function updates self to be correct.
296 296 """
297 297 starttime = util.timer()
298 298 cl = repo.changelog
299 299 # collect new branch entries
300 300 newbranches = {}
301 301 getbranchinfo = repo.revbranchcache().branchinfo
302 302 max_rev = -1
303 303 for r in revgen:
304 304 branch, closesbranch = getbranchinfo(r)
305 305 newbranches.setdefault(branch, []).append(r)
306 306 if closesbranch:
307 307 self._closednodes.add(cl.node(r))
308 308 max_rev = max(max_rev, r)
309 309 if max_rev < 0:
310 310 msg = "running branchcache.update without revision to update"
311 311 raise error.ProgrammingError(msg)
312 312
313 313 # Delay fetching the topological heads until they are needed.
314 314 # A repository without non-contiguous branches can skip this part.
315 315 topoheads = None
316 316
317 317 # If a changeset is visible, its parents must be visible too, so
318 318 # use the faster unfiltered parent accessor.
319 319 parentrevs = repo.unfiltered().changelog.parentrevs
320 320
321 321 # Faster than using ctx.obsolete()
322 322 obsrevs = obsolete.getrevs(repo, b'obsolete')
323 323
324 324 for branch, newheadrevs in newbranches.items():
325 325 # For every branch, compute the new branchheads.
326 326 # A branchhead is a revision such that no descendant is on
327 327 # the same branch.
328 328 #
329 329 # The branchheads are computed iteratively in revision order.
330 330 # This ensures topological order, i.e. parents are processed
331 331 # before their children. Ancestors are inclusive here, i.e.
332 332 # any revision is an ancestor of itself.
333 333 #
334 334 # Core observations:
335 335 # - The current revision is always a branchhead for the
336 336 # repository up to that point.
337 337 # - It is the first revision of the branch if and only if
338 338 # there was no branchhead before. In that case, it is the
339 339 # only branchhead as there are no possible ancestors on
340 340 # the same branch.
341 341 # - If a parent is on the same branch, a branchhead can
342 342 # only be an ancestor of that parent if it is the parent
343 343 # itself. Otherwise it would have been removed as an
344 344 # ancestor of that parent before.
345 345 # - Therefore, if all parents are on the same branch, they
346 346 # can just be removed from the branchhead set.
347 347 # - If one parent is on the same branch and the other is not
348 348 # and there was exactly one branchhead known, the existing
349 349 # branchhead can only be an ancestor if it is the parent.
350 350 # Otherwise it would have been removed as ancestor of
351 351 # the parent before. The other parent therefore can't have
352 352 # a branchhead as ancestor.
353 353 # - In all other cases, the parents on different branches
354 354 # could have a branchhead as ancestor. Those parents are
355 355 # kept in the "uncertain" set. If all branchheads are also
356 356 # topological heads, they can't have descendants and further
357 357 # checks can be skipped. Otherwise, the ancestors of the
358 358 # "uncertain" set are removed from branchheads.
359 359 # This computation is heavy and avoided if at all possible.
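# A hypothetical walk-through: with bheadset = {3} and sorted newheadrevs
# [5, 6], where rev 5's parent is 3 (same branch) and rev 6's parent is 4
# (another branch): 5 replaces 3 as a head, 4 goes into "uncertain", and 6
# is added, leaving bheadset = {5, 6} pending the ancestor check below.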
360 360 bheads = self._entries.get(branch, [])
361 361 bheadset = {cl.rev(node) for node in bheads}
362 362 uncertain = set()
363 363 for newrev in sorted(newheadrevs):
364 364 if newrev in obsrevs:
365 365 # We ignore obsolete changesets as they shouldn't be
366 366 # considered heads.
367 367 continue
368 368
369 369 if not bheadset:
370 370 bheadset.add(newrev)
371 371 continue
372 372
373 373 parents = [p for p in parentrevs(newrev) if p != nullrev]
374 374 samebranch = set()
375 375 otherbranch = set()
376 376 obsparents = set()
377 377 for p in parents:
378 378 if p in obsrevs:
379 379 # We ignored this obsolete changeset earlier, but now
380 380 # that it has non-ignored children, we need to make
381 381 # sure their ancestors are not considered heads. To
382 382 # achieve that, we will simply treat this obsolete
383 383 # changeset as a parent from another branch.
384 384 obsparents.add(p)
385 385 elif p in bheadset or getbranchinfo(p)[0] == branch:
386 386 samebranch.add(p)
387 387 else:
388 388 otherbranch.add(p)
389 389 if not (len(bheadset) == len(samebranch) == 1):
390 390 uncertain.update(otherbranch)
391 391 uncertain.update(obsparents)
392 392 bheadset.difference_update(samebranch)
393 393 bheadset.add(newrev)
394 394
395 395 if uncertain:
396 396 if topoheads is None:
397 397 topoheads = set(cl.headrevs())
398 398 if bheadset - topoheads:
399 399 floorrev = min(bheadset)
400 400 if floorrev <= max(uncertain):
401 401 ancestors = set(cl.ancestors(uncertain, floorrev))
402 402 bheadset -= ancestors
403 403 if bheadset:
404 404 self[branch] = [cl.node(rev) for rev in sorted(bheadset)]
405 405
406 406 duration = util.timer() - starttime
407 407 repo.ui.log(
408 408 b'branchcache',
409 409 b'updated %s in %.4f seconds\n',
410 410 _branchcachedesc(repo),
411 411 duration,
412 412 )
413 413 return max_rev
414 414
415 415
416 416 class branchcache(_BaseBranchCache):
417 417 """Branchmap info for a local repo or repoview"""
418 418
419 419 _base_filename = b"branch2"
420 420
421 421 def __init__(
422 422 self,
423 423 repo: "localrepo.localrepository",
424 424 entries: Union[
425 425 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
426 426 ] = (),
427 427 tipnode: Optional[bytes] = None,
428 428 tiprev: Optional[int] = nullrev,
429 429 filteredhash: Optional[bytes] = None,
430 430 closednodes: Optional[Set[bytes]] = None,
431 431 hasnode: Optional[Callable[[bytes], bool]] = None,
432 432 verify_node: bool = False,
433 433 ) -> None:
434 434 """hasnode is a function which can be used to verify whether changelog
435 435 has a given node or not. If it's not provided, we assume that every node
436 436 we have exists in changelog"""
437 437 self._filtername = repo.filtername
438 438 if tipnode is None:
439 439 self.tipnode = repo.nullid
440 440 else:
441 441 self.tipnode = tipnode
442 442 self.tiprev = tiprev
443 443 self.filteredhash = filteredhash
444 444 self._dirty = False
445 445
446 446 super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
447 447 # closednodes is a set of nodes that close their branch. If the branch
448 448 # cache has been updated, it may contain nodes that are no longer
449 449 # heads.
450 450
451 451 # Do we need to verify branches at all?
452 452 self._verify_node = verify_node
453 453 # branches for which nodes are verified
454 454 self._verifiedbranches = set()
455 455 self._hasnode = None
456 456 if self._verify_node:
457 457 self._hasnode = repo.changelog.hasnode
458 458
459 459 def validfor(self, repo):
460 460 """check that cache contents are valid for (a subset of) this repo
461 461
462 462 - False when the order of changesets changed or if we detect a strip.
463 463 - True when cache is up-to-date for the current repo or its subset."""
464 464 try:
465 465 node = repo.changelog.node(self.tiprev)
466 466 except IndexError:
467 467 # changesets were stripped and now we don't even have enough to
468 468 # find tiprev
469 469 return False
470 470 if self.tipnode != node:
471 471 # tiprev doesn't correspond to tipnode: repo was stripped, or this
472 472 # repo has a different order of changesets
473 473 return False
474 474 tiphash = scmutil.filteredhash(repo, self.tiprev, needobsolete=True)
475 475 # hashes don't match if this repo view has a different set of filtered
476 476 # revisions (e.g. due to phase changes) or obsolete revisions (e.g.
477 477 # history was rewritten)
478 478 return self.filteredhash == tiphash
479 479
480 480 @classmethod
481 481 def fromfile(cls, repo):
482 482 f = None
483 483 try:
484 484 f = repo.cachevfs(cls._filename(repo))
485 485 lineiter = iter(f)
486 486 init_kwargs = cls._load_header(repo, lineiter)
487 487 bcache = cls(
488 488 repo,
489 489 verify_node=True,
490 490 **init_kwargs,
491 491 )
492 492 if not bcache.validfor(repo):
493 493 # invalidate the cache
494 494 raise ValueError('tip differs')
495 495 bcache._load_heads(repo, lineiter)
496 496 except (IOError, OSError):
497 497 return None
498 498
499 499 except Exception as inst:
500 500 if repo.ui.debugflag:
501 501 msg = b'invalid %s: %s\n'
502 502 msg %= (
503 503 _branchcachedesc(repo),
504 504 stringutil.forcebytestr(inst),
505 505 )
506 506 repo.ui.debug(msg)
507 507 bcache = None
508 508
509 509 finally:
510 510 if f:
511 511 f.close()
512 512
513 513 return bcache
514 514
515 515 @classmethod
516 516 def _load_header(cls, repo, lineiter) -> "dict[str, Any]":
517 517 """parse the head of a branchmap file
518 518
519 519 return parameters to pass to a newly created class instance.
520 520 """
521 521 cachekey = next(lineiter).rstrip(b'\n').split(b" ", 2)
522 522 last, lrev = cachekey[:2]
523 523 last, lrev = bin(last), int(lrev)
524 524 filteredhash = None
525 525 if len(cachekey) > 2:
526 526 filteredhash = bin(cachekey[2])
527 527 return {
528 528 "tipnode": last,
529 529 "tiprev": lrev,
530 530 "filteredhash": filteredhash,
531 531 }
532 532
533 533 def _load_heads(self, repo, lineiter):
534 534 """fully loads the branchcache by reading from the file using the line
535 535 iterator passed"""
536 536 for line in lineiter:
537 537 line = line.rstrip(b'\n')
538 538 if not line:
539 539 continue
540 540 node, state, label = line.split(b" ", 2)
541 541 if state not in b'oc':
542 542 raise ValueError('invalid branch state')
543 543 label = encoding.tolocal(label.strip())
544 544 node = bin(node)
545 545 self._entries.setdefault(label, []).append(node)
546 546 if state == b'c':
547 547 self._closednodes.add(node)
548 548
549 549 @classmethod
550 550 def _filename(cls, repo):
551 551 """name of a branchcache file for a given repo or repoview"""
552 552 filename = cls._base_filename
553 553 if repo.filtername:
554 554 filename = b'%s-%s' % (filename, repo.filtername)
555 555 return filename
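# For example, the unfiltered repo reads and writes b"branch2", while the
# "served" repoview uses b"branch2-served".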
556 556
557 557 def copy(self, repo):
558 558 """return a deep copy of the branchcache object"""
559 559 assert repo.filtername != self._filtername
560 560 other = type(self)(
561 561 repo=repo,
562 562 # we always do a shallow copy of self._entries, and the values are
563 563 # always replaced, so there is no need to deepcopy as long as the
564 564 # above remains true.
565 565 entries=self._entries,
566 566 tipnode=self.tipnode,
567 567 tiprev=self.tiprev,
568 568 filteredhash=self.filteredhash,
569 569 closednodes=set(self._closednodes),
570 570 verify_node=self._verify_node,
571 571 )
572 # we copy will likely schedule a write anyway, but that does not seems
573 # to hurt to overschedule
574 other._dirty = self._dirty
575 572 # also copy information about the current verification state
576 573 other._verifiedbranches = set(self._verifiedbranches)
577 574 return other
578 575
579 576 def write(self, repo):
580 577 assert self._filtername == repo.filtername, (
581 578 self._filtername,
582 579 repo.filtername,
583 580 )
584 581 tr = repo.currenttransaction()
585 582 if not getattr(tr, 'finalized', True):
586 583 # Avoid premature writing.
587 584 #
588 585 # (The cache warming setup by localrepo will update the file later.)
589 586 return
590 587 try:
591 588 filename = self._filename(repo)
592 589 with repo.cachevfs(filename, b"w", atomictemp=True) as f:
593 590 self._write_header(f)
594 591 nodecount = self._write_heads(f)
595 592 repo.ui.log(
596 593 b'branchcache',
597 594 b'wrote %s with %d labels and %d nodes\n',
598 595 _branchcachedesc(repo),
599 596 len(self._entries),
600 597 nodecount,
601 598 )
602 599 self._dirty = False
603 600 except (IOError, OSError, error.Abort) as inst:
604 601 # Abort may be raised by read only opener, so log and continue
605 602 repo.ui.debug(
606 603 b"couldn't write branch cache: %s\n"
607 604 % stringutil.forcebytestr(inst)
608 605 )
609 606
610 607 def _write_header(self, fp) -> None:
611 608 """write the branch cache header to a file"""
612 609 cachekey = [hex(self.tipnode), b'%d' % self.tiprev]
613 610 if self.filteredhash is not None:
614 611 cachekey.append(hex(self.filteredhash))
615 612 fp.write(b" ".join(cachekey) + b'\n')
616 613
617 614 def _write_heads(self, fp) -> int:
618 615 """write list of heads to a file
619 616
620 617 Return the number of heads written."""
621 618 nodecount = 0
622 619 for label, nodes in sorted(self._entries.items()):
623 620 label = encoding.fromlocal(label)
624 621 for node in nodes:
625 622 nodecount += 1
626 623 if node in self._closednodes:
627 624 state = b'c'
628 625 else:
629 626 state = b'o'
630 627 fp.write(b"%s %s %s\n" % (hex(node), state, label))
631 628 return nodecount
632 629
633 630 def _verifybranch(self, branch):
634 631 """verify head nodes for the given branch."""
635 632 if not self._verify_node:
636 633 return
637 634 if branch not in self._entries or branch in self._verifiedbranches:
638 635 return
639 636 assert self._hasnode is not None
640 637 for n in self._entries[branch]:
641 638 if not self._hasnode(n):
642 639 _unknownnode(n)
643 640
644 641 self._verifiedbranches.add(branch)
645 642
646 643 def _verifyall(self):
647 644 """verifies nodes of all the branches"""
648 645 for b in self._entries.keys():
649 646 if b not in self._verifiedbranches:
650 647 self._verifybranch(b)
651 648
652 649 def __getitem__(self, key):
653 650 self._verifybranch(key)
654 651 return super().__getitem__(key)
655 652
656 653 def __contains__(self, key):
657 654 self._verifybranch(key)
658 655 return super().__contains__(key)
659 656
660 657 def iteritems(self):
661 658 self._verifyall()
662 659 return super().iteritems()
663 660
664 661 items = iteritems
665 662
666 663 def iterheads(self):
667 664 """returns all the heads"""
668 665 self._verifyall()
669 666 return super().iterheads()
670 667
671 668 def hasbranch(self, label):
672 669 """checks whether a branch of this name exists or not"""
673 670 self._verifybranch(label)
674 671 return super().hasbranch(label)
675 672
676 673 def branchheads(self, branch, closed=False):
677 674 self._verifybranch(branch)
678 675 return super().branchheads(branch, closed=closed)
679 676
680 677 def update(self, repo, revgen):
681 678 assert self._filtername == repo.filtername, (
682 679 self._filtername,
683 680 repo.filtername,
684 681 )
685 682 cl = repo.changelog
686 683 max_rev = super().update(repo, revgen)
687 684 # new tip revision which we found after iterating items from new
688 685 # branches
689 686 if max_rev is not None and max_rev > self.tiprev:
690 687 self.tiprev = max_rev
691 688 self.tipnode = cl.node(max_rev)
692 689
693 690 if not self.validfor(repo):
694 691 # old cache key is now invalid for the repo, but we've just updated
695 692 # the cache and we assume it's valid, so let's make the cache key
696 693 # valid as well by recomputing it from the cached data
697 694 self.tipnode = repo.nullid
698 695 self.tiprev = nullrev
699 696 for heads in self.iterheads():
700 697 if not heads:
701 698 # all revisions on a branch are obsolete
702 699 continue
703 700 # note: tiprev is not necessarily the tip revision of repo,
704 701 # because the tip could be obsolete (i.e. not a head)
705 702 tiprev = max(cl.rev(node) for node in heads)
706 703 if tiprev > self.tiprev:
707 704 self.tipnode = cl.node(tiprev)
708 705 self.tiprev = tiprev
709 706 self.filteredhash = scmutil.filteredhash(
710 707 repo, self.tiprev, needobsolete=True
711 708 )
712 709 self._dirty = True
713 710 self.write(repo)
714 711
715 712
716 713 class remotebranchcache(_BaseBranchCache):
717 714 """Branchmap info for a remote connection, should not write locally"""
718 715
719 716 def __init__(
720 717 self,
721 718 repo: "localrepo.localrepository",
722 719 entries: Union[
723 720 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
724 721 ] = (),
725 722 closednodes: Optional[Set[bytes]] = None,
726 723 ) -> None:
727 724 super().__init__(repo=repo, entries=entries, closed_nodes=closednodes)
728 725
729 726
730 727 # Revision branch info cache
731 728
732 729 _rbcversion = b'-v1'
733 730 _rbcnames = b'rbc-names' + _rbcversion
734 731 _rbcrevs = b'rbc-revs' + _rbcversion
735 732 # [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
736 733 _rbcrecfmt = b'>4sI'
737 734 _rbcrecsize = calcsize(_rbcrecfmt)
738 735 _rbcmininc = 64 * _rbcrecsize
739 736 _rbcnodelen = 4
740 737 _rbcbranchidxmask = 0x7FFFFFFF
741 738 _rbccloseflag = 0x80000000
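# A quick illustrative round-trip of one record (the node prefix and the
# branch index below are invented):
#
#   >>> import struct
#   >>> rec = struct.pack(_rbcrecfmt, b'\xca\xfe\xba\xbe', 5 | _rbccloseflag)
#   >>> prefix, field = struct.unpack(_rbcrecfmt, rec)
#   >>> (field & _rbcbranchidxmask, bool(field & _rbccloseflag))
#   (5, True)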
742 739
743 740
744 741 class rbcrevs:
745 742 """a byte string consisting of an immutable prefix followed by a mutable suffix"""
746 743
747 744 def __init__(self, revs):
748 745 self._prefix = revs
749 746 self._rest = bytearray()
750 747
751 748 def __len__(self):
752 749 return len(self._prefix) + len(self._rest)
753 750
754 751 def unpack_record(self, rbcrevidx):
755 752 if rbcrevidx < len(self._prefix):
756 753 return unpack_from(_rbcrecfmt, util.buffer(self._prefix), rbcrevidx)
757 754 else:
758 755 return unpack_from(
759 756 _rbcrecfmt,
760 757 util.buffer(self._rest),
761 758 rbcrevidx - len(self._prefix),
762 759 )
763 760
764 761 def make_mutable(self):
765 762 if len(self._prefix) > 0:
766 763 entirety = bytearray()
767 764 entirety[:] = self._prefix
768 765 entirety.extend(self._rest)
769 766 self._rest = entirety
770 767 self._prefix = bytearray()
771 768
772 769 def truncate(self, pos):
773 770 self.make_mutable()
774 771 del self._rest[pos:]
775 772
776 773 def pack_into(self, rbcrevidx, node, branchidx):
777 774 if rbcrevidx < len(self._prefix):
778 775 self.make_mutable()
779 776 buf = self._rest
780 777 start_offset = rbcrevidx - len(self._prefix)
781 778 end_offset = start_offset + _rbcrecsize
782 779
783 780 if len(self._rest) < end_offset:
784 781 # bytearray doesn't allocate extra space at least in Python 3.7.
785 782 # When multiple changesets are added in a row, precise resize would
786 783 # result in quadratic complexity. Overallocate to compensate by
787 784 # using the classic doubling technique for dynamic arrays instead.
788 785 # If there was a gap in the map before, less space will be reserved.
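# (illustrative numbers: appending one 8-byte record just past a 1000-byte
# buffer extends it by ~1008 zero bytes, roughly doubling its capacity, so
# repeated appends amortize to O(1))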
789 786 self._rest.extend(b'\0' * end_offset)
790 787 return pack_into(
791 788 _rbcrecfmt,
792 789 buf,
793 790 start_offset,
794 791 node,
795 792 branchidx,
796 793 )
797 794
798 795 def extend(self, extension):
799 796 return self._rest.extend(extension)
800 797
801 798 def slice(self, begin, end):
802 799 if begin < len(self._prefix):
803 800 acc = bytearray()
804 801 acc[:] = self._prefix[begin:end]
805 802 acc.extend(
806 803 self._rest[begin - len(self._prefix) : end - len(self._prefix)]
807 804 )
808 805 return acc
809 806 return self._rest[begin - len(self._prefix) : end - len(self._prefix)]
810 807
811 808
812 809 class revbranchcache:
813 810 """Persistent cache, mapping from revision number to branch name and close.
814 811 This is a low level cache, independent of filtering.
815 812
816 813 Branch names are stored in rbc-names in internal encoding separated by 0.
817 814 rbc-names is append-only, and each branch name is only stored once and will
818 815 thus have a unique index.
819 816
820 817 The branch info for each revision is stored in rbc-revs as constant size
821 818 records. The whole file is read into memory, but it is only 'parsed' on
822 819 demand. The file is usually append-only but will be truncated if repo
823 820 modification is detected.
824 821 The record for each revision contains the first 4 bytes of the
825 822 corresponding node hash, and the record is only used if it still matches.
826 823 Even a completely trashed rbc-revs file will thus still give the right result
827 824 while converging towards full recovery ... assuming no incorrectly matching
828 825 node hashes.
829 826 The record also contains 4 bytes where 31 bits contain the index of the
830 827 branch and the last bit indicates that it is a branch-close commit.
831 828 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
832 829 and will grow with it but be 1/8th of its size.
833 830 """
834 831
835 832 def __init__(self, repo, readonly=True):
836 833 assert repo.filtername is None
837 834 self._repo = repo
838 835 self._names = [] # branch names in local encoding with static index
839 836 self._rbcrevs = rbcrevs(bytearray())
840 837 self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
841 838 try:
842 839 bndata = repo.cachevfs.read(_rbcnames)
843 840 self._rbcsnameslen = len(bndata) # for verification before writing
844 841 if bndata:
845 842 self._names = [
846 843 encoding.tolocal(bn) for bn in bndata.split(b'\0')
847 844 ]
848 845 except (IOError, OSError):
849 846 if readonly:
850 847 # don't try to use cache - fall back to the slow path
851 848 self.branchinfo = self._branchinfo
852 849
853 850 if self._names:
854 851 try:
855 852 if repo.ui.configbool(b'format', b'mmap-revbranchcache'):
856 853 with repo.cachevfs(_rbcrevs) as fp:
857 854 data = util.buffer(util.mmapread(fp))
858 855 else:
859 856 data = repo.cachevfs.read(_rbcrevs)
860 857 self._rbcrevs = rbcrevs(data)
861 858 except (IOError, OSError) as inst:
862 859 repo.ui.debug(
863 860 b"couldn't read revision branch cache: %s\n"
864 861 % stringutil.forcebytestr(inst)
865 862 )
866 863 # remember number of good records on disk
867 864 self._rbcrevslen = min(
868 865 len(self._rbcrevs) // _rbcrecsize, len(repo.changelog)
869 866 )
870 867 if self._rbcrevslen == 0:
871 868 self._names = []
872 869 self._rbcnamescount = len(self._names) # number of names read at
873 870 # _rbcsnameslen
874 871
875 872 def _clear(self):
876 873 self._rbcsnameslen = 0
877 874 del self._names[:]
878 875 self._rbcnamescount = 0
879 876 self._rbcrevslen = len(self._repo.changelog)
880 877 self._rbcrevs = rbcrevs(bytearray(self._rbcrevslen * _rbcrecsize))
881 878 util.clearcachedproperty(self, b'_namesreverse')
882 879
883 880 @util.propertycache
884 881 def _namesreverse(self):
885 882 return {b: r for r, b in enumerate(self._names)}
886 883
887 884 def branchinfo(self, rev):
888 885 """Return branch name and close flag for rev, using and updating
889 886 persistent cache."""
890 887 changelog = self._repo.changelog
891 888 rbcrevidx = rev * _rbcrecsize
892 889
893 890 # avoid negative index, changelog.read(nullrev) is fast without cache
894 891 if rev == nullrev:
895 892 return changelog.branchinfo(rev)
896 893
897 894 # if requested rev isn't allocated, grow and cache the rev info
898 895 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
899 896 return self._branchinfo(rev)
900 897
901 898 # fast path: extract data from cache, use it if node is matching
902 899 reponode = changelog.node(rev)[:_rbcnodelen]
903 900 cachenode, branchidx = self._rbcrevs.unpack_record(rbcrevidx)
904 901 close = bool(branchidx & _rbccloseflag)
905 902 if close:
906 903 branchidx &= _rbcbranchidxmask
907 904 if cachenode == b'\0\0\0\0':
908 905 pass
909 906 elif cachenode == reponode:
910 907 try:
911 908 return self._names[branchidx], close
912 909 except IndexError:
913 910 # recover from invalid reference to unknown branch
914 911 self._repo.ui.debug(
915 912 b"referenced branch names not found"
916 913 b" - rebuilding revision branch cache from scratch\n"
917 914 )
918 915 self._clear()
919 916 else:
920 917 # rev/node map has changed, invalidate the cache from here up
921 918 self._repo.ui.debug(
922 919 b"history modification detected - truncating "
923 920 b"revision branch cache to revision %d\n" % rev
924 921 )
925 922 truncate = rbcrevidx + _rbcrecsize
926 923 self._rbcrevs.truncate(truncate)
927 924 self._rbcrevslen = min(self._rbcrevslen, truncate)
928 925
929 926 # fall back to slow path and make sure it will be written to disk
930 927 return self._branchinfo(rev)
931 928
932 929 def _branchinfo(self, rev):
933 930 """Retrieve branch info from changelog and update _rbcrevs"""
934 931 changelog = self._repo.changelog
935 932 b, close = changelog.branchinfo(rev)
936 933 if b in self._namesreverse:
937 934 branchidx = self._namesreverse[b]
938 935 else:
939 936 branchidx = len(self._names)
940 937 self._names.append(b)
941 938 self._namesreverse[b] = branchidx
942 939 reponode = changelog.node(rev)
943 940 if close:
944 941 branchidx |= _rbccloseflag
945 942 self._setcachedata(rev, reponode, branchidx)
946 943 return b, close
947 944
948 945 def setdata(self, rev, changelogrevision):
949 946 """add new data information to the cache"""
950 947 branch, close = changelogrevision.branchinfo
951 948
952 949 if branch in self._namesreverse:
953 950 branchidx = self._namesreverse[branch]
954 951 else:
955 952 branchidx = len(self._names)
956 953 self._names.append(branch)
957 954 self._namesreverse[branch] = branchidx
958 955 if close:
959 956 branchidx |= _rbccloseflag
960 957 self._setcachedata(rev, self._repo.changelog.node(rev), branchidx)
961 958 # If no cache data were readable (file does not exist, bad permissions, etc.)
962 959 # the cache was bypassing itself by setting:
963 960 #
964 961 # self.branchinfo = self._branchinfo
965 962 #
966 963 # Since we now have data in the cache, we need to drop this bypassing.
967 964 if 'branchinfo' in vars(self):
968 965 del self.branchinfo
969 966
970 967 def _setcachedata(self, rev, node, branchidx):
971 968 """Writes the node's branch data to the in-memory cache data."""
972 969 if rev == nullrev:
973 970 return
974 971 rbcrevidx = rev * _rbcrecsize
975 972 self._rbcrevs.pack_into(rbcrevidx, node, branchidx)
976 973 self._rbcrevslen = min(self._rbcrevslen, rev)
977 974
978 975 tr = self._repo.currenttransaction()
979 976 if tr:
980 977 tr.addfinalize(b'write-revbranchcache', self.write)
981 978
982 979 def write(self, tr=None):
983 980 """Save branch cache if it is dirty."""
984 981 repo = self._repo
985 982 wlock = None
986 983 step = b''
987 984 try:
988 985 # write the new names
989 986 if self._rbcnamescount < len(self._names):
990 987 wlock = repo.wlock(wait=False)
991 988 step = b' names'
992 989 self._writenames(repo)
993 990
994 991 # write the new revs
995 992 start = self._rbcrevslen * _rbcrecsize
996 993 if start != len(self._rbcrevs):
997 994 step = b''
998 995 if wlock is None:
999 996 wlock = repo.wlock(wait=False)
1000 997 self._writerevs(repo, start)
1001 998
1002 999 except (IOError, OSError, error.Abort, error.LockError) as inst:
1003 1000 repo.ui.debug(
1004 1001 b"couldn't write revision branch cache%s: %s\n"
1005 1002 % (step, stringutil.forcebytestr(inst))
1006 1003 )
1007 1004 finally:
1008 1005 if wlock is not None:
1009 1006 wlock.release()
1010 1007
1011 1008 def _writenames(self, repo):
1012 1009 """write the new branch names to revbranchcache"""
1013 1010 if self._rbcnamescount != 0:
1014 1011 f = repo.cachevfs.open(_rbcnames, b'ab')
1015 1012 if f.tell() == self._rbcsnameslen:
1016 1013 f.write(b'\0')
1017 1014 else:
1018 1015 f.close()
1019 1016 repo.ui.debug(b"%s changed - rewriting it\n" % _rbcnames)
1020 1017 self._rbcnamescount = 0
1021 1018 self._rbcrevslen = 0
1022 1019 if self._rbcnamescount == 0:
1023 1020 # before rewriting names, make sure references are removed
1024 1021 repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
1025 1022 f = repo.cachevfs.open(_rbcnames, b'wb')
1026 1023 f.write(
1027 1024 b'\0'.join(
1028 1025 encoding.fromlocal(b)
1029 1026 for b in self._names[self._rbcnamescount :]
1030 1027 )
1031 1028 )
1032 1029 self._rbcsnameslen = f.tell()
1033 1030 f.close()
1034 1031 self._rbcnamescount = len(self._names)
1035 1032
1036 1033 def _writerevs(self, repo, start):
1037 1034 """write the new revs to revbranchcache"""
1038 1035 revs = min(len(repo.changelog), len(self._rbcrevs) // _rbcrecsize)
1039 1036 with repo.cachevfs.open(_rbcrevs, b'ab') as f:
1040 1037 if f.tell() != start:
1041 1038 repo.ui.debug(
1042 1039 b"truncating cache/%s to %d\n" % (_rbcrevs, start)
1043 1040 )
1044 1041 f.seek(start)
1045 1042 if f.tell() != start:
1046 1043 start = 0
1047 1044 f.seek(start)
1048 1045 f.truncate()
1049 1046 end = revs * _rbcrecsize
1050 1047 f.write(self._rbcrevs.slice(start, end))
1051 1048 self._rbcrevslen = revs