branchcache: pass a "verify_node" attribute to __init__ instead of hasnode...
marmoute
r52345:55158761 default
@@ -1,964 +1,973 @@
1 1 # branchmap.py - logic to compute, maintain and store the branchmap for a local repo
2 2 #
3 3 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import struct
10 10
11 11 from .node import (
12 12 bin,
13 13 hex,
14 14 nullrev,
15 15 )
16 16
17 17 from typing import (
18 18 Callable,
19 19 Dict,
20 20 Iterable,
21 21 List,
22 22 Optional,
23 23 Set,
24 24 TYPE_CHECKING,
25 25 Tuple,
26 26 Union,
27 27 )
28 28
29 29 from . import (
30 30 encoding,
31 31 error,
32 32 obsolete,
33 33 scmutil,
34 34 util,
35 35 )
36 36
37 37 from .utils import (
38 38 repoviewutil,
39 39 stringutil,
40 40 )
41 41
42 42 if TYPE_CHECKING:
43 43 from . import localrepo
44 44
45 45 assert [localrepo]
46 46
47 47 subsettable = repoviewutil.subsettable
48 48
49 49 calcsize = struct.calcsize
50 50 pack_into = struct.pack_into
51 51 unpack_from = struct.unpack_from
52 52
53 53
54 54 class BranchMapCache:
55 55 """mapping of filtered views of repo with their branchcache"""
56 56
57 57 def __init__(self):
58 58 self._per_filter = {}
59 59
60 60 def __getitem__(self, repo):
61 61 self.updatecache(repo)
62 62 bcache = self._per_filter[repo.filtername]
63 63 assert bcache._filtername == repo.filtername, (
64 64 bcache._filtername,
65 65 repo.filtername,
66 66 )
67 67 return bcache
68 68
69 69 def update_disk(self, repo):
70 70 """ensure and up-to-date cache is (or will be) written on disk
71 71
72 72 The cache for this repository view is updated if needed and written on
73 73 disk.
74 74
75 75 If a transaction is in progress, the write is scheduled for transaction
76 76 close. See the `BranchMapCache.write_delayed` method.
77 77
78 78 This method exists independently of __getitem__ as it is sometimes useful
79 79 to signal that we have no intent to use the data in memory yet.
80 80 """
81 81 self.updatecache(repo)
82 82 bcache = self._per_filter[repo.filtername]
83 83 assert bcache._filtername == repo.filtername, (
84 84 bcache._filtername,
85 85 repo.filtername,
86 86 )
87 87 bcache.write(repo)
88 88
89 89 def updatecache(self, repo):
90 90 """Update the cache for the given filtered view on a repository"""
91 91 # This can trigger updates for the caches for subsets of the filtered
92 92 # view, e.g. when there is no cache for this filtered view or the cache
93 93 # is stale.
94 94
95 95 cl = repo.changelog
96 96 filtername = repo.filtername
97 97 bcache = self._per_filter.get(filtername)
98 98 if bcache is None or not bcache.validfor(repo):
99 99 # cache object missing or cache object stale? Read from disk
100 100 bcache = branchcache.fromfile(repo)
101 101
102 102 revs = []
103 103 if bcache is None:
104 104 # no (fresh) cache available anymore, perhaps we can re-use
105 105 # the cache for a subset, then extend that to add info on missing
106 106 # revisions.
107 107 subsetname = subsettable.get(filtername)
108 108 if subsetname is not None:
109 109 subset = repo.filtered(subsetname)
110 110 bcache = self[subset].copy(repo)
111 111 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
112 112 revs.extend(r for r in extrarevs if r <= bcache.tiprev)
113 113 else:
114 114 # nothing to fall back on, start empty.
115 115 bcache = branchcache(repo)
116 116
117 117 revs.extend(cl.revs(start=bcache.tiprev + 1))
118 118 if revs:
119 119 bcache.update(repo, revs)
120 120
121 121 assert bcache.validfor(repo), filtername
122 122 self._per_filter[repo.filtername] = bcache
123 123
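In words: updatecache first tries the on-disk cache for this exact filter level; failing that, it copies the cache of the next-smaller repo view (per repoviewutil.subsettable) and only computes the leftover revisions. A minimal, self-contained sketch of that fallback walk, with an abridged subsettable (the real table lives in mercurial/utils/repoviewutil.py; the helper name is hypothetical):

    # schematic only: find the nearest smaller view with a usable cache
    subsettable = {b'visible': b'served', b'served': b'immutable',
                   b'immutable': b'base'}  # abridged

    def nearest_cached_subset(filtername, caches):
        """walk towards smaller views until one has a usable cache"""
        while filtername is not None and filtername not in caches:
            filtername = subsettable.get(filtername)
        return filtername  # None means: start from an empty cache

    # nearest_cached_subset(b'visible', {b'immutable': 1}) -> b'immutable'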
124 124 def replace(self, repo, remotebranchmap):
125 125 """Replace the branchmap cache for a repo with a branch mapping.
126 126
127 127 This is likely only called during clone with a branch map from a
128 128 remote.
129 129
130 130 """
131 131 cl = repo.changelog
132 132 clrev = cl.rev
133 133 clbranchinfo = cl.branchinfo
134 134 rbheads = []
135 135 closed = set()
136 136 for bheads in remotebranchmap.values():
137 137 rbheads += bheads
138 138 for h in bheads:
139 139 r = clrev(h)
140 140 b, c = clbranchinfo(r)
141 141 if c:
142 142 closed.add(h)
143 143
144 144 if rbheads:
145 145 rtiprev = max((int(clrev(node)) for node in rbheads))
146 146 cache = branchcache(
147 147 repo,
148 148 remotebranchmap,
149 149 repo[rtiprev].node(),
150 150 rtiprev,
151 151 closednodes=closed,
152 152 )
153 153
154 154 # Try to stick it as low as possible
155 155 # filters above served are unlikely to be fetched from a clone
156 156 for candidate in (b'base', b'immutable', b'served'):
157 157 rview = repo.filtered(candidate)
158 158 if cache.validfor(rview):
159 159 cache = self._per_filter[candidate] = cache.copy(rview)
160 160 cache.write(rview)
161 161 return
162 162
163 163 def clear(self):
164 164 self._per_filter.clear()
165 165
166 166 def write_delayed(self, repo):
167 167 unfi = repo.unfiltered()
168 168 for filtername, cache in self._per_filter.items():
169 169 if cache._delayed:
170 170 repo = unfi.filtered(filtername)
171 171 cache.write(repo)
172 172
173 173
174 174 def _unknownnode(node):
175 175 """raises ValueError when branchcache found a node which does not exists"""
176 176 raise ValueError('node %s does not exist' % node.hex())
177 177
178 178
179 179 def _branchcachedesc(repo):
180 180 if repo.filtername is not None:
181 181 return b'branch cache (%s)' % repo.filtername
182 182 else:
183 183 return b'branch cache'
184 184
185 185
186 186 class branchcache:
187 187 """A dict like object that hold branches heads cache.
188 188
189 189 This cache is used to avoid costly computations to determine all the
190 190 branch heads of a repo.
191 191
192 192 The cache is serialized on disk in the following format:
193 193
194 194 <tip hex node> <tip rev number> [optional filtered repo hex hash]
195 195 <branch head hex node> <open/closed state> <branch name>
196 196 <branch head hex node> <open/closed state> <branch name>
197 197 ...
198 198
199 199 The first line is used to check if the cache is still valid. If the
200 200 branch cache is for a filtered repo view, an optional third hash is
201 201 included that hashes the hashes of all filtered and obsolete revisions.
202 202
203 203 The open/closed state is represented by a single letter 'o' or 'c'.
204 204 This field can be used to avoid changelog reads when determining if a
205 205 branch head closes a branch or not.
206 206 """
207 207
208 208 def __init__(
209 209 self,
210 210 repo: "localrepo.localrepository",
211 211 entries: Union[
212 212 Dict[bytes, List[bytes]], Iterable[Tuple[bytes, List[bytes]]]
213 213 ] = (),
214 214 tipnode: Optional[bytes] = None,
215 215 tiprev: Optional[int] = nullrev,
216 216 filteredhash: Optional[bytes] = None,
217 217 closednodes: Optional[Set[bytes]] = None,
218 218 hasnode: Optional[Callable[[bytes], bool]] = None,
219 verify_node: bool = False,
219 220 ) -> None:
220 221 """hasnode is a function which can be used to verify whether changelog
221 222 has a given node or not. If it's not provided, we assume that every node
222 223 we have exists in changelog"""
223 224 self._filtername = repo.filtername
224 225 self._delayed = False
225 226 if tipnode is None:
226 227 self.tipnode = repo.nullid
227 228 else:
228 229 self.tipnode = tipnode
229 230 self.tiprev = tiprev
230 231 self.filteredhash = filteredhash
231 232 # closednodes is a set of nodes that close their branch. If the branch
232 233 # cache has been updated, it may contain nodes that are no longer
233 234 # heads.
234 235 if closednodes is None:
235 236 self._closednodes = set()
236 237 else:
237 238 self._closednodes = closednodes
238 239 self._entries = dict(entries)
240 # Do we need to verify branches at all?
241 self._verify_node = verify_node
239 242 # whether closed nodes are verified or not
240 243 self._closedverified = False
241 244 # branches for which nodes are verified
242 245 self._verifiedbranches = set()
243 self._hasnode = hasnode
244 if self._hasnode is None:
245 self._hasnode = lambda x: True
246 self._hasnode = None
247 if self._verify_node:
248 self._hasnode = repo.changelog.hasnode
246 249
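This commit replaces caller-supplied hasnode callables with a single verify_node flag; when verification is requested, the cache fetches repo.changelog.hasnode itself. A minimal standalone sketch of that pattern (the class and parameter names below are illustrative, not Mercurial API):

    # illustrative only: the lazy verify_node pattern in isolation
    class LazyVerifiedHeads:
        def __init__(self, entries, hasnode_source=None, verify_node=False):
            self._entries = dict(entries)
            self._verifiedbranches = set()
            self._hasnode = None
            if verify_node:
                # the real code takes this from repo.changelog.hasnode
                self._hasnode = hasnode_source

        def _verifybranch(self, branch):
            if self._hasnode is None or branch in self._verifiedbranches:
                return
            for node in self._entries.get(branch, ()):
                if not self._hasnode(node):
                    raise ValueError('node %r does not exist' % node)
            self._verifiedbranches.add(branch)

        def __getitem__(self, branch):
            self._verifybranch(branch)  # verification is lazy, per branch
            return self._entries[branch]

Branches are only checked on first access, and only when verify_node was requested; fromfile can therefore pass verify_node=True while copies simply inherit the flag.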
247 250 def _verifyclosed(self):
248 251 """verify the closed nodes we have"""
252 if not self._verify_node:
253 return
249 254 if self._closedverified:
250 255 return
256 assert self._hasnode is not None
251 257 for node in self._closednodes:
252 258 if not self._hasnode(node):
253 259 _unknownnode(node)
254 260
255 261 self._closedverified = True
256 262
257 263 def _verifybranch(self, branch):
258 264 """verify head nodes for the given branch."""
265 if not self._verify_node:
266 return
259 267 if branch not in self._entries or branch in self._verifiedbranches:
260 268 return
269 assert self._hasnode is not None
261 270 for n in self._entries[branch]:
262 271 if not self._hasnode(n):
263 272 _unknownnode(n)
264 273
265 274 self._verifiedbranches.add(branch)
266 275
267 276 def _verifyall(self):
268 277 """verifies nodes of all the branches"""
269 278 needverification = set(self._entries.keys()) - self._verifiedbranches
270 279 for b in needverification:
271 280 self._verifybranch(b)
272 281
273 282 def __iter__(self):
274 283 return iter(self._entries)
275 284
276 285 def __setitem__(self, key, value):
277 286 self._entries[key] = value
278 287
279 288 def __getitem__(self, key):
280 289 self._verifybranch(key)
281 290 return self._entries[key]
282 291
283 292 def __contains__(self, key):
284 293 self._verifybranch(key)
285 294 return key in self._entries
286 295
287 296 def iteritems(self):
288 297 for k, v in self._entries.items():
289 298 self._verifybranch(k)
290 299 yield k, v
291 300
292 301 items = iteritems
293 302
294 303 def hasbranch(self, label):
295 304 """checks whether a branch of this name exists or not"""
296 305 self._verifybranch(label)
297 306 return label in self._entries
298 307
299 308 @classmethod
300 309 def fromfile(cls, repo):
301 310 f = None
302 311 try:
303 312 f = repo.cachevfs(cls._filename(repo))
304 313 lineiter = iter(f)
305 314 cachekey = next(lineiter).rstrip(b'\n').split(b" ", 2)
306 315 last, lrev = cachekey[:2]
307 316 last, lrev = bin(last), int(lrev)
308 317 filteredhash = None
309 hasnode = repo.changelog.hasnode
310 318 if len(cachekey) > 2:
311 319 filteredhash = bin(cachekey[2])
312 320 bcache = cls(
313 321 repo,
314 322 tipnode=last,
315 323 tiprev=lrev,
316 324 filteredhash=filteredhash,
317 hasnode=hasnode,
325 verify_node=True,
318 326 )
319 327 if not bcache.validfor(repo):
320 328 # invalidate the cache
321 329 raise ValueError('tip differs')
322 330 bcache.load(repo, lineiter)
323 331 except (IOError, OSError):
324 332 return None
325 333
326 334 except Exception as inst:
327 335 if repo.ui.debugflag:
328 336 msg = b'invalid %s: %s\n'
329 337 repo.ui.debug(
330 338 msg
331 339 % (
332 340 _branchcachedesc(repo),
333 341 stringutil.forcebytestr(inst),
334 342 )
335 343 )
336 344 bcache = None
337 345
338 346 finally:
339 347 if f:
340 348 f.close()
341 349
342 350 return bcache
343 351
344 352 def load(self, repo, lineiter):
345 353 """fully loads the branchcache by reading from the file using the line
346 354 iterator passed"""
347 355 for line in lineiter:
348 356 line = line.rstrip(b'\n')
349 357 if not line:
350 358 continue
351 359 node, state, label = line.split(b" ", 2)
352 360 if state not in b'oc':
353 361 raise ValueError('invalid branch state')
354 362 label = encoding.tolocal(label.strip())
355 363 node = bin(node)
356 364 self._entries.setdefault(label, []).append(node)
357 365 if state == b'c':
358 366 self._closednodes.add(node)
359 367
360 368 @staticmethod
361 369 def _filename(repo):
362 370 """name of a branchcache file for a given repo or repoview"""
363 371 filename = b"branch2"
364 372 if repo.filtername:
365 373 filename = b'%s-%s' % (filename, repo.filtername)
366 374 return filename
367 375
368 376 def validfor(self, repo):
369 377 """check that cache contents are valid for (a subset of) this repo
370 378
371 379 - False when the order of changesets changed or if we detect a strip.
372 380 - True when cache is up-to-date for the current repo or its subset."""
373 381 try:
374 382 node = repo.changelog.node(self.tiprev)
375 383 except IndexError:
376 384 # changesets were stripped and now we don't even have enough to
377 385 # find tiprev
378 386 return False
379 387 if self.tipnode != node:
380 388 # tiprev doesn't correspond to tipnode: repo was stripped, or this
381 389 # repo has a different order of changesets
382 390 return False
383 391 tiphash = scmutil.filteredhash(repo, self.tiprev, needobsolete=True)
384 392 # hashes don't match if this repo view has a different set of filtered
385 393 # revisions (e.g. due to phase changes) or obsolete revisions (e.g.
386 394 # history was rewritten)
387 395 return self.filteredhash == tiphash
388 396
389 397 def _branchtip(self, heads):
390 398 """Return tuple with last open head in heads and false,
391 399 otherwise return last closed head and true."""
392 400 tip = heads[-1]
393 401 closed = True
394 402 for h in reversed(heads):
395 403 if h not in self._closednodes:
396 404 tip = h
397 405 closed = False
398 406 break
399 407 return tip, closed
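Concretely, with heads ordered oldest to tipmost (node values invented):

    heads = [b'n1', b'n2', b'n3']
    closed = {b'n3'}
    # scanning from the tip: n3 is closed, n2 is the first open head
    # -> _branchtip(heads) returns (b'n2', False)
    # with closed = {b'n1', b'n2', b'n3'} every head is closed
    # -> it returns (b'n3', True)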
400 408
401 409 def branchtip(self, branch):
402 410 """Return the tipmost open head on branch head, otherwise return the
403 411 tipmost closed head on branch.
404 412 Raise KeyError for unknown branch."""
405 413 return self._branchtip(self[branch])[0]
406 414
407 415 def iteropen(self, nodes):
408 416 return (n for n in nodes if n not in self._closednodes)
409 417
410 418 def branchheads(self, branch, closed=False):
411 419 self._verifybranch(branch)
412 420 heads = self._entries[branch]
413 421 if not closed:
414 422 heads = list(self.iteropen(heads))
415 423 return heads
416 424
417 425 def iterbranches(self):
418 426 for bn, heads in self.items():
419 427 yield (bn, heads) + self._branchtip(heads)
420 428
421 429 def iterheads(self):
422 430 """returns all the heads"""
423 431 self._verifyall()
424 432 return self._entries.values()
425 433
426 434 def copy(self, repo):
427 435 """return an deep copy of the branchcache object"""
428 436 return type(self)(
429 437 repo,
430 438 self._entries,
431 439 self.tipnode,
432 440 self.tiprev,
433 441 self.filteredhash,
434 442 self._closednodes,
443 verify_node=self._verify_node,
435 444 )
436 445
437 446 def write(self, repo):
438 447 assert self._filtername == repo.filtername, (
439 448 self._filtername,
440 449 repo.filtername,
441 450 )
442 451 tr = repo.currenttransaction()
443 452 if not getattr(tr, 'finalized', True):
444 453 # Avoid premature writing.
445 454 #
446 455 # (The cache warming setup by localrepo will update the file later.)
447 456 self._delayed = True
448 457 return
449 458 try:
450 459 filename = self._filename(repo)
451 460 with repo.cachevfs(filename, b"w", atomictemp=True) as f:
452 461 cachekey = [hex(self.tipnode), b'%d' % self.tiprev]
453 462 if self.filteredhash is not None:
454 463 cachekey.append(hex(self.filteredhash))
455 464 f.write(b" ".join(cachekey) + b'\n')
456 465 nodecount = 0
457 466 for label, nodes in sorted(self._entries.items()):
458 467 label = encoding.fromlocal(label)
459 468 for node in nodes:
460 469 nodecount += 1
461 470 if node in self._closednodes:
462 471 state = b'c'
463 472 else:
464 473 state = b'o'
465 474 f.write(b"%s %s %s\n" % (hex(node), state, label))
466 475 repo.ui.log(
467 476 b'branchcache',
468 477 b'wrote %s with %d labels and %d nodes\n',
469 478 _branchcachedesc(repo),
470 479 len(self._entries),
471 480 nodecount,
472 481 )
473 482 self._delayed = False
474 483 except (IOError, OSError, error.Abort) as inst:
475 484 # Abort may be raised by read only opener, so log and continue
476 485 repo.ui.debug(
477 486 b"couldn't write branch cache: %s\n"
478 487 % stringutil.forcebytestr(inst)
479 488 )
480 489
481 490 def update(self, repo, revgen):
482 491 """Given a branchhead cache, self, that may have extra nodes or be
483 492 missing heads, and a generator of nodes that are strictly a superset of
484 493 the missing heads, this function updates self to be correct.
485 494 """
486 495 assert self._filtername == repo.filtername, (
487 496 self._filtername,
488 497 repo.filtername,
489 498 )
490 499 starttime = util.timer()
491 500 cl = repo.changelog
492 501 # collect new branch entries
493 502 newbranches = {}
494 503 getbranchinfo = repo.revbranchcache().branchinfo
495 504 for r in revgen:
496 505 branch, closesbranch = getbranchinfo(r)
497 506 newbranches.setdefault(branch, []).append(r)
498 507 if closesbranch:
499 508 self._closednodes.add(cl.node(r))
500 509
501 510 # new tip revision which we found after iterating items from new
502 511 # branches
503 512 ntiprev = self.tiprev
504 513
505 514 # Delay fetching the topological heads until they are needed.
506 515 # A repository without non-contiguous branches can skip this part.
507 516 topoheads = None
508 517
509 518 # If a changeset is visible, its parents must be visible too, so
510 519 # use the faster unfiltered parent accessor.
511 520 parentrevs = repo.unfiltered().changelog.parentrevs
512 521
513 522 # Faster than using ctx.obsolete()
514 523 obsrevs = obsolete.getrevs(repo, b'obsolete')
515 524
516 525 for branch, newheadrevs in newbranches.items():
517 526 # For every branch, compute the new branchheads.
518 527 # A branchhead is a revision such that no descendant is on
519 528 # the same branch.
520 529 #
521 530 # The branchheads are computed iteratively in revision order.
522 531 # This ensures topological order, i.e. parents are processed
523 532 # before their children. Ancestors are inclusive here, i.e.
524 533 # any revision is an ancestor of itself.
525 534 #
526 535 # Core observations:
527 536 # - The current revision is always a branchhead for the
528 537 # repository up to that point.
529 538 # - It is the first revision of the branch if and only if
530 539 # there was no branchhead before. In that case, it is the
531 540 # only branchhead as there are no possible ancestors on
532 541 # the same branch.
533 542 # - If a parent is on the same branch, a branchhead can
534 543 # only be an ancestor of that parent if it is the parent
535 544 # itself. Otherwise it would have been removed as an ancestor
536 545 # of that parent before.
537 546 # - Therefore, if all parents are on the same branch, they
538 547 # can just be removed from the branchhead set.
539 548 # - If one parent is on the same branch and the other is not
540 549 # and there was exactly one branchhead known, the existing
541 550 # branchhead can only be an ancestor if it is the parent.
542 551 # Otherwise it would have been removed as ancestor of
543 552 # the parent before. The other parent therefore can't have
544 553 # a branchhead as ancestor.
545 554 # - In all other cases, the parents on different branches
546 555 # could have a branchhead as ancestor. Those parents are
547 556 # kept in the "uncertain" set. If all branchheads are also
548 557 # topological heads, they can't have descendants and further
549 558 # checks can be skipped. Otherwise, the ancestors of the
550 559 # "uncertain" set are removed from branchheads.
551 560 # This computation is heavy and avoided if at all possible.
552 561 bheads = self._entries.get(branch, [])
553 562 bheadset = {cl.rev(node) for node in bheads}
554 563 uncertain = set()
555 564 for newrev in sorted(newheadrevs):
556 565 if newrev in obsrevs:
557 566 # We ignore obsolete changesets as they shouldn't be
558 567 # considered heads.
559 568 continue
560 569
561 570 if not bheadset:
562 571 bheadset.add(newrev)
563 572 continue
564 573
565 574 parents = [p for p in parentrevs(newrev) if p != nullrev]
566 575 samebranch = set()
567 576 otherbranch = set()
568 577 obsparents = set()
569 578 for p in parents:
570 579 if p in obsrevs:
571 580 # We ignored this obsolete changeset earlier, but now
572 581 # that it has non-ignored children, we need to make
573 582 # sure their ancestors are not considered heads. To
574 583 # achieve that, we will simply treat this obsolete
575 584 # changeset as a parent from other branch.
576 585 obsparents.add(p)
577 586 elif p in bheadset or getbranchinfo(p)[0] == branch:
578 587 samebranch.add(p)
579 588 else:
580 589 otherbranch.add(p)
581 590 if not (len(bheadset) == len(samebranch) == 1):
582 591 uncertain.update(otherbranch)
583 592 uncertain.update(obsparents)
584 593 bheadset.difference_update(samebranch)
585 594 bheadset.add(newrev)
586 595
587 596 if uncertain:
588 597 if topoheads is None:
589 598 topoheads = set(cl.headrevs())
590 599 if bheadset - topoheads:
591 600 floorrev = min(bheadset)
592 601 if floorrev <= max(uncertain):
593 602 ancestors = set(cl.ancestors(uncertain, floorrev))
594 603 bheadset -= ancestors
595 604 if bheadset:
596 605 self[branch] = [cl.node(rev) for rev in sorted(bheadset)]
597 606 tiprev = max(newheadrevs)
598 607 if tiprev > ntiprev:
599 608 ntiprev = tiprev
600 609
601 610 if ntiprev > self.tiprev:
602 611 self.tiprev = ntiprev
603 612 self.tipnode = cl.node(ntiprev)
604 613
605 614 if not self.validfor(repo):
606 615 # old cache key is now invalid for the repo, but we've just updated
607 616 # the cache and we assume it's valid, so let's make the cache key
608 617 # valid as well by recomputing it from the cached data
609 618 self.tipnode = repo.nullid
610 619 self.tiprev = nullrev
611 620 for heads in self.iterheads():
612 621 if not heads:
613 622 # all revisions on a branch are obsolete
614 623 continue
615 624 # note: tiprev is not necessarily the tip revision of repo,
616 625 # because the tip could be obsolete (i.e. not a head)
617 626 tiprev = max(cl.rev(node) for node in heads)
618 627 if tiprev > self.tiprev:
619 628 self.tipnode = cl.node(tiprev)
620 629 self.tiprev = tiprev
621 630 self.filteredhash = scmutil.filteredhash(
622 631 repo, self.tiprev, needobsolete=True
623 632 )
624 633
625 634 duration = util.timer() - starttime
626 635 repo.ui.log(
627 636 b'branchcache',
628 637 b'updated %s in %.4f seconds\n',
629 638 _branchcachedesc(repo),
630 639 duration,
631 640 )
632 641
633 642 self.write(repo)
634 643
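A condensed, self-contained sketch of the per-branch head update described in the comments above (toy data; obsolescence, filtering and the topological-heads shortcut are omitted, and the function name is hypothetical):

    def updated_heads(parents, branch_of, old_heads, newrevs):
        """parents[r]: tuple of parent revs; branch_of[r]: branch of rev r"""
        headset = set(old_heads)
        uncertain = set()
        for r in sorted(newrevs):  # topological order: parents first
            same = {p for p in parents[r] if branch_of[p] == branch_of[r]}
            if not (len(headset) == len(same) == 1):
                # parents on other branches might still shadow an old head
                uncertain.update(set(parents[r]) - same)
            headset -= same        # same-branch parents are no longer heads
            headset.add(r)         # the new rev always is, so far
        # the real code additionally strips any head that is an ancestor of
        # a rev in 'uncertain'; that expensive pass is skipped when all
        # heads are topological heads
        return sorted(headset)

    # linear branch 0 <- 1 <- 2: only 2 survives as head
    # updated_heads({0: (), 1: (0,), 2: (1,)},
    #               {0: b'default', 1: b'default', 2: b'default'},
    #               old_heads=(), newrevs=[0, 1, 2]) == [2]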
635 644
636 645 class remotebranchcache(branchcache):
637 646 """Branchmap info for a remote connection, should not write locally"""
638 647
639 648 def write(self, repo):
640 649 pass
641 650
642 651
643 652 # Revision branch info cache
644 653
645 654 _rbcversion = b'-v1'
646 655 _rbcnames = b'rbc-names' + _rbcversion
647 656 _rbcrevs = b'rbc-revs' + _rbcversion
648 657 # [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
649 658 _rbcrecfmt = b'>4sI'
650 659 _rbcrecsize = calcsize(_rbcrecfmt)
651 660 _rbcmininc = 64 * _rbcrecsize
652 661 _rbcnodelen = 4
653 662 _rbcbranchidxmask = 0x7FFFFFFF
654 663 _rbccloseflag = 0x80000000
655 664
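Concretely, one rbc-revs record can be built and decoded with plain struct calls using the format and flags defined above (values invented):

    import struct

    # record for a branch-close commit whose branch name has index 5
    rec = struct.pack(b'>4sI', b'\x9f\x3a\x0c\x5d', 5 | 0x80000000)
    node_prefix, field = struct.unpack(b'>4sI', rec)
    close = bool(field & 0x80000000)       # True: a branch-close commit
    branchidx = field & 0x7FFFFFFF         # 5: index into rbc-names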
656 665
657 666 class rbcrevs:
658 667 """a byte string consisting of an immutable prefix followed by a mutable suffix"""
659 668
660 669 def __init__(self, revs):
661 670 self._prefix = revs
662 671 self._rest = bytearray()
663 672
664 673 def __len__(self):
665 674 return len(self._prefix) + len(self._rest)
666 675
667 676 def unpack_record(self, rbcrevidx):
668 677 if rbcrevidx < len(self._prefix):
669 678 return unpack_from(_rbcrecfmt, util.buffer(self._prefix), rbcrevidx)
670 679 else:
671 680 return unpack_from(
672 681 _rbcrecfmt,
673 682 util.buffer(self._rest),
674 683 rbcrevidx - len(self._prefix),
675 684 )
676 685
677 686 def make_mutable(self):
678 687 if len(self._prefix) > 0:
679 688 entirety = bytearray()
680 689 entirety[:] = self._prefix
681 690 entirety.extend(self._rest)
682 691 self._rest = entirety
683 692 self._prefix = bytearray()
684 693
685 694 def truncate(self, pos):
686 695 self.make_mutable()
687 696 del self._rest[pos:]
688 697
689 698 def pack_into(self, rbcrevidx, node, branchidx):
690 699 if rbcrevidx < len(self._prefix):
691 700 self.make_mutable()
692 701 buf = self._rest
693 702 start_offset = rbcrevidx - len(self._prefix)
694 703 end_offset = start_offset + _rbcrecsize
695 704
696 705 if len(self._rest) < end_offset:
697 706 # bytearray doesn't allocate extra space at least in Python 3.7.
698 707 # When multiple changesets are added in a row, precise resize would
699 708 # result in quadratic complexity. Overallocate to compensate by
700 709 # using the classic doubling technique for dynamic arrays instead.
701 710 # If there was a gap in the map before, less space will be reserved.
702 711 self._rest.extend(b'\0' * end_offset)
703 712 return pack_into(
704 713 _rbcrecfmt,
705 714 buf,
706 715 start_offset,
707 716 node,
708 717 branchidx,
709 718 )
710 719
711 720 def extend(self, extension):
712 721 return self._rest.extend(extension)
713 722
714 723 def slice(self, begin, end):
715 724 if begin < len(self._prefix):
716 725 acc = bytearray()
717 726 acc[:] = self._prefix[begin:end]
718 727 acc.extend(
719 728 self._rest[begin - len(self._prefix) : end - len(self._prefix)]
720 729 )
721 730 return acc
722 731 return self._rest[begin - len(self._prefix) : end - len(self._prefix)]
723 732
724 733
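In words: rbcrevs keeps the (possibly mmap'd, immutable) file content as a prefix and funnels all writes into a bytearray suffix, copying everything into a mutable bytearray only when a write lands inside the prefix. A quick usage sketch against the class above (assuming its definitions are importable; each record is 8 bytes):

    revs = rbcrevs(b'\x00' * 16)                 # two pre-existing records
    revs.pack_into(16, b'\xaa\xbb\xcc\xdd', 7)   # append: touches only the suffix
    revs.pack_into(0, b'\x11\x22\x33\x44', 1)    # rewrite inside the prefix:
                                                 # triggers make_mutable()
    revs.unpack_record(0)                        # -> (b'\x11\x22\x33\x44', 1)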
725 734 class revbranchcache:
726 735 """Persistent cache, mapping from revision number to branch name and close.
727 736 This is a low level cache, independent of filtering.
728 737
729 738 Branch names are stored in rbc-names in internal encoding separated by 0.
730 739 rbc-names is append-only, and each branch name is only stored once and will
731 740 thus have a unique index.
732 741
733 742 The branch info for each revision is stored in rbc-revs as constant size
734 743 records. The whole file is read into memory, but it is only 'parsed' on
735 744 demand. The file is usually append-only but will be truncated if repo
736 745 modification is detected.
737 746 The record for each revision contains the first 4 bytes of the
738 747 corresponding node hash, and the record is only used if it still matches.
739 748 Even a completely trashed rbc-revs file will thus still give the right result
740 749 while converging towards full recovery ... assuming no incorrectly matching
741 750 node hashes.
742 751 The record also contains 4 bytes where 31 bits contain the index of the
743 752 branch and the last bit indicates that it is a branch-close commit.
744 753 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
745 754 and will grow with it but be 1/8th of its size.
746 755 """
747 756
748 757 def __init__(self, repo, readonly=True):
749 758 assert repo.filtername is None
750 759 self._repo = repo
751 760 self._names = [] # branch names in local encoding with static index
752 761 self._rbcrevs = rbcrevs(bytearray())
753 762 self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
754 763 try:
755 764 bndata = repo.cachevfs.read(_rbcnames)
756 765 self._rbcsnameslen = len(bndata) # for verification before writing
757 766 if bndata:
758 767 self._names = [
759 768 encoding.tolocal(bn) for bn in bndata.split(b'\0')
760 769 ]
761 770 except (IOError, OSError):
762 771 if readonly:
763 772 # don't try to use cache - fall back to the slow path
764 773 self.branchinfo = self._branchinfo
765 774
766 775 if self._names:
767 776 try:
768 777 if repo.ui.configbool(b'format', b'mmap-revbranchcache'):
769 778 with repo.cachevfs(_rbcrevs) as fp:
770 779 data = util.buffer(util.mmapread(fp))
771 780 else:
772 781 data = repo.cachevfs.read(_rbcrevs)
773 782 self._rbcrevs = rbcrevs(data)
774 783 except (IOError, OSError) as inst:
775 784 repo.ui.debug(
776 785 b"couldn't read revision branch cache: %s\n"
777 786 % stringutil.forcebytestr(inst)
778 787 )
779 788 # remember number of good records on disk
780 789 self._rbcrevslen = min(
781 790 len(self._rbcrevs) // _rbcrecsize, len(repo.changelog)
782 791 )
783 792 if self._rbcrevslen == 0:
784 793 self._names = []
785 794 self._rbcnamescount = len(self._names) # number of names read at
786 795 # _rbcsnameslen
787 796
788 797 def _clear(self):
789 798 self._rbcsnameslen = 0
790 799 del self._names[:]
791 800 self._rbcnamescount = 0
792 801 self._rbcrevslen = len(self._repo.changelog)
793 802 self._rbcrevs = rbcrevs(bytearray(self._rbcrevslen * _rbcrecsize))
794 803 util.clearcachedproperty(self, b'_namesreverse')
795 804
796 805 @util.propertycache
797 806 def _namesreverse(self):
798 807 return {b: r for r, b in enumerate(self._names)}
799 808
800 809 def branchinfo(self, rev):
801 810 """Return branch name and close flag for rev, using and updating
802 811 persistent cache."""
803 812 changelog = self._repo.changelog
804 813 rbcrevidx = rev * _rbcrecsize
805 814
806 815 # avoid negative index, changelog.read(nullrev) is fast without cache
807 816 if rev == nullrev:
808 817 return changelog.branchinfo(rev)
809 818
810 819 # if requested rev isn't allocated, grow and cache the rev info
811 820 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
812 821 return self._branchinfo(rev)
813 822
814 823 # fast path: extract data from cache, use it if node is matching
815 824 reponode = changelog.node(rev)[:_rbcnodelen]
816 825 cachenode, branchidx = self._rbcrevs.unpack_record(rbcrevidx)
817 826 close = bool(branchidx & _rbccloseflag)
818 827 if close:
819 828 branchidx &= _rbcbranchidxmask
820 829 if cachenode == b'\0\0\0\0':
821 830 pass
822 831 elif cachenode == reponode:
823 832 try:
824 833 return self._names[branchidx], close
825 834 except IndexError:
826 835 # recover from invalid reference to unknown branch
827 836 self._repo.ui.debug(
828 837 b"referenced branch names not found"
829 838 b" - rebuilding revision branch cache from scratch\n"
830 839 )
831 840 self._clear()
832 841 else:
833 842 # rev/node map has changed, invalidate the cache from here up
834 843 self._repo.ui.debug(
835 844 b"history modification detected - truncating "
836 845 b"revision branch cache to revision %d\n" % rev
837 846 )
838 847 truncate = rbcrevidx + _rbcrecsize
839 848 self._rbcrevs.truncate(truncate)
840 849 self._rbcrevslen = min(self._rbcrevslen, truncate)
841 850
842 851 # fall back to slow path and make sure it will be written to disk
843 852 return self._branchinfo(rev)
844 853
845 854 def _branchinfo(self, rev):
846 855 """Retrieve branch info from changelog and update _rbcrevs"""
847 856 changelog = self._repo.changelog
848 857 b, close = changelog.branchinfo(rev)
849 858 if b in self._namesreverse:
850 859 branchidx = self._namesreverse[b]
851 860 else:
852 861 branchidx = len(self._names)
853 862 self._names.append(b)
854 863 self._namesreverse[b] = branchidx
855 864 reponode = changelog.node(rev)
856 865 if close:
857 866 branchidx |= _rbccloseflag
858 867 self._setcachedata(rev, reponode, branchidx)
859 868 return b, close
860 869
861 870 def setdata(self, rev, changelogrevision):
862 871 """add new data information to the cache"""
863 872 branch, close = changelogrevision.branchinfo
864 873
865 874 if branch in self._namesreverse:
866 875 branchidx = self._namesreverse[branch]
867 876 else:
868 877 branchidx = len(self._names)
869 878 self._names.append(branch)
870 879 self._namesreverse[branch] = branchidx
871 880 if close:
872 881 branchidx |= _rbccloseflag
873 882 self._setcachedata(rev, self._repo.changelog.node(rev), branchidx)
874 883 # If no cache data was readable (none exists, bad permissions, etc.)
875 884 # the cache was bypassing itself by setting:
876 885 #
877 886 # self.branchinfo = self._branchinfo
878 887 #
879 888 # Since we now have data in the cache, we need to drop this bypassing.
880 889 if 'branchinfo' in vars(self):
881 890 del self.branchinfo
882 891
883 892 def _setcachedata(self, rev, node, branchidx):
884 893 """Writes the node's branch data to the in-memory cache data."""
885 894 if rev == nullrev:
886 895 return
887 896 rbcrevidx = rev * _rbcrecsize
888 897 self._rbcrevs.pack_into(rbcrevidx, node, branchidx)
889 898 self._rbcrevslen = min(self._rbcrevslen, rev)
890 899
891 900 tr = self._repo.currenttransaction()
892 901 if tr:
893 902 tr.addfinalize(b'write-revbranchcache', self.write)
894 903
895 904 def write(self, tr=None):
896 905 """Save branch cache if it is dirty."""
897 906 repo = self._repo
898 907 wlock = None
899 908 step = b''
900 909 try:
901 910 # write the new names
902 911 if self._rbcnamescount < len(self._names):
903 912 wlock = repo.wlock(wait=False)
904 913 step = b' names'
905 914 self._writenames(repo)
906 915
907 916 # write the new revs
908 917 start = self._rbcrevslen * _rbcrecsize
909 918 if start != len(self._rbcrevs):
910 919 step = b''
911 920 if wlock is None:
912 921 wlock = repo.wlock(wait=False)
913 922 self._writerevs(repo, start)
914 923
915 924 except (IOError, OSError, error.Abort, error.LockError) as inst:
916 925 repo.ui.debug(
917 926 b"couldn't write revision branch cache%s: %s\n"
918 927 % (step, stringutil.forcebytestr(inst))
919 928 )
920 929 finally:
921 930 if wlock is not None:
922 931 wlock.release()
923 932
924 933 def _writenames(self, repo):
925 934 """write the new branch names to revbranchcache"""
926 935 if self._rbcnamescount != 0:
927 936 f = repo.cachevfs.open(_rbcnames, b'ab')
928 937 if f.tell() == self._rbcsnameslen:
929 938 f.write(b'\0')
930 939 else:
931 940 f.close()
932 941 repo.ui.debug(b"%s changed - rewriting it\n" % _rbcnames)
933 942 self._rbcnamescount = 0
934 943 self._rbcrevslen = 0
935 944 if self._rbcnamescount == 0:
936 945 # before rewriting names, make sure references are removed
937 946 repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
938 947 f = repo.cachevfs.open(_rbcnames, b'wb')
939 948 f.write(
940 949 b'\0'.join(
941 950 encoding.fromlocal(b)
942 951 for b in self._names[self._rbcnamescount :]
943 952 )
944 953 )
945 954 self._rbcsnameslen = f.tell()
946 955 f.close()
947 956 self._rbcnamescount = len(self._names)
948 957
949 958 def _writerevs(self, repo, start):
950 959 """write the new revs to revbranchcache"""
951 960 revs = min(len(repo.changelog), len(self._rbcrevs) // _rbcrecsize)
952 961 with repo.cachevfs.open(_rbcrevs, b'ab') as f:
953 962 if f.tell() != start:
954 963 repo.ui.debug(
955 964 b"truncating cache/%s to %d\n" % (_rbcrevs, start)
956 965 )
957 966 f.seek(start)
958 967 if f.tell() != start:
959 968 start = 0
960 969 f.seek(start)
961 970 f.truncate()
962 971 end = revs * _rbcrecsize
963 972 f.write(self._rbcrevs.slice(start, end))
964 973 self._rbcrevslen = revs