branchmap: move __init__ up in branchcache class...
Pulkit Goyal
r41826:bfc49f1d default
@@ -1,595 +1,596 @@
1 1 # branchmap.py - logic to compute, maintain and store the branchmap for a local repo
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import struct
11 11
12 12 from .node import (
13 13 bin,
14 14 hex,
15 15 nullid,
16 16 nullrev,
17 17 )
18 18 from . import (
19 19 encoding,
20 20 error,
21 21 pycompat,
22 22 scmutil,
23 23 util,
24 24 )
25 25 from .utils import (
26 26 stringutil,
27 27 )
28 28
29 29 calcsize = struct.calcsize
30 30 pack_into = struct.pack_into
31 31 unpack_from = struct.unpack_from
32 32
33 33
34 34 ### Nearest subset relation
35 35 # Nearest subset of filter X is a filter Y so that:
36 36 # * Y is included in X,
37 37 # * X - Y is as small as possible.
37 37 # This creates an ordering used for branchmap purposes.
38 38 # The ordering may be partial.
40 40 subsettable = {None: 'visible',
41 41 'visible-hidden': 'visible',
42 42 'visible': 'served',
43 43 'served': 'immutable',
44 44 'immutable': 'base'}
45 45
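As a standalone illustration (not part of the module), the fallback chain for any filter name can be recovered by walking subsettable until it bottoms out at 'base':

```python
# Sketch: walk the nearest-subset chain defined by subsettable above.
subsettable = {None: 'visible',
               'visible-hidden': 'visible',
               'visible': 'served',
               'served': 'immutable',
               'immutable': 'base'}

name, chain = None, []
while name in subsettable:
    name = subsettable[name]
    chain.append(name)
print(chain)  # ['visible', 'served', 'immutable', 'base']
```

This is the ordering updatecache() relies on when it rebuilds a missing cache from the cache of a nearby subset.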
46 46
47 47 class BranchMapCache(object):
48 48 """Cache mapping"""
49 49 def __init__(self):
50 50 self._per_filter = {}
51 51
52 52 def __getitem__(self, repo):
53 53 self.updatecache(repo)
54 54 return self._per_filter[repo.filtername]
55 55
56 56 def updatecache(self, repo):
57 57 """Update the cache for the given filtered view on a repository"""
58 58 # This can trigger updates for the caches for subsets of the filtered
59 59 # view, e.g. when there is no cache for this filtered view or the cache
60 60 # is stale.
61 61
62 62 cl = repo.changelog
63 63 filtername = repo.filtername
64 64 bcache = self._per_filter.get(filtername)
65 65 if bcache is None or not bcache.validfor(repo):
66 66 # cache object missing or cache object stale? Read from disk
67 67 bcache = branchcache.fromfile(repo)
68 68
69 69 revs = []
70 70 if bcache is None:
71 71 # no (fresh) cache available anymore, perhaps we can re-use
72 72 # the cache for a subset, then extend that to add info on missing
73 73 # revisions.
74 74 subsetname = subsettable.get(filtername)
75 75 if subsetname is not None:
76 76 subset = repo.filtered(subsetname)
77 77 bcache = self[subset].copy()
78 78 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
79 79 revs.extend(r for r in extrarevs if r <= bcache.tiprev)
80 80 else:
81 81 # nothing to fall back on, start empty.
82 82 bcache = branchcache()
83 83
84 84 revs.extend(cl.revs(start=bcache.tiprev + 1))
85 85 if revs:
86 86 bcache.update(repo, revs)
87 87
88 88 assert bcache.validfor(repo), filtername
89 89 self._per_filter[repo.filtername] = bcache
90 90
91 91 def replace(self, repo, remotebranchmap):
92 92 """Replace the branchmap cache for a repo with a branch mapping.
93 93
94 94 This is likely only called during clone with a branch map from a
95 95 remote.
96 96
97 97 """
98 98 cl = repo.changelog
99 99 clrev = cl.rev
100 100 clbranchinfo = cl.branchinfo
101 101 rbheads = []
102 102 closed = []
103 103 for bheads in remotebranchmap.itervalues():
104 104 rbheads += bheads
105 105 for h in bheads:
106 106 r = clrev(h)
107 107 b, c = clbranchinfo(r)
108 108 if c:
109 109 closed.append(h)
110 110
111 111 if rbheads:
112 112 rtiprev = max((int(clrev(node)) for node in rbheads))
113 113 cache = branchcache(
114 114 remotebranchmap, repo[rtiprev].node(), rtiprev,
115 115 closednodes=closed)
116 116
117 117 # Try to stick it as low as possible
118 118 # filters above served are unlikely to be fetched from a clone
119 119 for candidate in ('base', 'immutable', 'served'):
120 120 rview = repo.filtered(candidate)
121 121 if cache.validfor(rview):
122 122 self._per_filter[candidate] = cache
123 123 cache.write(rview)
124 124 return
125 125
126 126 def clear(self):
127 127 self._per_filter.clear()
128 128
129 129
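A minimal usage sketch; `repo` is assumed to be an existing (possibly filtered) local repository object, which this snippet does not construct:

```python
cache = BranchMapCache()
bmap = cache[repo]                   # runs updatecache() if missing or stale
heads = bmap.branchheads('default')  # open heads of branch 'default'
cache.clear()                        # drop all per-filter entries
```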
130 130 class branchcache(dict):
131 131 """A dict like object that hold branches heads cache.
132 132
133 133 This cache is used to avoid costly computations to determine all the
134 134 branch heads of a repo.
135 135
136 136 The cache is serialized on disk in the following format:
137 137
138 138 <tip hex node> <tip rev number> [optional filtered repo hex hash]
139 139 <branch head hex node> <open/closed state> <branch name>
140 140 <branch head hex node> <open/closed state> <branch name>
141 141 ...
142 142
143 143 The first line is used to check if the cache is still valid. If the
144 144 branch cache is for a filtered repo view, an optional third hash is
145 145 included that hashes the hashes of all filtered revisions.
146 146
147 147 The open/closed state is represented by a single letter 'o' or 'c'.
148 148 This field can be used to avoid changelog reads when determining if a
149 149 branch head closes a branch or not.
150 150 """
151
152 def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
153 filteredhash=None, closednodes=None):
154 super(branchcache, self).__init__(entries)
155 self.tipnode = tipnode
156 self.tiprev = tiprev
157 self.filteredhash = filteredhash
158 # closednodes is a set of nodes that close their branch. If the branch
159 # cache has been updated, it may contain nodes that are no longer
160 # heads.
161 if closednodes is None:
162 self._closednodes = set()
163 else:
164 self._closednodes = closednodes
165
151 166 @classmethod
152 167 def fromfile(cls, repo):
153 168 f = None
154 169 try:
155 170 f = repo.cachevfs(cls._filename(repo))
156 171 lineiter = iter(f)
157 172 cachekey = next(lineiter).rstrip('\n').split(" ", 2)
158 173 last, lrev = cachekey[:2]
159 174 last, lrev = bin(last), int(lrev)
160 175 filteredhash = None
161 176 if len(cachekey) > 2:
162 177 filteredhash = bin(cachekey[2])
163 178 bcache = cls(tipnode=last, tiprev=lrev, filteredhash=filteredhash)
164 179 if not bcache.validfor(repo):
165 180 # invalidate the cache
166 181 raise ValueError(r'tip differs')
167 182 cl = repo.changelog
168 183 for line in lineiter:
169 184 line = line.rstrip('\n')
170 185 if not line:
171 186 continue
172 187 node, state, label = line.split(" ", 2)
173 188 if state not in 'oc':
174 189 raise ValueError(r'invalid branch state')
175 190 label = encoding.tolocal(label.strip())
176 191 node = bin(node)
177 192 if not cl.hasnode(node):
178 193 raise ValueError(
179 194 r'node %s does not exist' % pycompat.sysstr(hex(node)))
180 195 bcache.setdefault(label, []).append(node)
181 196 if state == 'c':
182 197 bcache._closednodes.add(node)
183 198
184 199 except (IOError, OSError):
185 200 return None
186 201
187 202 except Exception as inst:
188 203 if repo.ui.debugflag:
189 204 msg = 'invalid branchheads cache'
190 205 if repo.filtername is not None:
191 206 msg += ' (%s)' % repo.filtername
192 207 msg += ': %s\n'
193 208 repo.ui.debug(msg % pycompat.bytestr(inst))
194 209 bcache = None
195 210
196 211 finally:
197 212 if f:
198 213 f.close()
199 214
200 215 return bcache
201 216
202 217 @staticmethod
203 218 def _filename(repo):
204 219 """name of a branchcache file for a given repo or repoview"""
205 220 filename = "branch2"
206 221 if repo.filtername:
207 222 filename = '%s-%s' % (filename, repo.filtername)
208 223 return filename
209 224
210 def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
211 filteredhash=None, closednodes=None):
212 super(branchcache, self).__init__(entries)
213 self.tipnode = tipnode
214 self.tiprev = tiprev
215 self.filteredhash = filteredhash
216 # closednodes is a set of nodes that close their branch. If the branch
217 # cache has been updated, it may contain nodes that are no longer
218 # heads.
219 if closednodes is None:
220 self._closednodes = set()
221 else:
222 self._closednodes = closednodes
223
224 225 def validfor(self, repo):
225 226 """Is the cache content valid regarding a repo
226 227
227 228 - False when cached tipnode is unknown or if we detect a strip.
228 229 - True when cache is up to date or a subset of current repo."""
229 230 try:
230 231 return ((self.tipnode == repo.changelog.node(self.tiprev))
231 232 and (self.filteredhash == \
232 233 scmutil.filteredhash(repo, self.tiprev)))
233 234 except IndexError:
234 235 return False
235 236
236 237 def _branchtip(self, heads):
237 238 '''Return a tuple of (last open head in heads, False); if every head
238 239 is closed, return (last closed head, True).'''
239 240 tip = heads[-1]
240 241 closed = True
241 242 for h in reversed(heads):
242 243 if h not in self._closednodes:
243 244 tip = h
244 245 closed = False
245 246 break
246 247 return tip, closed
247 248
248 249 def branchtip(self, branch):
249 250 '''Return the tipmost open head on the branch, otherwise return the
250 251 tipmost closed head on the branch.
251 252 Raise KeyError for unknown branch.'''
252 253 return self._branchtip(self[branch])[0]
253 254
254 255 def iteropen(self, nodes):
255 256 return (n for n in nodes if n not in self._closednodes)
256 257
257 258 def branchheads(self, branch, closed=False):
258 259 heads = self[branch]
259 260 if not closed:
260 261 heads = list(self.iteropen(heads))
261 262 return heads
262 263
263 264 def iterbranches(self):
264 265 for bn, heads in self.iteritems():
265 266 yield (bn, heads) + self._branchtip(heads)
266 267
267 268 def copy(self):
268 269 """return an deep copy of the branchcache object"""
269 270 return type(self)(
270 271 self, self.tipnode, self.tiprev, self.filteredhash,
271 272 self._closednodes)
272 273
273 274 def write(self, repo):
274 275 try:
275 276 f = repo.cachevfs(self._filename(repo), "w", atomictemp=True)
276 277 cachekey = [hex(self.tipnode), '%d' % self.tiprev]
277 278 if self.filteredhash is not None:
278 279 cachekey.append(hex(self.filteredhash))
279 280 f.write(" ".join(cachekey) + '\n')
280 281 nodecount = 0
281 282 for label, nodes in sorted(self.iteritems()):
282 283 for node in nodes:
283 284 nodecount += 1
284 285 if node in self._closednodes:
285 286 state = 'c'
286 287 else:
287 288 state = 'o'
288 289 f.write("%s %s %s\n" % (hex(node), state,
289 290 encoding.fromlocal(label)))
290 291 f.close()
291 292 repo.ui.log('branchcache',
292 293 'wrote %s branch cache with %d labels and %d nodes\n',
293 294 repo.filtername, len(self), nodecount)
294 295 except (IOError, OSError, error.Abort) as inst:
295 296 # Abort may be raised by read only opener, so log and continue
296 297 repo.ui.debug("couldn't write branch cache: %s\n" %
297 298 stringutil.forcebytestr(inst))
298 299
299 300 def update(self, repo, revgen):
300 301 """Given a branchhead cache, self, that may have extra nodes or be
301 302 missing heads, and a generator of nodes that are strictly a superset of
302 303 heads missing, this function updates self to be correct.
303 304 """
304 305 starttime = util.timer()
305 306 cl = repo.changelog
306 307 # collect new branch entries
307 308 newbranches = {}
308 309 getbranchinfo = repo.revbranchcache().branchinfo
309 310 for r in revgen:
310 311 branch, closesbranch = getbranchinfo(r)
311 312 newbranches.setdefault(branch, []).append(r)
312 313 if closesbranch:
313 314 self._closednodes.add(cl.node(r))
314 315
315 316 # fetch current topological heads to speed up filtering
316 317 topoheads = set(cl.headrevs())
317 318
318 319 # if older branchheads are reachable from new ones, they aren't
319 320 # really branchheads. Note checking parents is insufficient:
320 321 # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
321 322 for branch, newheadrevs in newbranches.iteritems():
322 323 bheads = self.setdefault(branch, [])
323 324 bheadset = set(cl.rev(node) for node in bheads)
324 325
325 326 # This has been tested True on all internal usages of this function.
326 327 # Run it again in case of doubt:
327 328 # assert not (set(bheadrevs) & set(newheadrevs))
328 329 bheadset.update(newheadrevs)
329 330
330 331 # This prunes out two kinds of heads - heads that are superseded by
331 332 # a head in newheadrevs, and newheadrevs that are not heads because
332 333 # an existing head is their descendant.
333 334 uncertain = bheadset - topoheads
334 335 if uncertain:
335 336 floorrev = min(uncertain)
336 337 ancestors = set(cl.ancestors(newheadrevs, floorrev))
337 338 bheadset -= ancestors
338 339 bheadrevs = sorted(bheadset)
339 340 self[branch] = [cl.node(rev) for rev in bheadrevs]
340 341 tiprev = bheadrevs[-1]
341 342 if tiprev > self.tiprev:
342 343 self.tipnode = cl.node(tiprev)
343 344 self.tiprev = tiprev
344 345
345 346 if not self.validfor(repo):
346 347 # cache key is not valid anymore
347 348 self.tipnode = nullid
348 349 self.tiprev = nullrev
349 350 for heads in self.values():
350 351 tiprev = max(cl.rev(node) for node in heads)
351 352 if tiprev > self.tiprev:
352 353 self.tipnode = cl.node(tiprev)
353 354 self.tiprev = tiprev
354 355 self.filteredhash = scmutil.filteredhash(repo, self.tiprev)
355 356
356 357 duration = util.timer() - starttime
357 358 repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
358 359 repo.filtername, duration)
359 360
360 361 self.write(repo)
361 362
362 363
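The head-pruning step inside update() can be exercised in isolation. Below is a minimal sketch with a stubbed ancestor function (the real code uses changelog.ancestors); the toy history is an assumption for illustration:

```python
def prune_heads(bheadset, newheadrevs, topoheads, ancestors_of):
    # Known topological heads cannot be superseded; only check the rest.
    uncertain = bheadset - topoheads
    if uncertain:
        floorrev = min(uncertain)
        # Drop every candidate that is an ancestor of a new head.
        bheadset -= ancestors_of(newheadrevs, floorrev)
    return sorted(bheadset)

# Toy linear history 0 -> 1 -> 2: once rev 2 arrives as a new head,
# the old head 1 is one of its ancestors and is pruned.
ancs = lambda revs, floor: {r for h in revs for r in range(floor, h)}
print(prune_heads({1, 2}, [2], {2}, ancs))  # [2]
```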
363 364 class remotebranchcache(branchcache):
364 365 """Branchmap info for a remote connection, should not write locally"""
365 366 def write(self, repo):
366 367 pass
367 368
368 369
369 370 # Revision branch info cache
370 371
371 372 _rbcversion = '-v1'
372 373 _rbcnames = 'rbc-names' + _rbcversion
373 374 _rbcrevs = 'rbc-revs' + _rbcversion
374 375 # [4 byte hash prefix][4 byte branch name number with sign bit indicating a closed branch]
375 376 _rbcrecfmt = '>4sI'
376 377 _rbcrecsize = calcsize(_rbcrecfmt)
377 378 _rbcnodelen = 4
378 379 _rbcbranchidxmask = 0x7fffffff
379 380 _rbccloseflag = 0x80000000
380 381
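A quick sketch of one rbc-revs record built with the constants above; the node prefix and branch index are made-up values for illustration:

```python
import struct

_rbcrecfmt = '>4sI'             # as defined above
_rbccloseflag = 0x80000000
_rbcbranchidxmask = 0x7fffffff

# Hypothetical record: 4-byte node prefix, branch index 5, branch closed.
rec = struct.pack(_rbcrecfmt, b'\xde\xad\xbe\xef', 5 | _rbccloseflag)
node_prefix, field = struct.unpack(_rbcrecfmt, rec)
close = bool(field & _rbccloseflag)      # high bit: branch-close flag
branchidx = field & _rbcbranchidxmask    # low 31 bits: index into rbc-names
print(node_prefix, branchidx, close)     # b'\xde\xad\xbe\xef' 5 True
```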
381 382 class revbranchcache(object):
382 383 """Persistent cache, mapping from revision number to branch name and close.
383 384 This is a low level cache, independent of filtering.
384 385
385 386 Branch names are stored in rbc-names in internal encoding separated by 0.
386 387 rbc-names is append-only, and each branch name is only stored once and will
387 388 thus have a unique index.
388 389
389 390 The branch info for each revision is stored in rbc-revs as constant size
390 391 records. The whole file is read into memory, but it is only 'parsed' on
391 392 demand. The file is usually append-only but will be truncated if repo
392 393 modification is detected.
393 394 The record for each revision contains the first 4 bytes of the
394 395 corresponding node hash, and the record is only used if it still matches.
395 396 Even a completely trashed rbc-revs file will thus still give the right result
396 397 while converging towards full recovery ... assuming no incorrectly matching
397 398 node hashes.
398 399 The record also contains 4 bytes where 31 bits contain the index of the
399 400 branch and the last bit indicates that it is a branch-close commit.
400 401 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
401 402 and will grow with it but be 1/8th of its size.
402 403 """
403 404
404 405 def __init__(self, repo, readonly=True):
405 406 assert repo.filtername is None
406 407 self._repo = repo
407 408 self._names = [] # branch names in local encoding with static index
408 409 self._rbcrevs = bytearray()
409 410 self._rbcsnameslen = 0 # length of rbc-names data read from disk
410 411 try:
411 412 bndata = repo.cachevfs.read(_rbcnames)
412 413 self._rbcsnameslen = len(bndata) # for verification before writing
413 414 if bndata:
414 415 self._names = [encoding.tolocal(bn)
415 416 for bn in bndata.split('\0')]
416 417 except (IOError, OSError):
417 418 if readonly:
418 419 # don't try to use cache - fall back to the slow path
419 420 self.branchinfo = self._branchinfo
420 421
421 422 if self._names:
422 423 try:
423 424 data = repo.cachevfs.read(_rbcrevs)
424 425 self._rbcrevs[:] = data
425 426 except (IOError, OSError) as inst:
426 427 repo.ui.debug("couldn't read revision branch cache: %s\n" %
427 428 stringutil.forcebytestr(inst))
428 429 # remember number of good records on disk
429 430 self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
430 431 len(repo.changelog))
431 432 if self._rbcrevslen == 0:
432 433 self._names = []
433 434 self._rbcnamescount = len(self._names) # number of names read at
434 435 # _rbcsnameslen
435 436
436 437 def _clear(self):
437 438 self._rbcsnameslen = 0
438 439 del self._names[:]
439 440 self._rbcnamescount = 0
440 441 self._rbcrevslen = len(self._repo.changelog)
441 442 self._rbcrevs = bytearray(self._rbcrevslen * _rbcrecsize)
442 443 util.clearcachedproperty(self, '_namesreverse')
443 444
444 445 @util.propertycache
445 446 def _namesreverse(self):
446 447 return dict((b, r) for r, b in enumerate(self._names))
447 448
448 449 def branchinfo(self, rev):
449 450 """Return branch name and close flag for rev, using and updating
450 451 persistent cache."""
451 452 changelog = self._repo.changelog
452 453 rbcrevidx = rev * _rbcrecsize
453 454
454 455 # avoid negative index, changelog.read(nullrev) is fast without cache
455 456 if rev == nullrev:
456 457 return changelog.branchinfo(rev)
457 458
458 459 # if requested rev isn't allocated, grow and cache the rev info
459 460 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
460 461 return self._branchinfo(rev)
461 462
462 463 # fast path: extract data from cache, use it if node is matching
463 464 reponode = changelog.node(rev)[:_rbcnodelen]
464 465 cachenode, branchidx = unpack_from(
465 466 _rbcrecfmt, util.buffer(self._rbcrevs), rbcrevidx)
466 467 close = bool(branchidx & _rbccloseflag)
467 468 if close:
468 469 branchidx &= _rbcbranchidxmask
469 470 if cachenode == '\0\0\0\0':
470 471 pass
471 472 elif cachenode == reponode:
472 473 try:
473 474 return self._names[branchidx], close
474 475 except IndexError:
475 476 # recover from invalid reference to unknown branch
476 477 self._repo.ui.debug("referenced branch names not found"
477 478 " - rebuilding revision branch cache from scratch\n")
478 479 self._clear()
479 480 else:
480 481 # rev/node map has changed, invalidate the cache from here up
481 482 self._repo.ui.debug("history modification detected - truncating "
482 483 "revision branch cache to revision %d\n" % rev)
483 484 truncate = rbcrevidx + _rbcrecsize
484 485 del self._rbcrevs[truncate:]
485 486 self._rbcrevslen = min(self._rbcrevslen, truncate)
486 487
487 488 # fall back to slow path and make sure it will be written to disk
488 489 return self._branchinfo(rev)
489 490
490 491 def _branchinfo(self, rev):
491 492 """Retrieve branch info from changelog and update _rbcrevs"""
492 493 changelog = self._repo.changelog
493 494 b, close = changelog.branchinfo(rev)
494 495 if b in self._namesreverse:
495 496 branchidx = self._namesreverse[b]
496 497 else:
497 498 branchidx = len(self._names)
498 499 self._names.append(b)
499 500 self._namesreverse[b] = branchidx
500 501 reponode = changelog.node(rev)
501 502 if close:
502 503 branchidx |= _rbccloseflag
503 504 self._setcachedata(rev, reponode, branchidx)
504 505 return b, close
505 506
506 507 def setdata(self, branch, rev, node, close):
507 508 """add new data information to the cache"""
508 509 if branch in self._namesreverse:
509 510 branchidx = self._namesreverse[branch]
510 511 else:
511 512 branchidx = len(self._names)
512 513 self._names.append(branch)
513 514 self._namesreverse[branch] = branchidx
514 515 if close:
515 516 branchidx |= _rbccloseflag
516 517 self._setcachedata(rev, node, branchidx)
517 518 # If no cache data were readable (file missing, bad permissions, etc.),
518 519 # the cache was bypassing itself by setting:
519 520 #
520 521 # self.branchinfo = self._branchinfo
521 522 #
522 523 # Since we now have data in the cache, we need to drop this bypassing.
523 524 if r'branchinfo' in vars(self):
524 525 del self.branchinfo
525 526
526 527 def _setcachedata(self, rev, node, branchidx):
527 528 """Writes the node's branch data to the in-memory cache data."""
528 529 if rev == nullrev:
529 530 return
530 531 rbcrevidx = rev * _rbcrecsize
531 532 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
532 533 self._rbcrevs.extend('\0' *
533 534 (len(self._repo.changelog) * _rbcrecsize -
534 535 len(self._rbcrevs)))
535 536 pack_into(_rbcrecfmt, self._rbcrevs, rbcrevidx, node, branchidx)
536 537 self._rbcrevslen = min(self._rbcrevslen, rev)
537 538
538 539 tr = self._repo.currenttransaction()
539 540 if tr:
540 541 tr.addfinalize('write-revbranchcache', self.write)
541 542
542 543 def write(self, tr=None):
543 544 """Save branch cache if it is dirty."""
544 545 repo = self._repo
545 546 wlock = None
546 547 step = ''
547 548 try:
548 549 if self._rbcnamescount < len(self._names):
549 550 step = ' names'
550 551 wlock = repo.wlock(wait=False)
551 552 if self._rbcnamescount != 0:
552 553 f = repo.cachevfs.open(_rbcnames, 'ab')
553 554 if f.tell() == self._rbcsnameslen:
554 555 f.write('\0')
555 556 else:
556 557 f.close()
557 558 repo.ui.debug("%s changed - rewriting it\n" % _rbcnames)
558 559 self._rbcnamescount = 0
559 560 self._rbcrevslen = 0
560 561 if self._rbcnamescount == 0:
561 562 # before rewriting names, make sure references are removed
562 563 repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
563 564 f = repo.cachevfs.open(_rbcnames, 'wb')
564 565 f.write('\0'.join(encoding.fromlocal(b)
565 566 for b in self._names[self._rbcnamescount:]))
566 567 self._rbcsnameslen = f.tell()
567 568 f.close()
568 569 self._rbcnamescount = len(self._names)
569 570
570 571 start = self._rbcrevslen * _rbcrecsize
571 572 if start != len(self._rbcrevs):
572 573 step = ''
573 574 if wlock is None:
574 575 wlock = repo.wlock(wait=False)
575 576 revs = min(len(repo.changelog),
576 577 len(self._rbcrevs) // _rbcrecsize)
577 578 f = repo.cachevfs.open(_rbcrevs, 'ab')
578 579 if f.tell() != start:
579 580 repo.ui.debug("truncating cache/%s to %d\n"
580 581 % (_rbcrevs, start))
581 582 f.seek(start)
582 583 if f.tell() != start:
583 584 start = 0
584 585 f.seek(start)
585 586 f.truncate()
586 587 end = revs * _rbcrecsize
587 588 f.write(self._rbcrevs[start:end])
588 589 f.close()
589 590 self._rbcrevslen = revs
590 591 except (IOError, OSError, error.Abort, error.LockError) as inst:
591 592 repo.ui.debug("couldn't write revision branch cache%s: %s\n"
592 593 % (step, stringutil.forcebytestr(inst)))
593 594 finally:
594 595 if wlock is not None:
595 596 wlock.release()