##// END OF EJS Templates
branchmap: avoid changelog and attribute lookups in replacecache()...
Gregory Szorc -
r40411:76d4272b default
parent child Browse files
Show More
@@ -1,556 +1,559 b''
1 1 # branchmap.py - logic to computes, maintain and stores branchmap for local repo
2 2 #
3 3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import struct
11 11
12 12 from .node import (
13 13 bin,
14 14 hex,
15 15 nullid,
16 16 nullrev,
17 17 )
18 18 from . import (
19 19 encoding,
20 20 error,
21 21 pycompat,
22 22 scmutil,
23 23 util,
24 24 )
25 25 from .utils import (
26 26 stringutil,
27 27 )
28 28
29 29 calcsize = struct.calcsize
30 30 pack_into = struct.pack_into
31 31 unpack_from = struct.unpack_from
32 32
33 33 def _filename(repo):
34 34 """name of a branchcache file for a given repo or repoview"""
35 35 filename = "branch2"
36 36 if repo.filtername:
37 37 filename = '%s-%s' % (filename, repo.filtername)
38 38 return filename
39 39
40 40 def read(repo):
41 41 f = None
42 42 try:
43 43 f = repo.cachevfs(_filename(repo))
44 44 lineiter = iter(f)
45 45 cachekey = next(lineiter).rstrip('\n').split(" ", 2)
46 46 last, lrev = cachekey[:2]
47 47 last, lrev = bin(last), int(lrev)
48 48 filteredhash = None
49 49 if len(cachekey) > 2:
50 50 filteredhash = bin(cachekey[2])
51 51 partial = branchcache(tipnode=last, tiprev=lrev,
52 52 filteredhash=filteredhash)
53 53 if not partial.validfor(repo):
54 54 # invalidate the cache
55 55 raise ValueError(r'tip differs')
56 56 cl = repo.changelog
57 57 for l in lineiter:
58 58 l = l.rstrip('\n')
59 59 if not l:
60 60 continue
61 61 node, state, label = l.split(" ", 2)
62 62 if state not in 'oc':
63 63 raise ValueError(r'invalid branch state')
64 64 label = encoding.tolocal(label.strip())
65 65 node = bin(node)
66 66 if not cl.hasnode(node):
67 67 raise ValueError(
68 68 r'node %s does not exist' % pycompat.sysstr(hex(node)))
69 69 partial.setdefault(label, []).append(node)
70 70 if state == 'c':
71 71 partial._closednodes.add(node)
72 72
73 73 except (IOError, OSError):
74 74 return None
75 75
76 76 except Exception as inst:
77 77 if repo.ui.debugflag:
78 78 msg = 'invalid branchheads cache'
79 79 if repo.filtername is not None:
80 80 msg += ' (%s)' % repo.filtername
81 81 msg += ': %s\n'
82 82 repo.ui.debug(msg % pycompat.bytestr(inst))
83 83 partial = None
84 84
85 85 finally:
86 86 if f:
87 87 f.close()
88 88
89 89 return partial
90 90
91 91 ### Nearest subset relation
92 92 # Nearest subset of filter X is a filter Y so that:
93 93 # * Y is included in X,
94 94 # * X - Y is as small as possible.
95 95 # This create and ordering used for branchmap purpose.
96 96 # the ordering may be partial
97 97 subsettable = {None: 'visible',
98 98 'visible-hidden': 'visible',
99 99 'visible': 'served',
100 100 'served': 'immutable',
101 101 'immutable': 'base'}
102 102
103 103 def updatecache(repo):
104 104 cl = repo.changelog
105 105 filtername = repo.filtername
106 106 partial = repo._branchcaches.get(filtername)
107 107
108 108 revs = []
109 109 if partial is None or not partial.validfor(repo):
110 110 partial = read(repo)
111 111 if partial is None:
112 112 subsetname = subsettable.get(filtername)
113 113 if subsetname is None:
114 114 partial = branchcache()
115 115 else:
116 116 subset = repo.filtered(subsetname)
117 117 partial = subset.branchmap().copy()
118 118 extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
119 119 revs.extend(r for r in extrarevs if r <= partial.tiprev)
120 120 revs.extend(cl.revs(start=partial.tiprev + 1))
121 121 if revs:
122 122 partial.update(repo, revs)
123 123 partial.write(repo)
124 124
125 125 assert partial.validfor(repo), filtername
126 126 repo._branchcaches[repo.filtername] = partial
127 127
128 128 def replacecache(repo, bm):
129 129 """Replace the branchmap cache for a repo with a branch mapping.
130 130
131 131 This is likely only called during clone with a branch map from a remote.
132 132 """
133 cl = repo.changelog
134 clrev = cl.rev
135 clbranchinfo = cl.branchinfo
133 136 rbheads = []
134 137 closed = []
135 138 for bheads in bm.itervalues():
136 139 rbheads.extend(bheads)
137 140 for h in bheads:
138 r = repo.changelog.rev(h)
139 b, c = repo.changelog.branchinfo(r)
141 r = clrev(h)
142 b, c = clbranchinfo(r)
140 143 if c:
141 144 closed.append(h)
142 145
143 146 if rbheads:
144 rtiprev = max((int(repo.changelog.rev(node))
147 rtiprev = max((int(clrev(node))
145 148 for node in rbheads))
146 149 cache = branchcache(bm,
147 150 repo[rtiprev].node(),
148 151 rtiprev,
149 152 closednodes=closed)
150 153
151 154 # Try to stick it as low as possible
152 155 # filter above served are unlikely to be fetch from a clone
153 156 for candidate in ('base', 'immutable', 'served'):
154 157 rview = repo.filtered(candidate)
155 158 if cache.validfor(rview):
156 159 repo._branchcaches[candidate] = cache
157 160 cache.write(rview)
158 161 break
159 162
160 163 class branchcache(dict):
161 164 """A dict like object that hold branches heads cache.
162 165
163 166 This cache is used to avoid costly computations to determine all the
164 167 branch heads of a repo.
165 168
166 169 The cache is serialized on disk in the following format:
167 170
168 171 <tip hex node> <tip rev number> [optional filtered repo hex hash]
169 172 <branch head hex node> <open/closed state> <branch name>
170 173 <branch head hex node> <open/closed state> <branch name>
171 174 ...
172 175
173 176 The first line is used to check if the cache is still valid. If the
174 177 branch cache is for a filtered repo view, an optional third hash is
175 178 included that hashes the hashes of all filtered revisions.
176 179
177 180 The open/closed state is represented by a single letter 'o' or 'c'.
178 181 This field can be used to avoid changelog reads when determining if a
179 182 branch head closes a branch or not.
180 183 """
181 184
182 185 def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
183 186 filteredhash=None, closednodes=None):
184 187 super(branchcache, self).__init__(entries)
185 188 self.tipnode = tipnode
186 189 self.tiprev = tiprev
187 190 self.filteredhash = filteredhash
188 191 # closednodes is a set of nodes that close their branch. If the branch
189 192 # cache has been updated, it may contain nodes that are no longer
190 193 # heads.
191 194 if closednodes is None:
192 195 self._closednodes = set()
193 196 else:
194 197 self._closednodes = closednodes
195 198
196 199 def validfor(self, repo):
197 200 """Is the cache content valid regarding a repo
198 201
199 202 - False when cached tipnode is unknown or if we detect a strip.
200 203 - True when cache is up to date or a subset of current repo."""
201 204 try:
202 205 return ((self.tipnode == repo.changelog.node(self.tiprev))
203 206 and (self.filteredhash == \
204 207 scmutil.filteredhash(repo, self.tiprev)))
205 208 except IndexError:
206 209 return False
207 210
208 211 def _branchtip(self, heads):
209 212 '''Return tuple with last open head in heads and false,
210 213 otherwise return last closed head and true.'''
211 214 tip = heads[-1]
212 215 closed = True
213 216 for h in reversed(heads):
214 217 if h not in self._closednodes:
215 218 tip = h
216 219 closed = False
217 220 break
218 221 return tip, closed
219 222
220 223 def branchtip(self, branch):
221 224 '''Return the tipmost open head on branch head, otherwise return the
222 225 tipmost closed head on branch.
223 226 Raise KeyError for unknown branch.'''
224 227 return self._branchtip(self[branch])[0]
225 228
226 229 def iteropen(self, nodes):
227 230 return (n for n in nodes if n not in self._closednodes)
228 231
229 232 def branchheads(self, branch, closed=False):
230 233 heads = self[branch]
231 234 if not closed:
232 235 heads = list(self.iteropen(heads))
233 236 return heads
234 237
235 238 def iterbranches(self):
236 239 for bn, heads in self.iteritems():
237 240 yield (bn, heads) + self._branchtip(heads)
238 241
239 242 def copy(self):
240 243 """return an deep copy of the branchcache object"""
241 244 return branchcache(self, self.tipnode, self.tiprev, self.filteredhash,
242 245 self._closednodes)
243 246
244 247 def write(self, repo):
245 248 try:
246 249 f = repo.cachevfs(_filename(repo), "w", atomictemp=True)
247 250 cachekey = [hex(self.tipnode), '%d' % self.tiprev]
248 251 if self.filteredhash is not None:
249 252 cachekey.append(hex(self.filteredhash))
250 253 f.write(" ".join(cachekey) + '\n')
251 254 nodecount = 0
252 255 for label, nodes in sorted(self.iteritems()):
253 256 for node in nodes:
254 257 nodecount += 1
255 258 if node in self._closednodes:
256 259 state = 'c'
257 260 else:
258 261 state = 'o'
259 262 f.write("%s %s %s\n" % (hex(node), state,
260 263 encoding.fromlocal(label)))
261 264 f.close()
262 265 repo.ui.log('branchcache',
263 266 'wrote %s branch cache with %d labels and %d nodes\n',
264 267 repo.filtername, len(self), nodecount)
265 268 except (IOError, OSError, error.Abort) as inst:
266 269 # Abort may be raised by read only opener, so log and continue
267 270 repo.ui.debug("couldn't write branch cache: %s\n" %
268 271 stringutil.forcebytestr(inst))
269 272
270 273 def update(self, repo, revgen):
271 274 """Given a branchhead cache, self, that may have extra nodes or be
272 275 missing heads, and a generator of nodes that are strictly a superset of
273 276 heads missing, this function updates self to be correct.
274 277 """
275 278 starttime = util.timer()
276 279 cl = repo.changelog
277 280 # collect new branch entries
278 281 newbranches = {}
279 282 getbranchinfo = repo.revbranchcache().branchinfo
280 283 for r in revgen:
281 284 branch, closesbranch = getbranchinfo(r, changelog=cl)
282 285 newbranches.setdefault(branch, []).append(r)
283 286 if closesbranch:
284 287 self._closednodes.add(cl.node(r))
285 288
286 289 # fetch current topological heads to speed up filtering
287 290 topoheads = set(cl.headrevs())
288 291
289 292 # if older branchheads are reachable from new ones, they aren't
290 293 # really branchheads. Note checking parents is insufficient:
291 294 # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
292 295 for branch, newheadrevs in newbranches.iteritems():
293 296 bheads = self.setdefault(branch, [])
294 297 bheadset = set(cl.rev(node) for node in bheads)
295 298
296 299 # This have been tested True on all internal usage of this function.
297 300 # run it again in case of doubt
298 301 # assert not (set(bheadrevs) & set(newheadrevs))
299 302 bheadset.update(newheadrevs)
300 303
301 304 # This prunes out two kinds of heads - heads that are superseded by
302 305 # a head in newheadrevs, and newheadrevs that are not heads because
303 306 # an existing head is their descendant.
304 307 uncertain = bheadset - topoheads
305 308 if uncertain:
306 309 floorrev = min(uncertain)
307 310 ancestors = set(cl.ancestors(newheadrevs, floorrev))
308 311 bheadset -= ancestors
309 312 bheadrevs = sorted(bheadset)
310 313 self[branch] = [cl.node(rev) for rev in bheadrevs]
311 314 tiprev = bheadrevs[-1]
312 315 if tiprev > self.tiprev:
313 316 self.tipnode = cl.node(tiprev)
314 317 self.tiprev = tiprev
315 318
316 319 if not self.validfor(repo):
317 320 # cache key are not valid anymore
318 321 self.tipnode = nullid
319 322 self.tiprev = nullrev
320 323 for heads in self.values():
321 324 tiprev = max(cl.rev(node) for node in heads)
322 325 if tiprev > self.tiprev:
323 326 self.tipnode = cl.node(tiprev)
324 327 self.tiprev = tiprev
325 328 self.filteredhash = scmutil.filteredhash(repo, self.tiprev)
326 329
327 330 duration = util.timer() - starttime
328 331 repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
329 332 repo.filtername, duration)
330 333
331 334 # Revision branch info cache
332 335
333 336 _rbcversion = '-v1'
334 337 _rbcnames = 'rbc-names' + _rbcversion
335 338 _rbcrevs = 'rbc-revs' + _rbcversion
336 339 # [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
337 340 _rbcrecfmt = '>4sI'
338 341 _rbcrecsize = calcsize(_rbcrecfmt)
339 342 _rbcnodelen = 4
340 343 _rbcbranchidxmask = 0x7fffffff
341 344 _rbccloseflag = 0x80000000
342 345
343 346 class revbranchcache(object):
344 347 """Persistent cache, mapping from revision number to branch name and close.
345 348 This is a low level cache, independent of filtering.
346 349
347 350 Branch names are stored in rbc-names in internal encoding separated by 0.
348 351 rbc-names is append-only, and each branch name is only stored once and will
349 352 thus have a unique index.
350 353
351 354 The branch info for each revision is stored in rbc-revs as constant size
352 355 records. The whole file is read into memory, but it is only 'parsed' on
353 356 demand. The file is usually append-only but will be truncated if repo
354 357 modification is detected.
355 358 The record for each revision contains the first 4 bytes of the
356 359 corresponding node hash, and the record is only used if it still matches.
357 360 Even a completely trashed rbc-revs fill thus still give the right result
358 361 while converging towards full recovery ... assuming no incorrectly matching
359 362 node hashes.
360 363 The record also contains 4 bytes where 31 bits contains the index of the
361 364 branch and the last bit indicate that it is a branch close commit.
362 365 The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
363 366 and will grow with it but be 1/8th of its size.
364 367 """
365 368
366 369 def __init__(self, repo, readonly=True):
367 370 assert repo.filtername is None
368 371 self._repo = repo
369 372 self._names = [] # branch names in local encoding with static index
370 373 self._rbcrevs = bytearray()
371 374 self._rbcsnameslen = 0 # length of names read at _rbcsnameslen
372 375 try:
373 376 bndata = repo.cachevfs.read(_rbcnames)
374 377 self._rbcsnameslen = len(bndata) # for verification before writing
375 378 if bndata:
376 379 self._names = [encoding.tolocal(bn)
377 380 for bn in bndata.split('\0')]
378 381 except (IOError, OSError):
379 382 if readonly:
380 383 # don't try to use cache - fall back to the slow path
381 384 self.branchinfo = self._branchinfo
382 385
383 386 if self._names:
384 387 try:
385 388 data = repo.cachevfs.read(_rbcrevs)
386 389 self._rbcrevs[:] = data
387 390 except (IOError, OSError) as inst:
388 391 repo.ui.debug("couldn't read revision branch cache: %s\n" %
389 392 stringutil.forcebytestr(inst))
390 393 # remember number of good records on disk
391 394 self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
392 395 len(repo.changelog))
393 396 if self._rbcrevslen == 0:
394 397 self._names = []
395 398 self._rbcnamescount = len(self._names) # number of names read at
396 399 # _rbcsnameslen
397 400 self._namesreverse = dict((b, r) for r, b in enumerate(self._names))
398 401
399 402 def _clear(self):
400 403 self._rbcsnameslen = 0
401 404 del self._names[:]
402 405 self._rbcnamescount = 0
403 406 self._namesreverse.clear()
404 407 self._rbcrevslen = len(self._repo.changelog)
405 408 self._rbcrevs = bytearray(self._rbcrevslen * _rbcrecsize)
406 409
407 410 def branchinfo(self, rev, changelog=None):
408 411 """Return branch name and close flag for rev, using and updating
409 412 persistent cache."""
410 413 changelog = changelog or self._repo.changelog
411 414 rbcrevidx = rev * _rbcrecsize
412 415
413 416 # avoid negative index, changelog.read(nullrev) is fast without cache
414 417 if rev == nullrev:
415 418 return changelog.branchinfo(rev)
416 419
417 420 # if requested rev isn't allocated, grow and cache the rev info
418 421 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
419 422 return self._branchinfo(rev, changelog=changelog)
420 423
421 424 # fast path: extract data from cache, use it if node is matching
422 425 reponode = changelog.node(rev)[:_rbcnodelen]
423 426 cachenode, branchidx = unpack_from(
424 427 _rbcrecfmt, util.buffer(self._rbcrevs), rbcrevidx)
425 428 close = bool(branchidx & _rbccloseflag)
426 429 if close:
427 430 branchidx &= _rbcbranchidxmask
428 431 if cachenode == '\0\0\0\0':
429 432 pass
430 433 elif cachenode == reponode:
431 434 try:
432 435 return self._names[branchidx], close
433 436 except IndexError:
434 437 # recover from invalid reference to unknown branch
435 438 self._repo.ui.debug("referenced branch names not found"
436 439 " - rebuilding revision branch cache from scratch\n")
437 440 self._clear()
438 441 else:
439 442 # rev/node map has changed, invalidate the cache from here up
440 443 self._repo.ui.debug("history modification detected - truncating "
441 444 "revision branch cache to revision %d\n" % rev)
442 445 truncate = rbcrevidx + _rbcrecsize
443 446 del self._rbcrevs[truncate:]
444 447 self._rbcrevslen = min(self._rbcrevslen, truncate)
445 448
446 449 # fall back to slow path and make sure it will be written to disk
447 450 return self._branchinfo(rev, changelog=changelog)
448 451
449 452 def _branchinfo(self, rev, changelog=None):
450 453 """Retrieve branch info from changelog and update _rbcrevs"""
451 454 changelog = changelog or self._repo.changelog
452 455 b, close = changelog.branchinfo(rev)
453 456 if b in self._namesreverse:
454 457 branchidx = self._namesreverse[b]
455 458 else:
456 459 branchidx = len(self._names)
457 460 self._names.append(b)
458 461 self._namesreverse[b] = branchidx
459 462 reponode = changelog.node(rev)
460 463 if close:
461 464 branchidx |= _rbccloseflag
462 465 self._setcachedata(rev, reponode, branchidx, changelog)
463 466 return b, close
464 467
465 468 def setdata(self, branch, rev, node, close):
466 469 """add new data information to the cache"""
467 470 if branch in self._namesreverse:
468 471 branchidx = self._namesreverse[branch]
469 472 else:
470 473 branchidx = len(self._names)
471 474 self._names.append(branch)
472 475 self._namesreverse[branch] = branchidx
473 476 if close:
474 477 branchidx |= _rbccloseflag
475 478 self._setcachedata(rev, node, branchidx)
476 479 # If no cache data were readable (non exists, bad permission, etc)
477 480 # the cache was bypassing itself by setting:
478 481 #
479 482 # self.branchinfo = self._branchinfo
480 483 #
481 484 # Since we now have data in the cache, we need to drop this bypassing.
482 485 if r'branchinfo' in vars(self):
483 486 del self.branchinfo
484 487
485 488 def _setcachedata(self, rev, node, branchidx, changelog=None):
486 489 """Writes the node's branch data to the in-memory cache data."""
487 490 if rev == nullrev:
488 491 return
489 492
490 493 changelog = changelog or self._repo.changelog
491 494 rbcrevidx = rev * _rbcrecsize
492 495 if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
493 496 self._rbcrevs.extend('\0' *
494 497 (len(changelog) * _rbcrecsize -
495 498 len(self._rbcrevs)))
496 499 pack_into(_rbcrecfmt, self._rbcrevs, rbcrevidx, node, branchidx)
497 500 self._rbcrevslen = min(self._rbcrevslen, rev)
498 501
499 502 tr = self._repo.currenttransaction()
500 503 if tr:
501 504 tr.addfinalize('write-revbranchcache', self.write)
502 505
503 506 def write(self, tr=None):
504 507 """Save branch cache if it is dirty."""
505 508 repo = self._repo
506 509 wlock = None
507 510 step = ''
508 511 try:
509 512 if self._rbcnamescount < len(self._names):
510 513 step = ' names'
511 514 wlock = repo.wlock(wait=False)
512 515 if self._rbcnamescount != 0:
513 516 f = repo.cachevfs.open(_rbcnames, 'ab')
514 517 if f.tell() == self._rbcsnameslen:
515 518 f.write('\0')
516 519 else:
517 520 f.close()
518 521 repo.ui.debug("%s changed - rewriting it\n" % _rbcnames)
519 522 self._rbcnamescount = 0
520 523 self._rbcrevslen = 0
521 524 if self._rbcnamescount == 0:
522 525 # before rewriting names, make sure references are removed
523 526 repo.cachevfs.unlinkpath(_rbcrevs, ignoremissing=True)
524 527 f = repo.cachevfs.open(_rbcnames, 'wb')
525 528 f.write('\0'.join(encoding.fromlocal(b)
526 529 for b in self._names[self._rbcnamescount:]))
527 530 self._rbcsnameslen = f.tell()
528 531 f.close()
529 532 self._rbcnamescount = len(self._names)
530 533
531 534 start = self._rbcrevslen * _rbcrecsize
532 535 if start != len(self._rbcrevs):
533 536 step = ''
534 537 if wlock is None:
535 538 wlock = repo.wlock(wait=False)
536 539 revs = min(len(repo.changelog),
537 540 len(self._rbcrevs) // _rbcrecsize)
538 541 f = repo.cachevfs.open(_rbcrevs, 'ab')
539 542 if f.tell() != start:
540 543 repo.ui.debug("truncating cache/%s to %d\n"
541 544 % (_rbcrevs, start))
542 545 f.seek(start)
543 546 if f.tell() != start:
544 547 start = 0
545 548 f.seek(start)
546 549 f.truncate()
547 550 end = revs * _rbcrecsize
548 551 f.write(self._rbcrevs[start:end])
549 552 f.close()
550 553 self._rbcrevslen = revs
551 554 except (IOError, OSError, error.Abort, error.LockError) as inst:
552 555 repo.ui.debug("couldn't write revision branch cache%s: %s\n"
553 556 % (step, stringutil.forcebytestr(inst)))
554 557 finally:
555 558 if wlock is not None:
556 559 wlock.release()
General Comments 0
You need to be logged in to leave comments. Login now