diff --git a/mercurial/branchmap.py b/mercurial/branchmap.py --- a/mercurial/branchmap.py +++ b/mercurial/branchmap.py @@ -9,6 +9,8 @@ from node import bin, hex, nullid, nullr import encoding import util import time +from array import array +from struct import calcsize, pack, unpack def _filename(repo): """name of a branchcache file for a given repo or repoview""" @@ -285,3 +287,150 @@ class branchcache(dict): duration = time.time() - starttime repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n', repo.filtername, duration) + +# Revision branch info cache + +_rbcversion = '-v1' +_rbcnames = 'cache/rbc-names' + _rbcversion +_rbcrevs = 'cache/rbc-revs' + _rbcversion +# [4 byte hash prefix][4 byte branch name number with sign bit indicating open] +_rbcrecfmt = '>4sI' +_rbcrecsize = calcsize(_rbcrecfmt) +_rbcnodelen = 4 +_rbcbranchidxmask = 0x7fffffff +_rbccloseflag = 0x80000000 + +class revbranchcache(object): + """Persistent cache, mapping from revision number to branch name and close. + This is a low level cache, independent of filtering. + + Branch names are stored in rbc-names in internal encoding separated by 0. + rbc-names is append-only, and each branch name is only stored once and will + thus have a unique index. + + The branch info for each revision is stored in rbc-revs as constant size + records. The whole file is read into memory, but it is only 'parsed' on + demand. The file is usually append-only but will be truncated if repo + modification is detected. + The record for each revision contains the first 4 bytes of the + corresponding node hash, and the record is only used if it still matches. + Even a completely trashed rbc-revs fill thus still give the right result + while converging towards full recovery ... assuming no incorrectly matching + node hashes. + The record also contains 4 bytes where 31 bits contains the index of the + branch and the last bit indicate that it is a branch close commit. + The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i + and will grow with it but be 1/8th of its size. + """ + + def __init__(self, repo): + assert repo.filtername is None + self._names = [] # branch names in local encoding with static index + self._rbcrevs = array('c') # structs of type _rbcrecfmt + self._rbcsnameslen = 0 + try: + bndata = repo.vfs.read(_rbcnames) + self._rbcsnameslen = len(bndata) # for verification before writing + self._names = [encoding.tolocal(bn) for bn in bndata.split('\0')] + except (IOError, OSError), inst: + repo.ui.debug("couldn't read revision branch cache names: %s\n" % + inst) + if self._names: + try: + data = repo.vfs.read(_rbcrevs) + self._rbcrevs.fromstring(data) + except (IOError, OSError), inst: + repo.ui.debug("couldn't read revision branch cache: %s\n" % + inst) + # remember number of good records on disk + self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize, + len(repo.changelog)) + if self._rbcrevslen == 0: + self._names = [] + self._rbcnamescount = len(self._names) # number of good names on disk + self._namesreverse = dict((b, r) for r, b in enumerate(self._names)) + + def branchinfo(self, changelog, rev): + """Return branch name and close flag for rev, using and updating + persistent cache.""" + rbcrevidx = rev * _rbcrecsize + + # if requested rev is missing, add and populate all missing revs + if len(self._rbcrevs) < rbcrevidx + _rbcrecsize: + first = len(self._rbcrevs) // _rbcrecsize + self._rbcrevs.extend('\0' * (len(changelog) * _rbcrecsize - + len(self._rbcrevs))) + for r in xrange(first, len(changelog)): + self._branchinfo(r) + + # fast path: extract data from cache, use it if node is matching + reponode = changelog.node(rev)[:_rbcnodelen] + cachenode, branchidx = unpack( + _rbcrecfmt, buffer(self._rbcrevs, rbcrevidx, _rbcrecsize)) + close = bool(branchidx & _rbccloseflag) + if close: + branchidx &= _rbcbranchidxmask + if cachenode == reponode: + return self._names[branchidx], close + # fall back to slow path and make sure it will be written to disk + self._rbcrevslen = min(self._rbcrevslen, rev) + return self._branchinfo(rev) + + def _branchinfo(self, changelog, rev): + """Retrieve branch info from changelog and update _rbcrevs""" + b, close = changelog.branchinfo(rev) + if b in self._namesreverse: + branchidx = self._namesreverse[b] + else: + branchidx = len(self._names) + self._names.append(b) + self._namesreverse[b] = branchidx + reponode = changelog.node(rev) + if close: + branchidx |= _rbccloseflag + rbcrevidx = rev * _rbcrecsize + rec = array('c') + rec.fromstring(pack(_rbcrecfmt, reponode, branchidx)) + self._rbcrevs[rbcrevidx:rbcrevidx + _rbcrecsize] = rec + return b, close + + def write(self, repo): + """Save branch cache if it is dirty.""" + if self._rbcnamescount < len(self._names): + try: + if self._rbcnamescount != 0: + f = repo.vfs.open(_rbcnames, 'ab') + if f.tell() == self._rbcsnameslen: + f.write('\0') + else: + f.close() + self._rbcnamescount = 0 + self._rbcrevslen = 0 + if self._rbcnamescount == 0: + f = repo.vfs.open(_rbcnames, 'wb') + f.write('\0'.join(encoding.fromlocal(b) + for b in self._names[self._rbcnamescount:])) + self._rbcsnameslen = f.tell() + f.close() + except (IOError, OSError, util.Abort), inst: + repo.ui.debug("couldn't write revision branch cache names: " + "%s\n" % inst) + return + self._rbcnamescount = len(self._names) + + start = self._rbcrevslen * _rbcrecsize + if start != len(self._rbcrevs): + self._rbcrevslen = min(len(repo.changelog), + len(self._rbcrevs) // _rbcrecsize) + try: + f = repo.vfs.open(_rbcrevs, 'ab') + if f.tell() != start: + f.seek(start) + f.truncate() + end = self._rbcrevslen * _rbcrecsize + f.write(self._rbcrevs[start:end]) + f.close() + except (IOError, OSError, util.Abort), inst: + repo.ui.debug("couldn't write revision branch cache: %s\n" % + inst) + return