# branchmap.py - logic to compute, maintain and store branchmap for local repo
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from node import bin, hex, nullid, nullrev
import encoding
import util
import time
from array import array
from struct import calcsize, pack, unpack

def _filename(repo):
    """name of a branchcache file for a given repo or repoview"""
    filename = "cache/branch2"
    if repo.filtername:
        filename = '%s-%s' % (filename, repo.filtername)
    return filename
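
# For example, the unfiltered repo reads and writes 'cache/branch2', while a
# repoview such as 'served' uses 'cache/branch2-served'.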

def read(repo):
    try:
        f = repo.opener(_filename(repo))
        lines = f.read().split('\n')
        f.close()
    except (IOError, OSError):
        return None

    try:
        cachekey = lines.pop(0).split(" ", 2)
        last, lrev = cachekey[:2]
        last, lrev = bin(last), int(lrev)
        filteredhash = None
        if len(cachekey) > 2:
            filteredhash = bin(cachekey[2])
        partial = branchcache(tipnode=last, tiprev=lrev,
                              filteredhash=filteredhash)
        if not partial.validfor(repo):
            # invalidate the cache
            raise ValueError('tip differs')
        for l in lines:
            if not l:
                continue
            node, state, label = l.split(" ", 2)
            if state not in 'oc':
                raise ValueError('invalid branch state')
            label = encoding.tolocal(label.strip())
            if not node in repo:
                raise ValueError('node %s does not exist' % node)
            node = bin(node)
            partial.setdefault(label, []).append(node)
            if state == 'c':
                partial._closednodes.add(node)
    except KeyboardInterrupt:
        raise
    except Exception, inst:
        if repo.ui.debugflag:
            msg = 'invalid branchheads cache'
            if repo.filtername is not None:
                msg += ' (%s)' % repo.filtername
            msg += ': %s\n'
            repo.ui.debug(msg % inst)
        partial = None
    return partial

### Nearest subset relation
# Nearest subset of filter X is a filter Y so that:
# * Y is included in X,
# * X - Y is as small as possible.
# This creates an ordering used for branchmap purposes.
# The ordering may be partial.
subsettable = {None: 'visible',
               'visible': 'served',
               'served': 'immutable',
               'immutable': 'base'}
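
# For example, subsettable['visible'] == 'served', so updatecache() below can
# seed the branchmap of the 'visible' repoview from the 'served' view's
# branchmap and only process the extra revisions, instead of recomputing the
# whole map from scratch.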

def updatecache(repo):
    cl = repo.changelog
    filtername = repo.filtername
    partial = repo._branchcaches.get(filtername)

    revs = []
    if partial is None or not partial.validfor(repo):
        partial = read(repo)
        if partial is None:
            subsetname = subsettable.get(filtername)
            if subsetname is None:
                partial = branchcache()
            else:
                subset = repo.filtered(subsetname)
                partial = subset.branchmap().copy()
                extrarevs = subset.changelog.filteredrevs - cl.filteredrevs
                revs.extend(r for r in extrarevs if r <= partial.tiprev)
    revs.extend(cl.revs(start=partial.tiprev + 1))
    if revs:
        partial.update(repo, revs)
        partial.write(repo)
    assert partial.validfor(repo), filtername
    repo._branchcaches[repo.filtername] = partial

class branchcache(dict):
    """A dict-like object that holds the branch heads cache.

    This cache is used to avoid costly computations to determine all the
    branch heads of a repo.

    The cache is serialized on disk in the following format:

    <tip hex node> <tip rev number> [optional filtered repo hex hash]
    <branch head hex node> <open/closed state> <branch name>
    <branch head hex node> <open/closed state> <branch name>
    ...

    The first line is used to check if the cache is still valid. If the
    branch cache is for a filtered repo view, an optional third hash is
    included that hashes the hashes of all filtered revisions.

    The open/closed state is represented by a single letter 'o' or 'c'.
    This field can be used to avoid changelog reads when determining if a
    branch head closes a branch or not.
    """

    def __init__(self, entries=(), tipnode=nullid, tiprev=nullrev,
                 filteredhash=None, closednodes=None):
        super(branchcache, self).__init__(entries)
        self.tipnode = tipnode
        self.tiprev = tiprev
        self.filteredhash = filteredhash
        # closednodes is a set of nodes that close their branch. If the branch
        # cache has been updated, it may contain nodes that are no longer
        # heads.
        if closednodes is None:
            self._closednodes = set()
        else:
            self._closednodes = closednodes
        self._revbranchcache = None

    def _hashfiltered(self, repo):
        """build hash of revisions filtered in the current cache

        Tracking tipnode and tiprev is not enough to ensure validity of the
        cache as they do not help distinguish caches that ignored various
        revisions below tiprev.

        To detect such differences, we build a cache of all ignored revisions.
        """
        cl = repo.changelog
        if not cl.filteredrevs:
            return None
        key = None
        revs = sorted(r for r in cl.filteredrevs if r <= self.tiprev)
        if revs:
            s = util.sha1()
            for rev in revs:
                s.update('%s;' % rev)
            key = s.digest()
        return key

    def validfor(self, repo):
        """Is the cache content valid regarding a repo

        - False when cached tipnode is unknown or if we detect a strip.
        - True when cache is up to date or a subset of current repo."""
        try:
            return ((self.tipnode == repo.changelog.node(self.tiprev))
                    and (self.filteredhash == self._hashfiltered(repo)))
        except IndexError:
            return False

    def _branchtip(self, heads):
        '''Return tuple with last open head in heads and False,
        otherwise return last closed head and True.'''
        tip = heads[-1]
        closed = True
        for h in reversed(heads):
            if h not in self._closednodes:
                tip = h
                closed = False
                break
        return tip, closed

    def branchtip(self, branch):
        '''Return the tipmost open head on branch, otherwise return the
        tipmost closed head on branch.
        Raise KeyError for unknown branch.'''
        return self._branchtip(self[branch])[0]
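
    # For illustration (hypothetical heads): with self['foo'] == [h1, h2, h3]
    # where h3 is in _closednodes but h2 is not, _branchtip() returns
    # (h2, False) and branchtip('foo') returns h2; if every head of 'foo'
    # were closed, _branchtip() would return (h3, True).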

    def branchheads(self, branch, closed=False):
        heads = self[branch]
        if not closed:
            heads = [h for h in heads if h not in self._closednodes]
        return heads

    def iterbranches(self):
        for bn, heads in self.iteritems():
            yield (bn, heads) + self._branchtip(heads)

    def copy(self):
        """return a deep copy of the branchcache object"""
        return branchcache(self, self.tipnode, self.tiprev, self.filteredhash,
                           self._closednodes)

    def write(self, repo):
        try:
            f = repo.opener(_filename(repo), "w", atomictemp=True)
            cachekey = [hex(self.tipnode), str(self.tiprev)]
            if self.filteredhash is not None:
                cachekey.append(hex(self.filteredhash))
            f.write(" ".join(cachekey) + '\n')
            nodecount = 0
            for label, nodes in sorted(self.iteritems()):
                for node in nodes:
                    nodecount += 1
                    if node in self._closednodes:
                        state = 'c'
                    else:
                        state = 'o'
                    f.write("%s %s %s\n" % (hex(node), state,
                                            encoding.fromlocal(label)))
            f.close()
            repo.ui.log('branchcache',
                        'wrote %s branch cache with %d labels and %d nodes\n',
                        repo.filtername, len(self), nodecount)
        except (IOError, OSError, util.Abort), inst:
            repo.ui.debug("couldn't write branch cache: %s\n" % inst)
            # Abort may be raised by a read-only opener
            pass
        if self._revbranchcache:
            self._revbranchcache.write(repo.unfiltered())
            self._revbranchcache = None

    def update(self, repo, revgen):
        """Given a branchhead cache, self, that may have extra nodes or be
        missing heads, and a generator of revisions that are strictly a
        superset of heads missing, this function updates self to be correct.
        """
        starttime = time.time()
        cl = repo.changelog
        # collect new branch entries
        newbranches = {}
        urepo = repo.unfiltered()
        self._revbranchcache = revbranchcache(urepo)
        getbranchinfo = self._revbranchcache.branchinfo
        ucl = urepo.changelog
        for r in revgen:
            branch, closesbranch = getbranchinfo(ucl, r)
            newbranches.setdefault(branch, []).append(r)
            if closesbranch:
                self._closednodes.add(cl.node(r))

        # fetch current topological heads to speed up filtering
        topoheads = set(cl.headrevs())

        # if older branchheads are reachable from new ones, they aren't
        # really branchheads. Note checking parents is insufficient:
        # 1 (branch a) -> 2 (branch b) -> 3 (branch a)
        for branch, newheadrevs in newbranches.iteritems():
            bheads = self.setdefault(branch, [])
            bheadset = set(cl.rev(node) for node in bheads)

            # This has been tested True on all internal usage of this
            # function. Run it again in case of doubt:
            # assert not (set(bheadrevs) & set(newheadrevs))
            newheadrevs.sort()
            bheadset.update(newheadrevs)

            # This prunes out two kinds of heads - heads that are superseded
            # by a head in newheadrevs, and newheadrevs that are not heads
            # because an existing head is their descendant.
            uncertain = bheadset - topoheads
            if uncertain:
                floorrev = min(uncertain)
                ancestors = set(cl.ancestors(newheadrevs, floorrev))
                bheadset -= ancestors
            bheadrevs = sorted(bheadset)
            self[branch] = [cl.node(rev) for rev in bheadrevs]
            tiprev = bheadrevs[-1]
            if tiprev > self.tiprev:
                self.tipnode = cl.node(tiprev)
                self.tiprev = tiprev

        if not self.validfor(repo):
            # the cache key is not valid anymore
            self.tipnode = nullid
            self.tiprev = nullrev
            for heads in self.values():
                tiprev = max(cl.rev(node) for node in heads)
                if tiprev > self.tiprev:
                    self.tipnode = cl.node(tiprev)
                    self.tiprev = tiprev
        self.filteredhash = self._hashfiltered(repo)

        duration = time.time() - starttime
        repo.ui.log('branchcache', 'updated %s branch cache in %.4f seconds\n',
                    repo.filtername, duration)

# Revision branch info cache

_rbcversion = '-v1'
_rbcnames = 'cache/rbc-names' + _rbcversion
_rbcrevs = 'cache/rbc-revs' + _rbcversion
# [4 byte hash prefix][4 byte branch name number with sign bit indicating open]
_rbcrecfmt = '>4sI'
_rbcrecsize = calcsize(_rbcrecfmt)
_rbcnodelen = 4
_rbcbranchidxmask = 0x7fffffff
_rbccloseflag = 0x80000000
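
# For illustration, a record round-trip under the definitions above (values
# are hypothetical): a branch-closing revision whose branch name has index 5
# and whose node starts with '\x12\x34\x56\x78' is stored as
#     rec = pack(_rbcrecfmt, '\x12\x34\x56\x78', 5 | _rbccloseflag)
# and decoded with
#     node, idx = unpack(_rbcrecfmt, rec)
#     close = bool(idx & _rbccloseflag)       # True
#     branchidx = idx & _rbcbranchidxmask     # 5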

class revbranchcache(object):
    """Persistent cache, mapping from revision number to branch name and
    close flag.

    This is a low level cache, independent of filtering.

    Branch names are stored in rbc-names in internal encoding separated by 0.
    rbc-names is append-only, and each branch name is only stored once and
    will thus have a unique index.

    The branch info for each revision is stored in rbc-revs as constant size
    records. The whole file is read into memory, but it is only 'parsed' on
    demand. The file is usually append-only but will be truncated if repo
    modification is detected.
    The record for each revision contains the first 4 bytes of the
    corresponding node hash, and the record is only used if it still matches.
    Even a completely trashed rbc-revs file will thus still give the right
    result while converging towards full recovery ... assuming no incorrectly
    matching node hashes.
    The record also contains 4 bytes where 31 bits contain the index of the
    branch and the last bit indicates that it is a branch-closing commit.
    The usage pattern for rbc-revs is thus somewhat similar to 00changelog.i
    and will grow with it but be 1/8th of its size.
    """

    def __init__(self, repo):
        assert repo.filtername is None
        self._names = [] # branch names in local encoding with static index
        self._rbcrevs = array('c') # structs of type _rbcrecfmt
        self._rbcsnameslen = 0
        try:
            bndata = repo.vfs.read(_rbcnames)
            self._rbcsnameslen = len(bndata) # for verification before writing
            self._names = [encoding.tolocal(bn) for bn in bndata.split('\0')]
        except (IOError, OSError), inst:
            repo.ui.debug("couldn't read revision branch cache names: %s\n" %
                          inst)
        if self._names:
            try:
                data = repo.vfs.read(_rbcrevs)
                self._rbcrevs.fromstring(data)
            except (IOError, OSError), inst:
                repo.ui.debug("couldn't read revision branch cache: %s\n" %
                              inst)
        # remember number of good records on disk
        self._rbcrevslen = min(len(self._rbcrevs) // _rbcrecsize,
                               len(repo.changelog))
        if self._rbcrevslen == 0:
            self._names = []
        self._rbcnamescount = len(self._names) # number of good names on disk
        self._namesreverse = dict((b, r) for r, b in enumerate(self._names))

    def branchinfo(self, changelog, rev):
        """Return branch name and close flag for rev, using and updating
        persistent cache."""
        rbcrevidx = rev * _rbcrecsize
        # if requested rev is missing, add and populate all missing revs
        if len(self._rbcrevs) < rbcrevidx + _rbcrecsize:
            first = len(self._rbcrevs) // _rbcrecsize
            self._rbcrevs.extend('\0' * (len(changelog) * _rbcrecsize -
                                         len(self._rbcrevs)))
            for r in xrange(first, len(changelog)):
                self._branchinfo(changelog, r)

        # fast path: extract data from cache, use it if node is matching
        reponode = changelog.node(rev)[:_rbcnodelen]
        cachenode, branchidx = unpack(
            _rbcrecfmt, buffer(self._rbcrevs, rbcrevidx, _rbcrecsize))
        close = bool(branchidx & _rbccloseflag)
        if close:
            branchidx &= _rbcbranchidxmask
        if cachenode == reponode:
            return self._names[branchidx], close
        # fall back to slow path and make sure it will be written to disk
        self._rbcrevslen = min(self._rbcrevslen, rev)
        return self._branchinfo(changelog, rev)

    def _branchinfo(self, changelog, rev):
        """Retrieve branch info from changelog and update _rbcrevs"""
        b, close = changelog.branchinfo(rev)
        if b in self._namesreverse:
            branchidx = self._namesreverse[b]
        else:
            branchidx = len(self._names)
            self._names.append(b)
            self._namesreverse[b] = branchidx
        reponode = changelog.node(rev)
        if close:
            branchidx |= _rbccloseflag
        rbcrevidx = rev * _rbcrecsize
        rec = array('c')
        rec.fromstring(pack(_rbcrecfmt, reponode, branchidx))
        self._rbcrevs[rbcrevidx:rbcrevidx + _rbcrecsize] = rec
        return b, close

    def write(self, repo):
        """Save branch cache if it is dirty."""
        if self._rbcnamescount < len(self._names):
            try:
                if self._rbcnamescount != 0:
                    f = repo.vfs.open(_rbcnames, 'ab')
                    if f.tell() == self._rbcsnameslen:
                        f.write('\0')
                    else:
                        f.close()
                        self._rbcnamescount = 0
                        self._rbcrevslen = 0
                if self._rbcnamescount == 0:
                    f = repo.vfs.open(_rbcnames, 'wb')
                f.write('\0'.join(encoding.fromlocal(b)
                                  for b in self._names[self._rbcnamescount:]))
                self._rbcsnameslen = f.tell()
                f.close()
            except (IOError, OSError, util.Abort), inst:
                repo.ui.debug("couldn't write revision branch cache names: "
                              "%s\n" % inst)
                return
            self._rbcnamescount = len(self._names)

        start = self._rbcrevslen * _rbcrecsize
        if start != len(self._rbcrevs):
            self._rbcrevslen = min(len(repo.changelog),
                                   len(self._rbcrevs) // _rbcrecsize)
            try:
                f = repo.vfs.open(_rbcrevs, 'ab')
                if f.tell() != start:
                    f.seek(start)
                    f.truncate()
                end = self._rbcrevslen * _rbcrecsize
                f.write(self._rbcrevs[start:end])
                f.close()
            except (IOError, OSError, util.Abort), inst:
                repo.ui.debug("couldn't write revision branch cache: %s\n" %
                              inst)
                return