##// END OF EJS Templates
bdiff: improve worst case behavior by 100x....
bdiff: improve worst case behavior by 100x. on 5.8MB (244.000 lines) text file with similar lines, hash before this change made diff against empty file take 75 seconds. this change improves performance to 0.6 seconds. result is that clone of smallish repo (137MB) with some files like this takes 1 minute instead of 10 minutes. common case of diff is 10% slower now, probably because of worse cache locality. but diff does not affect overall performance in common case (less than 1% of runtime is in diff when it is working ok), so this tradeoff looks good.

File last commit:

r2273:f116ddea default
r2577:fa76c5d6 default
Show More
bundlerepo.py
232 lines | 8.6 KiB | text/x-python | PythonLexer
Benoit Boissinot
add bundlerepo.py: a read-only repo that can use uncompressed bundles...
r1942 """
bundlerepo.py - repository class for viewing uncompressed bundles
This provides a read-only repository interface to bundles as if
they were part of the actual repository.
Copyright 2006 Benoit Boissinot <benoit.boissinot@ens-lyon.org>
This software may be used and distributed according to the terms
of the GNU General Public License, incorporated herein by reference.
"""
from node import *
from i18n import gettext as _
from demandload import demandload
Benoit Boissinot
add support for compressed bundle repositories...
r2273 demandload(globals(), "changegroup util os struct bz2 tempfile")
Benoit Boissinot
add bundlerepo.py: a read-only repo that can use uncompressed bundles...
r1942
Benoit Boissinot
bundlerepo: remove relative import, fix a comment
r1946 import localrepo, changelog, manifest, filelog, revlog
Benoit Boissinot
add bundlerepo.py: a read-only repo that can use uncompressed bundles...
r1942
Benoit Boissinot
bundlerepo: remove relative import, fix a comment
class bundlerevlog(revlog.revlog):
    """Read-only revlog extended with the revisions found in a bundle.

    The revisions already present in the on-disk revlog are served by the
    base class; the revisions read from the bundle file are appended to
    the in-memory index and their deltas are read straight from the
    bundle file on demand.
    """

    def __init__(self, opener, indexfile, datafile, bundlefile,
                 linkmapper=None):
        """Load the on-disk revlog, then index one changegroup of the bundle.

        bundlefile is an opened file positioned at the start of the
        changegroup to read.  linkmapper, when given, is called with the
        changeset node stored in each chunk to compute the link value of
        the new index entry; when it is None the revision number itself
        is used as link.

        Raises util.Abort on a chunk too short to hold the 80 byte header
        and revlog.RevlogError when a chunk refers to an unknown parent.
        """
        # How it works:
        # to retrieve a revision, we need to know the offset of
        # the revision in the bundlefile (an opened file).
        #
        # We store this offset in the index (start).  To differentiate a
        # rev in the bundle from a rev in the revlog, we record every
        # bundle rev in self.basemap, which maps the rev to the node its
        # delta applies to (see bundle() and bundlebase()).
        #
        revlog.revlog.__init__(self, opener, indexfile, datafile)
        self.bundlefile = bundlefile
        self.basemap = {}

        def chunkpositer():
            # yield each chunk together with the file offset it starts at
            for chunk in changegroup.chunkiter(bundlefile):
                pos = bundlefile.tell()
                yield chunk, pos - len(chunk)

        n = self.count()
        prev = None
        for chunk, start in chunkpositer():
            size = len(chunk)
            if size < 80:
                raise util.Abort("invalid changegroup")
            # skip the four 20 byte fields: what remains is the delta
            start += 80
            size -= 80
            node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
            if node in self.nodemap:
                # revision already known: only remember it as the base
                # of the next delta
                prev = node
                continue
            for p in (p1, p2):
                if p not in self.nodemap:
                    # report the parent that is actually missing
                    raise revlog.RevlogError(_("unknown parent %s")
                                             % short(p))
            if linkmapper is None:
                link = n
            else:
                link = linkmapper(cs)

            if not prev:
                # no previous chunk: the delta is against the first parent
                prev = p1
            # start, size, base is not used, link, p1, p2, delta ref
            if self.version == 0:
                e = (start, size, None, link, p1, p2, node)
            else:
                e = (self.offset_type(start, 0), size, -1, None, link,
                     self.rev(p1), self.rev(p2), node)
            self.basemap[n] = prev
            self.index.append(e)
            self.nodemap[node] = n
            prev = node
            n += 1

    def bundle(self, rev):
        """is rev from the bundle"""
        if rev < 0:
            return False
        return rev in self.basemap

    def bundlebase(self, rev):
        """return the node the bundle delta of rev applies to"""
        return self.basemap[rev]

    def chunk(self, rev, df=None, cachelen=4096):
        """return the raw delta stored for rev

        Revisions that are not from the bundle are delegated to the
        base revlog; bundle revisions are read from the bundle file.
        """
        # Warning: in case of bundle, the diff is against bundlebase,
        # not against rev - 1
        # XXX: could use some caching
        if not self.bundle(rev):
            return revlog.revlog.chunk(self, rev, df, cachelen)
        self.bundlefile.seek(self.start(rev))
        return self.bundlefile.read(self.length(rev))

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if self.bundle(rev1) and self.bundle(rev2):
            # hot path for bundle
            revb = self.rev(self.bundlebase(rev2))
            if revb == rev1:
                return self.chunk(rev2)
        elif not self.bundle(rev1) and not self.bundle(rev2):
            # both revisions live in the on-disk revlog: let the base
            # class compute the delta (chunk() would take rev2 as its
            # file argument and return the wrong data)
            return revlog.revlog.revdiff(self, rev1, rev2)

        # mixed case, or bundle delta not based on rev1: diff full texts
        return self.diff(self.revision(self.node(rev1)),
                         self.revision(self.node(rev2)))

    def revision(self, node):
        """return an uncompressed revision of a given node

        Raises revlog.RevlogError when the rebuilt text does not hash
        to node.
        """
        if node == nullid:
            return ""

        text = None
        chain = []
        iter_node = node
        rev = self.rev(iter_node)
        # reconstruct the revision if it is from a changegroup
        while self.bundle(rev):
            if self.cache and self.cache[0] == iter_node:
                text = self.cache[2]
                break
            chain.append(rev)
            iter_node = self.bundlebase(rev)
            rev = self.rev(iter_node)
        if text is None:
            # the walk ended on a revision of the on-disk revlog
            text = revlog.revlog.revision(self, iter_node)

        # apply the bundle deltas from the oldest to the requested one
        while chain:
            delta = self.chunk(chain.pop())
            text = self.patches(text, [delta])

        p1, p2 = self.parents(node)
        if node != revlog.hash(text, p1, p2):
            raise revlog.RevlogError(_("integrity check failed on %s:%d")
                                     % (self.datafile, self.rev(node)))

        self.cache = (node, self.rev(node), text)
        return text

    # The methods below are deliberately unsupported on this class.

    def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
        raise NotImplementedError

    def addgroup(self, revs, linkmapper, transaction, unique=0):
        raise NotImplementedError

    def strip(self, rev, minlink):
        raise NotImplementedError

    def checksize(self):
        raise NotImplementedError
Benoit Boissinot
bundlerepo: remove relative import, fix a comment
class bundlechangelog(bundlerevlog, changelog.changelog):
    """Changelog that also contains the changesets read from a bundle."""

    def __init__(self, opener, bundlefile):
        # Initialise the regular changelog first, then let bundlerevlog
        # index the bundle's chunks; no linkmapper is passed here.
        changelog.changelog.__init__(self, opener)
        index_name = "00changelog.i"
        data_name = "00changelog.d"
        bundlerevlog.__init__(self, opener, index_name, data_name,
                              bundlefile)
Benoit Boissinot
bundlerepo: remove relative import, fix a comment
class bundlemanifest(bundlerevlog, manifest.manifest):
    """Manifest that also contains the revisions read from a bundle."""

    def __init__(self, opener, bundlefile, linkmapper):
        # manifest.manifest.__init__ runs first: the index and data file
        # names are read back from self afterwards.
        manifest.manifest.__init__(self, opener)
        index_name = self.indexfile
        data_name = self.datafile
        bundlerevlog.__init__(self, opener, index_name, data_name,
                              bundlefile, linkmapper)
Benoit Boissinot
bundlerepo: remove relative import, fix a comment
class bundlefilelog(bundlerevlog, filelog.filelog):
    """Filelog that also contains the revisions read from a bundle."""

    def __init__(self, opener, path, bundlefile, linkmapper):
        # filelog.filelog.__init__ runs first: the index and data file
        # names for path are read back from self afterwards.
        filelog.filelog.__init__(self, opener, path)
        index_name = self.indexfile
        data_name = self.datafile
        bundlerevlog.__init__(self, opener, index_name, data_name,
                              bundlefile, linkmapper)
Benoit Boissinot
bundlerepo: remove relative import, fix a comment
class bundlerepository(localrepo.localrepository):
    """Local repository overlaid with the contents of a bundle file.

    The changelog, manifest and the filelogs of the files present in the
    bundle are replaced by their bundle-aware counterparts.
    """

    def __init__(self, ui, path, bundlename):
        """Open the repository at path and index the bundle bundlename.

        A bzip2 compressed bundle ("HG10BZ") is first decompressed into a
        temporary "HG10UN" file created in self.path; that file is
        removed when the object is finalized.

        Raises util.Abort when the header is not a known bundle header.
        """
        localrepo.localrepository.__init__(self, ui, path)
        self.tempfile = None
        self.bundlefile = open(bundlename, "rb")
        header = self.bundlefile.read(6)
        if not header.startswith("HG"):
            raise util.Abort(_("%s: not a Mercurial bundle file") % bundlename)
        elif not header.startswith("HG10"):
            raise util.Abort(_("%s: unknown bundle version") % bundlename)
        elif header == "HG10BZ":
            fdtemp, temp = tempfile.mkstemp(prefix="hg-bundle-",
                                            suffix=".hg10un", dir=self.path)
            self.tempfile = temp
            fptemp = os.fdopen(fdtemp, 'wb')

            def generator(f):
                zd = bz2.BZ2Decompressor()
                # the "BZ" bytes were consumed as part of the 6 byte
                # header read above, so feed them to the decompressor
                # before the rest of the file
                zd.decompress("BZ")
                for chunk in f:
                    yield zd.decompress(chunk)
            gen = generator(util.filechunkiter(self.bundlefile, 4096))

            try:
                fptemp.write("HG10UN")
                for chunk in gen:
                    fptemp.write(chunk)
            finally:
                fptemp.close()
                self.bundlefile.close()

            self.bundlefile = open(self.tempfile, "rb")
            # seek right after the header
            self.bundlefile.seek(6)
        elif header == "HG10UN":
            # nothing to do
            pass
        else:
            raise util.Abort(_("%s: unknown bundle compression type")
                             % bundlename)
        # each constructor consumes one changegroup of the bundle file, so
        # the order of the two statements below matters
        self.changelog = bundlechangelog(self.opener, self.bundlefile)
        self.manifest = bundlemanifest(self.opener, self.bundlefile,
                                       self.changelog.rev)
        # dict with the mapping 'filename' -> position in the bundle
        self.bundlefilespos = {}
        while 1:
            f = changegroup.getchunk(self.bundlefile)
            if not f:
                break
            self.bundlefilespos[f] = self.bundlefile.tell()
            # skip over the chunks of this file; they are only read when
            # file() is called for it
            for c in changegroup.chunkiter(self.bundlefile):
                pass

    def dev(self):
        """Always return -1."""
        # NOTE(review): presumably signals that the repository is not
        # tied to a real device - confirm against localrepository.dev
        return -1

    def file(self, f):
        """Return the filelog of f, bundle-aware when f is in the bundle."""
        if f[0] == '/':
            f = f[1:]
        if f in self.bundlefilespos:
            # position the bundle file on the first chunk of f
            self.bundlefile.seek(self.bundlefilespos[f])
            return bundlefilelog(self.opener, f, self.bundlefile,
                                 self.changelog.rev)
        else:
            return filelog.filelog(self.opener, f)

    def close(self):
        """Close assigned bundle file immediately."""
        self.bundlefile.close()

    def __del__(self):
        """Close the bundle file and remove the temporary file, if any."""
        # __init__ may have raised before these attributes were assigned,
        # so they cannot be assumed to exist here
        bundlefile = getattr(self, 'bundlefile', None)
        if bundlefile is not None and not bundlefile.closed:
            bundlefile.close()
        temp = getattr(self, 'tempfile', None)
        if temp is not None:
            os.unlink(temp)