diff --git a/mercurial/changelog.py b/mercurial/changelog.py --- a/mercurial/changelog.py +++ b/mercurial/changelog.py @@ -11,8 +11,9 @@ from demandload import demandload demandload(globals(), "os time util") class changelog(revlog): - def __init__(self, opener): - revlog.__init__(self, opener, "00changelog.i", "00changelog.d") + def __init__(self, opener, defversion=0): + revlog.__init__(self, opener, "00changelog.i", "00changelog.d", + defversion) def extract(self, text): if not text: diff --git a/mercurial/commands.py b/mercurial/commands.py --- a/mercurial/commands.py +++ b/mercurial/commands.py @@ -1268,7 +1268,7 @@ def copy(ui, repo, *pats, **opts): def debugancestor(ui, index, rev1, rev2): """find the ancestor revision of two revisions in a given index""" - r = revlog.revlog(util.opener(os.getcwd(), audit=False), index, "") + r = revlog.revlog(util.opener(os.getcwd(), audit=False), index, "", 0) a = r.ancestor(r.lookup(rev1), r.lookup(rev2)) ui.write("%d:%s\n" % (r.rev(a), hex(a))) @@ -1372,7 +1372,7 @@ def debugstate(ui, repo): def debugdata(ui, file_, rev): """dump the contents of an data file revision""" r = revlog.revlog(util.opener(os.getcwd(), audit=False), - file_[:-2] + ".i", file_) + file_[:-2] + ".i", file_, 0) try: ui.write(r.revision(r.lookup(rev))) except KeyError: @@ -1380,18 +1380,19 @@ def debugdata(ui, file_, rev): def debugindex(ui, file_): """dump the contents of an index file""" - r = revlog.revlog(util.opener(os.getcwd(), audit=False), file_, "") + r = revlog.revlog(util.opener(os.getcwd(), audit=False), file_, "", 0) ui.write(" rev offset length base linkrev" + " nodeid p1 p2\n") for i in range(r.count()): - e = r.index[i] + node = r.node(i) + pp = r.parents(node) ui.write("% 6d % 9d % 7d % 6d % 7d %s %s %s\n" % ( - i, e[0], e[1], e[2], e[3], - short(e[6]), short(e[4]), short(e[5]))) + i, r.start(i), r.length(i), r.base(i), r.linkrev(node), + short(node), short(pp[0]), short(pp[1]))) def debugindexdot(ui, file_): """dump an index DAG as a .dot file""" - r = revlog.revlog(util.opener(os.getcwd(), audit=False), file_, "") + r = revlog.revlog(util.opener(os.getcwd(), audit=False), file_, "", 0) ui.write("digraph G {\n") for i in range(r.count()): e = r.index[i] diff --git a/mercurial/filelog.py b/mercurial/filelog.py --- a/mercurial/filelog.py +++ b/mercurial/filelog.py @@ -11,10 +11,11 @@ from demandload import * demandload(globals(), "bdiff") class filelog(revlog): - def __init__(self, opener, path): + def __init__(self, opener, path, defversion=0): revlog.__init__(self, opener, os.path.join("data", self.encodedir(path + ".i")), - os.path.join("data", self.encodedir(path + ".d"))) + os.path.join("data", self.encodedir(path + ".d")), + defversion) # This avoids a collision between a file named foo and a dir named # foo.i or foo.d diff --git a/mercurial/localrepo.py b/mercurial/localrepo.py --- a/mercurial/localrepo.py +++ b/mercurial/localrepo.py @@ -10,8 +10,8 @@ import filelog, manifest, changelog, dir from node import * from i18n import gettext as _ from demandload import * -demandload(globals(), "re lock transaction tempfile stat mdiff errno ui") demandload(globals(), "appendfile changegroup") +demandload(globals(), "re lock transaction tempfile stat mdiff errno ui revlog") class localrepository(object): def __del__(self): @@ -35,8 +35,20 @@ class localrepository(object): self.ui = ui.ui(parentui=parentui) self.opener = util.opener(self.path) self.wopener = util.opener(self.root) - self.manifest = manifest.manifest(self.opener) - self.changelog = changelog.changelog(self.opener) + + try: + self.ui.readconfig(self.join("hgrc"), self.root) + except IOError: + pass + + v = self.ui.revlogopts + self.revlogversion = int(v.get('format', 0)) + for x in v.get('flags', "").split(): + self.revlogversion |= revlog.flagstr(x) + + self.manifest = manifest.manifest(self.opener, self.revlogversion) + self.changelog = changelog.changelog(self.opener, self.revlogversion) + self.revlogversion = self.changelog.version self.tagscache = None self.nodetagscache = None self.encodepats = None @@ -48,11 +60,6 @@ class localrepository(object): os.mkdir(self.join("data")) self.dirstate = dirstate.dirstate(self.opener, self.ui, self.root) - try: - self.ui.readconfig(self.join("hgrc"), self.root) - except IOError: - pass - def hook(self, name, throw=False, **args): def runhook(name, cmd): self.ui.note(_("running hook %s: %s\n") % (name, cmd)) @@ -167,7 +174,7 @@ class localrepository(object): def file(self, f): if f[0] == '/': f = f[1:] - return filelog.filelog(self.opener, f) + return filelog.filelog(self.opener, f, self.revlogversion) def getcwd(self): return self.dirstate.getcwd() diff --git a/mercurial/manifest.py b/mercurial/manifest.py --- a/mercurial/manifest.py +++ b/mercurial/manifest.py @@ -12,10 +12,11 @@ from demandload import * demandload(globals(), "bisect array") class manifest(revlog): - def __init__(self, opener): + def __init__(self, opener, defversion=0): self.mapcache = None self.listcache = None - revlog.__init__(self, opener, "00manifest.i", "00manifest.d") + revlog.__init__(self, opener, "00manifest.i", "00manifest.d", + defversion) def read(self, node): if node == nullid: return {} # don't upset local cache diff --git a/mercurial/revlog.py b/mercurial/revlog.py --- a/mercurial/revlog.py +++ b/mercurial/revlog.py @@ -16,6 +16,10 @@ from demandload import demandload demandload(globals(), "binascii changegroup errno heapq mdiff os") demandload(globals(), "sha struct zlib") +# revlog version strings +REVLOGV0 = 0 +REVLOGNG = 1 + def hash(text, p1, p2): """generate a hash from the given text and its parent hashes @@ -51,7 +55,19 @@ def decompress(bin): if t == 'u': return bin[1:] raise RevlogError(_("unknown compression type %r") % t) -indexformat = ">4l20s20s20s" +indexformatv0 = ">4l20s20s20s" +# index ng: +# 6 bytes offset +# 2 bytes flags +# 4 bytes compressed length +# 4 bytes uncompressed length +# 4 bytes: base rev +# 4 bytes link rev +# 4 bytes parent 1 rev +# 4 bytes parent 2 rev +# 32 bytes: nodeid +indexformatng = ">Qiiiiii20s12x" +versionformat = ">i" class lazyparser(object): """ @@ -63,18 +79,16 @@ class lazyparser(object): the lazy objects in revlog with the underlying objects for efficiency in cases where we look at most of the nodes. """ - def __init__(self, data, revlog): + def __init__(self, data, revlog, indexformat): self.data = data self.s = struct.calcsize(indexformat) + self.indexformat = indexformat self.l = len(data)/self.s self.index = [None] * self.l self.map = {nullid: -1} self.all = 0 self.revlog = revlog - def trunc(self, pos): - self.l = pos/self.s - def load(self, pos=None): if self.all: return if pos is not None: @@ -89,10 +103,11 @@ class lazyparser(object): self.revlog.nodemap = self.map while i < end: - d = self.data[i * self.s: (i + 1) * self.s] - e = struct.unpack(indexformat, d) - self.index[i] = e - self.map[e[6]] = i + if not self.index[i]: + d = self.data[i * self.s: (i + 1) * self.s] + e = struct.unpack(self.indexformat, d) + self.index[i] = e + self.map[e[-1]] = i i += 1 class lazyindex(object): @@ -108,12 +123,12 @@ class lazyindex(object): return self.p.index[pos] def __getitem__(self, pos): return self.p.index[pos] or self.load(pos) + def __setitem__(self, pos, item): + self.p.index[pos] = item def __delitem__(self, pos): del self.p.index[pos] def append(self, e): self.p.index.append(e) - def trunc(self, pos): - self.p.trunc(pos) class lazymap(object): """a lazy version of the node map""" @@ -133,10 +148,10 @@ class lazymap(object): yield nullid for i in xrange(self.p.l): try: - yield self.p.index[i][6] + yield self.p.index[i][-1] except: self.p.load(i) - yield self.p.index[i][6] + yield self.p.index[i][-1] def __getitem__(self, key): try: return self.p.map[key] @@ -178,7 +193,7 @@ class revlog(object): remove data, and can use some simple techniques to avoid the need for locking while reading. """ - def __init__(self, opener, indexfile, datafile): + def __init__(self, opener, indexfile, datafile, defversion=0): """ create a revlog object @@ -192,11 +207,14 @@ class revlog(object): self.indexstat = None self.cache = None self.chunkcache = None + self.defversion = defversion self.load() def load(self): + v = self.defversion try: f = self.opener(self.indexfile) + i = f.read() except IOError, inst: if inst.errno != errno.ENOENT: raise @@ -213,56 +231,103 @@ class revlog(object): and st.st_mtime == oldst.st_mtime and st.st_ctime == oldst.st_ctime): return - self.indexstat = st - i = f.read() + self.indexstat = st + if len(i) > 0: + v = struct.unpack(versionformat, i[:4])[0] + if v != 0: + flags = v & ~0xFFFF + fmt = v & 0xFFFF + if fmt != REVLOGNG or (flags & ~(REVLOGNGINLINEDATA)): + raise RevlogError( + _("unknown version format %d or flags %x on %s") % + (v, flags, self.indexfile)) + self.version = v + if v == 0: + self.indexformat = indexformatv0 + else: + self.indexformat = indexformatng - if i and i[:4] != "\0\0\0\0": - raise RevlogError(_("incompatible revlog signature on %s") % - self.indexfile) - - if len(i) > 10000: - # big index, let's parse it on demand - parser = lazyparser(i, self) - self.index = lazyindex(parser) - self.nodemap = lazymap(parser) + if i: + if st and st.st_size > 10000: + # big index, let's parse it on demand + parser = lazyparser(i, self, self.indexformat) + self.index = lazyindex(parser) + self.nodemap = lazymap(parser) + else: + self.parseindex(i) + if self.version != 0: + e = list(self.index[0]) + type = self.ngtype(e[0]) + e[0] = self.offset_type(0, type) + self.index[0] = e else: - s = struct.calcsize(indexformat) - l = len(i) / s - self.index = [None] * l - m = [None] * l + self.nodemap = { nullid: -1} + self.index = [] + + + def parseindex(self, data): + s = struct.calcsize(self.indexformat) + l = len(data) + self.index = [] + self.nodemap = {nullid: -1} + off = 0 + n = 0 + while off < l: + e = struct.unpack(self.indexformat, data[off:off + s]) + self.index.append(e) + self.nodemap[e[-1]] = n + n += 1 + off += s - n = 0 - for f in xrange(0, l * s, s): - # offset, size, base, linkrev, p1, p2, nodeid - e = struct.unpack(indexformat, i[f:f + s]) - m[n] = (e[6], n) - self.index[n] = e - n += 1 + def ngoffset(self, q): + if q & 0xFFFF: + raise RevlogError(_('%s: incompatible revision flag %x') % + (self.indexfile, type)) + return long(q >> 16) + + def ngtype(self, q): + return int(q & 0xFFFF) - self.nodemap = dict(m) - self.nodemap[nullid] = -1 + def offset_type(self, offset, type): + return long(long(offset) << 16 | type) + + def loadindexmap(self): + """loads both the map and the index from the lazy parser""" + if isinstance(self.index, lazyindex): + p = self.index.p + p.load() def tip(self): return self.node(len(self.index) - 1) def count(self): return len(self.index) - def node(self, rev): return (rev < 0) and nullid or self.index[rev][6] + def node(self, rev): + return (rev < 0) and nullid or self.index[rev][-1] def rev(self, node): try: return self.nodemap[node] except KeyError: raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node))) - def linkrev(self, node): return self.index[self.rev(node)][3] + def linkrev(self, node): return self.index[self.rev(node)][-4] def parents(self, node): if node == nullid: return (nullid, nullid) - return self.index[self.rev(node)][4:6] + r = self.rev(node) + d = self.index[r][-3:-1] + if self.version == 0: + return d + return [ self.node(x) for x in d ] + def start(self, rev): + if rev < 0: + return -1 + if self.version != 0: + return self.ngoffset(self.index[rev][0]) + return self.index[rev][0] + def end(self, rev): return self.start(rev) + self.length(rev) - def start(self, rev): return (rev < 0) and -1 or self.index[rev][0] def length(self, rev): if rev < 0: return 0 else: return self.index[rev][1] - def end(self, rev): return self.start(rev) + self.length(rev) - def base(self, rev): return (rev < 0) and rev or self.index[rev][2] + def base(self, rev): return (rev < 0) and rev or self.index[rev][-5] def reachable(self, rev, stop=None): reachable = {} @@ -501,18 +566,18 @@ class revlog(object): """apply a list of patches to a string""" return mdiff.patches(t, pl) - def chunk(self, rev): + def chunk(self, rev, df=None, cachelen=4096): start, length = self.start(rev), self.length(rev) end = start + length - - def loadcache(): - cache_length = max(4096 * 1024, length) # 4Mo - df = self.opener(self.datafile) + def loadcache(df): + cache_length = max(cachelen, length) # 4k + if not df: + df = self.opener(self.datafile) df.seek(start) self.chunkcache = (start, df.read(cache_length)) if not self.chunkcache: - loadcache() + loadcache(df) cache_start = self.chunkcache[0] cache_end = cache_start + len(self.chunkcache[1]) @@ -520,7 +585,7 @@ class revlog(object): # it is cached offset = start - cache_start else: - loadcache() + loadcache(df) offset = 0 #def checkchunk(): @@ -555,16 +620,18 @@ class revlog(object): rev = self.rev(node) base = self.base(rev) + df = self.opener(self.datafile) + # do we have useful data cached? if self.cache and self.cache[1] >= base and self.cache[1] < rev: base = self.cache[1] text = self.cache[2] else: - text = self.chunk(base) + text = self.chunk(base, df=df) bins = [] for r in xrange(base + 1, rev + 1): - bins.append(self.chunk(r)) + bins.append(self.chunk(r, df=df)) text = self.patches(text, bins) @@ -621,19 +688,30 @@ class revlog(object): if t >= 0: offset = self.end(t) - e = (offset, l, base, link, p1, p2, node) + if self.version == 0: + e = (offset, l, base, link, p1, p2, node) + else: + e = (self.offset_type(offset, 0), l, len(text), + base, link, self.rev(p1), self.rev(p2), node) self.index.append(e) self.nodemap[node] = n - entry = struct.pack(indexformat, *e) + entry = struct.pack(self.indexformat, *e) - transaction.add(self.datafile, e[0]) + transaction.add(self.datafile, offset) + transaction.add(self.indexfile, n * len(entry)) f = self.opener(self.datafile, "a") if data[0]: f.write(data[0]) f.write(data[1]) - transaction.add(self.indexfile, n * len(entry)) - self.opener(self.indexfile, "a").write(entry) + f = self.opener(self.indexfile, "a") + + if len(self.index) == 1 and self.version != 0: + l = struct.pack(versionformat, self.version) + f.write(l) + entry = entry[4:] + + f.write(entry) self.cache = (node, n, text) return node @@ -748,16 +826,12 @@ class revlog(object): base = prev = -1 start = end = measure = 0 if r: - base = self.base(t) - start = self.start(base) end = self.end(t) - measure = self.length(base) - prev = self.tip() + ifh = self.opener(self.indexfile, "a+") + transaction.add(self.indexfile, ifh.tell()) transaction.add(self.datafile, end) - transaction.add(self.indexfile, r * struct.calcsize(indexformat)) dfh = self.opener(self.datafile, "a") - ifh = self.opener(self.indexfile, "a") # loop through our set of deltas chain = None @@ -794,7 +868,8 @@ class revlog(object): if chain != prev or (end - start + len(cdelta)) > measure * 2: # flush our writes here so we can read it in revision - dfh.flush() + if dfh: + dfh.flush() ifh.flush() text = self.revision(chain) text = self.patches(text, [delta]) @@ -803,19 +878,21 @@ class revlog(object): raise RevlogError(_("consistency error adding group")) measure = len(text) else: - e = (end, len(cdelta), base, link, p1, p2, node) + if self.version == 0: + e = (end, len(cdelta), base, link, p1, p2, node) + else: + e = (self.offset_type(end, 0), len(cdelta), -1, base, + link, self.rev(p1), self.rev(p2), node) self.index.append(e) self.nodemap[node] = r dfh.write(cdelta) - ifh.write(struct.pack(indexformat, *e)) + ifh.write(struct.pack(self.indexformat, *e)) t, r, chain, prev = r, r + 1, node, node base = self.base(t) start = self.start(base) end = self.end(t) - dfh.close() - ifh.close() if node is None: raise RevlogError(_("group to be added is empty")) return node @@ -824,32 +901,34 @@ class revlog(object): if self.count() == 0 or rev >= self.count(): return + if isinstance(self.index, lazyindex): + self.loadindexmap() + # When stripping away a revision, we need to make sure it # does not actually belong to an older changeset. # The minlink parameter defines the oldest revision # we're allowed to strip away. - while minlink > self.index[rev][3]: + while minlink > self.index[rev][-4]: rev += 1 if rev >= self.count(): return # first truncate the files on disk end = self.start(rev) - self.opener(self.datafile, "a").truncate(end) - end = rev * struct.calcsize(indexformat) - self.opener(self.indexfile, "a").truncate(end) + df = self.opener(self.datafile, "a") + df.truncate(end) + end = rev * struct.calcsize(self.indexformat) + + indexf = self.opener(self.indexfile, "a") + indexf.truncate(end) # then reset internal state in memory to forget those revisions self.cache = None self.chunkcache = None - for p in self.index[rev:]: - del self.nodemap[p[6]] - del self.index[rev:] + for x in xrange(rev, self.count()): + del self.nodemap[self.node(x)] - # truncating the lazyindex also truncates the lazymap. - if isinstance(self.index, lazyindex): - self.index.trunc(end) - + del self.index[rev:] def checksize(self): expected = 0 @@ -870,7 +949,7 @@ class revlog(object): f = self.opener(self.indexfile) f.seek(0, 2) actual = f.tell() - s = struct.calcsize(indexformat) + s = struct.calcsize(self.indexformat) i = actual / s di = actual - (i * s) except IOError, inst: diff --git a/mercurial/statichttprepo.py b/mercurial/statichttprepo.py --- a/mercurial/statichttprepo.py +++ b/mercurial/statichttprepo.py @@ -32,6 +32,7 @@ class statichttprepository(localrepo.loc def __init__(self, ui, path): self.path = (path + "/.hg") self.ui = ui + self.revlogversion = 0 self.opener = opener(self.path) self.manifest = manifest.manifest(self.opener) self.changelog = changelog.changelog(self.opener) diff --git a/mercurial/ui.py b/mercurial/ui.py --- a/mercurial/ui.py +++ b/mercurial/ui.py @@ -29,6 +29,7 @@ class ui(object): self.diffcache = None self.header = [] self.prev_header = [] + self.revlogopts = self.configrevlog() else: # parentui may point to an ui object which is already a child self.parentui = parentui.parentui or parentui @@ -134,6 +135,12 @@ class ui(object): result.append(path) return result + def configrevlog(self): + ret = {} + for x in self.configitems("revlog"): + k = x[0].lower() + ret[k] = x[1] + return ret def diffopts(self): if self.diffcache: return self.diffcache