##// END OF EJS Templates
Implement revlogng....
mason@suse.com -
r2072:74d3f533 default
parent child Browse files
Show More
@@ -11,8 +11,9 b' from demandload import demandload'
11 11 demandload(globals(), "os time util")
12 12
13 13 class changelog(revlog):
14 def __init__(self, opener):
15 revlog.__init__(self, opener, "00changelog.i", "00changelog.d")
14 def __init__(self, opener, defversion=0):
15 revlog.__init__(self, opener, "00changelog.i", "00changelog.d",
16 defversion)
16 17
17 18 def extract(self, text):
18 19 if not text:
@@ -1268,7 +1268,7 b' def copy(ui, repo, *pats, **opts):'
1268 1268
1269 1269 def debugancestor(ui, index, rev1, rev2):
1270 1270 """find the ancestor revision of two revisions in a given index"""
1271 r = revlog.revlog(util.opener(os.getcwd(), audit=False), index, "")
1271 r = revlog.revlog(util.opener(os.getcwd(), audit=False), index, "", 0)
1272 1272 a = r.ancestor(r.lookup(rev1), r.lookup(rev2))
1273 1273 ui.write("%d:%s\n" % (r.rev(a), hex(a)))
1274 1274
@@ -1372,7 +1372,7 b' def debugstate(ui, repo):'
1372 1372 def debugdata(ui, file_, rev):
1373 1373 """dump the contents of an data file revision"""
1374 1374 r = revlog.revlog(util.opener(os.getcwd(), audit=False),
1375 file_[:-2] + ".i", file_)
1375 file_[:-2] + ".i", file_, 0)
1376 1376 try:
1377 1377 ui.write(r.revision(r.lookup(rev)))
1378 1378 except KeyError:
@@ -1380,18 +1380,19 b' def debugdata(ui, file_, rev):'
1380 1380
1381 1381 def debugindex(ui, file_):
1382 1382 """dump the contents of an index file"""
1383 r = revlog.revlog(util.opener(os.getcwd(), audit=False), file_, "")
1383 r = revlog.revlog(util.opener(os.getcwd(), audit=False), file_, "", 0)
1384 1384 ui.write(" rev offset length base linkrev" +
1385 1385 " nodeid p1 p2\n")
1386 1386 for i in range(r.count()):
1387 e = r.index[i]
1387 node = r.node(i)
1388 pp = r.parents(node)
1388 1389 ui.write("% 6d % 9d % 7d % 6d % 7d %s %s %s\n" % (
1389 i, e[0], e[1], e[2], e[3],
1390 short(e[6]), short(e[4]), short(e[5])))
1390 i, r.start(i), r.length(i), r.base(i), r.linkrev(node),
1391 short(node), short(pp[0]), short(pp[1])))
1391 1392
1392 1393 def debugindexdot(ui, file_):
1393 1394 """dump an index DAG as a .dot file"""
1394 r = revlog.revlog(util.opener(os.getcwd(), audit=False), file_, "")
1395 r = revlog.revlog(util.opener(os.getcwd(), audit=False), file_, "", 0)
1395 1396 ui.write("digraph G {\n")
1396 1397 for i in range(r.count()):
1397 1398 e = r.index[i]
@@ -11,10 +11,11 b' from demandload import *'
11 11 demandload(globals(), "bdiff")
12 12
13 13 class filelog(revlog):
14 def __init__(self, opener, path):
14 def __init__(self, opener, path, defversion=0):
15 15 revlog.__init__(self, opener,
16 16 os.path.join("data", self.encodedir(path + ".i")),
17 os.path.join("data", self.encodedir(path + ".d")))
17 os.path.join("data", self.encodedir(path + ".d")),
18 defversion)
18 19
19 20 # This avoids a collision between a file named foo and a dir named
20 21 # foo.i or foo.d
@@ -10,8 +10,8 b' import filelog, manifest, changelog, dir'
10 10 from node import *
11 11 from i18n import gettext as _
12 12 from demandload import *
13 demandload(globals(), "re lock transaction tempfile stat mdiff errno ui")
14 13 demandload(globals(), "appendfile changegroup")
14 demandload(globals(), "re lock transaction tempfile stat mdiff errno ui revlog")
15 15
16 16 class localrepository(object):
17 17 def __del__(self):
@@ -35,8 +35,20 b' class localrepository(object):'
35 35 self.ui = ui.ui(parentui=parentui)
36 36 self.opener = util.opener(self.path)
37 37 self.wopener = util.opener(self.root)
38 self.manifest = manifest.manifest(self.opener)
39 self.changelog = changelog.changelog(self.opener)
38
39 try:
40 self.ui.readconfig(self.join("hgrc"), self.root)
41 except IOError:
42 pass
43
44 v = self.ui.revlogopts
45 self.revlogversion = int(v.get('format', 0))
46 for x in v.get('flags', "").split():
47 self.revlogversion |= revlog.flagstr(x)
48
49 self.manifest = manifest.manifest(self.opener, self.revlogversion)
50 self.changelog = changelog.changelog(self.opener, self.revlogversion)
51 self.revlogversion = self.changelog.version
40 52 self.tagscache = None
41 53 self.nodetagscache = None
42 54 self.encodepats = None
@@ -48,11 +60,6 b' class localrepository(object):'
48 60 os.mkdir(self.join("data"))
49 61
50 62 self.dirstate = dirstate.dirstate(self.opener, self.ui, self.root)
51 try:
52 self.ui.readconfig(self.join("hgrc"), self.root)
53 except IOError:
54 pass
55
56 63 def hook(self, name, throw=False, **args):
57 64 def runhook(name, cmd):
58 65 self.ui.note(_("running hook %s: %s\n") % (name, cmd))
@@ -167,7 +174,7 b' class localrepository(object):'
167 174 def file(self, f):
168 175 if f[0] == '/':
169 176 f = f[1:]
170 return filelog.filelog(self.opener, f)
177 return filelog.filelog(self.opener, f, self.revlogversion)
171 178
172 179 def getcwd(self):
173 180 return self.dirstate.getcwd()
@@ -12,10 +12,11 b' from demandload import *'
12 12 demandload(globals(), "bisect array")
13 13
14 14 class manifest(revlog):
15 def __init__(self, opener):
15 def __init__(self, opener, defversion=0):
16 16 self.mapcache = None
17 17 self.listcache = None
18 revlog.__init__(self, opener, "00manifest.i", "00manifest.d")
18 revlog.__init__(self, opener, "00manifest.i", "00manifest.d",
19 defversion)
19 20
20 21 def read(self, node):
21 22 if node == nullid: return {} # don't upset local cache
@@ -16,6 +16,10 b' from demandload import demandload'
16 16 demandload(globals(), "binascii changegroup errno heapq mdiff os")
17 17 demandload(globals(), "sha struct zlib")
18 18
19 # revlog version numbers
20 REVLOGV0 = 0
21 REVLOGNG = 1
22
19 23 def hash(text, p1, p2):
20 24 """generate a hash from the given text and its parent hashes
21 25
@@ -51,7 +55,19 b' def decompress(bin):'
51 55 if t == 'u': return bin[1:]
52 56 raise RevlogError(_("unknown compression type %r") % t)
53 57
54 indexformat = ">4l20s20s20s"
58 indexformatv0 = ">4l20s20s20s"
59 # index ng:
60 # 6 bytes offset
61 # 2 bytes flags
62 # 4 bytes compressed length
63 # 4 bytes uncompressed length
64 # 4 bytes: base rev
65 # 4 bytes link rev
66 # 4 bytes parent 1 rev
67 # 4 bytes parent 2 rev
68 # 32 bytes: nodeid (20-byte hash followed by 12 pad bytes)
69 indexformatng = ">Qiiiiii20s12x"
70 versionformat = ">i"
55 71
56 72 class lazyparser(object):
57 73 """
@@ -63,18 +79,16 b' class lazyparser(object):'
63 79 the lazy objects in revlog with the underlying objects for
64 80 efficiency in cases where we look at most of the nodes.
65 81 """
66 def __init__(self, data, revlog):
82 def __init__(self, data, revlog, indexformat):
67 83 self.data = data
68 84 self.s = struct.calcsize(indexformat)
85 self.indexformat = indexformat
69 86 self.l = len(data)/self.s
70 87 self.index = [None] * self.l
71 88 self.map = {nullid: -1}
72 89 self.all = 0
73 90 self.revlog = revlog
74 91
75 def trunc(self, pos):
76 self.l = pos/self.s
77
78 92 def load(self, pos=None):
79 93 if self.all: return
80 94 if pos is not None:
@@ -89,10 +103,11 b' class lazyparser(object):'
89 103 self.revlog.nodemap = self.map
90 104
91 105 while i < end:
92 d = self.data[i * self.s: (i + 1) * self.s]
93 e = struct.unpack(indexformat, d)
94 self.index[i] = e
95 self.map[e[6]] = i
106 if not self.index[i]:
107 d = self.data[i * self.s: (i + 1) * self.s]
108 e = struct.unpack(self.indexformat, d)
109 self.index[i] = e
110 self.map[e[-1]] = i
96 111 i += 1
97 112
98 113 class lazyindex(object):
@@ -108,12 +123,12 b' class lazyindex(object):'
108 123 return self.p.index[pos]
109 124 def __getitem__(self, pos):
110 125 return self.p.index[pos] or self.load(pos)
126 def __setitem__(self, pos, item):
127 self.p.index[pos] = item
111 128 def __delitem__(self, pos):
112 129 del self.p.index[pos]
113 130 def append(self, e):
114 131 self.p.index.append(e)
115 def trunc(self, pos):
116 self.p.trunc(pos)
117 132
118 133 class lazymap(object):
119 134 """a lazy version of the node map"""
@@ -133,10 +148,10 b' class lazymap(object):'
133 148 yield nullid
134 149 for i in xrange(self.p.l):
135 150 try:
136 yield self.p.index[i][6]
151 yield self.p.index[i][-1]
137 152 except:
138 153 self.p.load(i)
139 yield self.p.index[i][6]
154 yield self.p.index[i][-1]
140 155 def __getitem__(self, key):
141 156 try:
142 157 return self.p.map[key]
@@ -178,7 +193,7 b' class revlog(object):'
178 193 remove data, and can use some simple techniques to avoid the need
179 194 for locking while reading.
180 195 """
181 def __init__(self, opener, indexfile, datafile):
196 def __init__(self, opener, indexfile, datafile, defversion=0):
182 197 """
183 198 create a revlog object
184 199
@@ -192,11 +207,14 b' class revlog(object):'
192 207 self.indexstat = None
193 208 self.cache = None
194 209 self.chunkcache = None
210 self.defversion = defversion
195 211 self.load()
196 212
197 213 def load(self):
214 v = self.defversion
198 215 try:
199 216 f = self.opener(self.indexfile)
217 i = f.read()
200 218 except IOError, inst:
201 219 if inst.errno != errno.ENOENT:
202 220 raise
@@ -213,56 +231,103 b' class revlog(object):'
213 231 and st.st_mtime == oldst.st_mtime
214 232 and st.st_ctime == oldst.st_ctime):
215 233 return
216 self.indexstat = st
217 i = f.read()
234 self.indexstat = st
235 if len(i) > 0:
236 v = struct.unpack(versionformat, i[:4])[0]
237 if v != 0:
238 flags = v & ~0xFFFF
239 fmt = v & 0xFFFF
240 if fmt != REVLOGNG or (flags & ~(REVLOGNGINLINEDATA)):
241 raise RevlogError(
242 _("unknown version format %d or flags %x on %s") %
243 (v, flags, self.indexfile))
244 self.version = v
245 if v == 0:
246 self.indexformat = indexformatv0
247 else:
248 self.indexformat = indexformatng
218 249
219 if i and i[:4] != "\0\0\0\0":
220 raise RevlogError(_("incompatible revlog signature on %s") %
221 self.indexfile)
222
223 if len(i) > 10000:
224 # big index, let's parse it on demand
225 parser = lazyparser(i, self)
226 self.index = lazyindex(parser)
227 self.nodemap = lazymap(parser)
250 if i:
251 if st and st.st_size > 10000:
252 # big index, let's parse it on demand
253 parser = lazyparser(i, self, self.indexformat)
254 self.index = lazyindex(parser)
255 self.nodemap = lazymap(parser)
256 else:
257 self.parseindex(i)
258 if self.version != 0:
259 e = list(self.index[0])
260 type = self.ngtype(e[0])
261 e[0] = self.offset_type(0, type)
262 self.index[0] = e
228 263 else:
229 s = struct.calcsize(indexformat)
230 l = len(i) / s
231 self.index = [None] * l
232 m = [None] * l
264 self.nodemap = { nullid: -1}
265 self.index = []
266
267
268 def parseindex(self, data):
269 s = struct.calcsize(self.indexformat)
270 l = len(data)
271 self.index = []
272 self.nodemap = {nullid: -1}
273 off = 0
274 n = 0
275 while off < l:
276 e = struct.unpack(self.indexformat, data[off:off + s])
277 self.index.append(e)
278 self.nodemap[e[-1]] = n
279 n += 1
280 off += s
233 281
234 n = 0
235 for f in xrange(0, l * s, s):
236 # offset, size, base, linkrev, p1, p2, nodeid
237 e = struct.unpack(indexformat, i[f:f + s])
238 m[n] = (e[6], n)
239 self.index[n] = e
240 n += 1
282 def ngoffset(self, q):
283 if q & 0xFFFF:
284 raise RevlogError(_('%s: incompatible revision flag %x') %
285 (self.indexfile, type))
286 return long(q >> 16)
287
288 def ngtype(self, q):
289 return int(q & 0xFFFF)
241 290
242 self.nodemap = dict(m)
243 self.nodemap[nullid] = -1
291 def offset_type(self, offset, type):
292 return long(long(offset) << 16 | type)
293
294 def loadindexmap(self):
295 """loads both the map and the index from the lazy parser"""
296 if isinstance(self.index, lazyindex):
297 p = self.index.p
298 p.load()
244 299
245 300 def tip(self): return self.node(len(self.index) - 1)
246 301 def count(self): return len(self.index)
247 def node(self, rev): return (rev < 0) and nullid or self.index[rev][6]
302 def node(self, rev):
303 return (rev < 0) and nullid or self.index[rev][-1]
248 304 def rev(self, node):
249 305 try:
250 306 return self.nodemap[node]
251 307 except KeyError:
252 308 raise RevlogError(_('%s: no node %s') % (self.indexfile, hex(node)))
253 def linkrev(self, node): return self.index[self.rev(node)][3]
309 def linkrev(self, node): return self.index[self.rev(node)][-4]
254 310 def parents(self, node):
255 311 if node == nullid: return (nullid, nullid)
256 return self.index[self.rev(node)][4:6]
312 r = self.rev(node)
313 d = self.index[r][-3:-1]
314 if self.version == 0:
315 return d
316 return [ self.node(x) for x in d ]
317 def start(self, rev):
318 if rev < 0:
319 return -1
320 if self.version != 0:
321 return self.ngoffset(self.index[rev][0])
322 return self.index[rev][0]
323 def end(self, rev): return self.start(rev) + self.length(rev)
257 324
258 def start(self, rev): return (rev < 0) and -1 or self.index[rev][0]
259 325 def length(self, rev):
260 326 if rev < 0:
261 327 return 0
262 328 else:
263 329 return self.index[rev][1]
264 def end(self, rev): return self.start(rev) + self.length(rev)
265 def base(self, rev): return (rev < 0) and rev or self.index[rev][2]
330 def base(self, rev): return (rev < 0) and rev or self.index[rev][-5]
266 331
267 332 def reachable(self, rev, stop=None):
268 333 reachable = {}
@@ -501,18 +566,18 b' class revlog(object):'
501 566 """apply a list of patches to a string"""
502 567 return mdiff.patches(t, pl)
503 568
504 def chunk(self, rev):
569 def chunk(self, rev, df=None, cachelen=4096):
505 570 start, length = self.start(rev), self.length(rev)
506 571 end = start + length
507
508 def loadcache():
509 cache_length = max(4096 * 1024, length) # 4Mo
510 df = self.opener(self.datafile)
572 def loadcache(df):
573 cache_length = max(cachelen, length) # 4k
574 if not df:
575 df = self.opener(self.datafile)
511 576 df.seek(start)
512 577 self.chunkcache = (start, df.read(cache_length))
513 578
514 579 if not self.chunkcache:
515 loadcache()
580 loadcache(df)
516 581
517 582 cache_start = self.chunkcache[0]
518 583 cache_end = cache_start + len(self.chunkcache[1])
@@ -520,7 +585,7 b' class revlog(object):'
520 585 # it is cached
521 586 offset = start - cache_start
522 587 else:
523 loadcache()
588 loadcache(df)
524 589 offset = 0
525 590
526 591 #def checkchunk():
@@ -555,16 +620,18 b' class revlog(object):'
555 620 rev = self.rev(node)
556 621 base = self.base(rev)
557 622
623 df = self.opener(self.datafile)
624
558 625 # do we have useful data cached?
559 626 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
560 627 base = self.cache[1]
561 628 text = self.cache[2]
562 629 else:
563 text = self.chunk(base)
630 text = self.chunk(base, df=df)
564 631
565 632 bins = []
566 633 for r in xrange(base + 1, rev + 1):
567 bins.append(self.chunk(r))
634 bins.append(self.chunk(r, df=df))
568 635
569 636 text = self.patches(text, bins)
570 637
@@ -621,19 +688,30 b' class revlog(object):'
621 688 if t >= 0:
622 689 offset = self.end(t)
623 690
624 e = (offset, l, base, link, p1, p2, node)
691 if self.version == 0:
692 e = (offset, l, base, link, p1, p2, node)
693 else:
694 e = (self.offset_type(offset, 0), l, len(text),
695 base, link, self.rev(p1), self.rev(p2), node)
625 696
626 697 self.index.append(e)
627 698 self.nodemap[node] = n
628 entry = struct.pack(indexformat, *e)
699 entry = struct.pack(self.indexformat, *e)
629 700
630 transaction.add(self.datafile, e[0])
701 transaction.add(self.datafile, offset)
702 transaction.add(self.indexfile, n * len(entry))
631 703 f = self.opener(self.datafile, "a")
632 704 if data[0]:
633 705 f.write(data[0])
634 706 f.write(data[1])
635 transaction.add(self.indexfile, n * len(entry))
636 self.opener(self.indexfile, "a").write(entry)
707 f = self.opener(self.indexfile, "a")
708
709 if len(self.index) == 1 and self.version != 0:
710 l = struct.pack(versionformat, self.version)
711 f.write(l)
712 entry = entry[4:]
713
714 f.write(entry)
637 715
638 716 self.cache = (node, n, text)
639 717 return node
@@ -748,16 +826,12 b' class revlog(object):'
748 826 base = prev = -1
749 827 start = end = measure = 0
750 828 if r:
751 base = self.base(t)
752 start = self.start(base)
753 829 end = self.end(t)
754 measure = self.length(base)
755 prev = self.tip()
756 830
831 ifh = self.opener(self.indexfile, "a+")
832 transaction.add(self.indexfile, ifh.tell())
757 833 transaction.add(self.datafile, end)
758 transaction.add(self.indexfile, r * struct.calcsize(indexformat))
759 834 dfh = self.opener(self.datafile, "a")
760 ifh = self.opener(self.indexfile, "a")
761 835
762 836 # loop through our set of deltas
763 837 chain = None
@@ -794,7 +868,8 b' class revlog(object):'
794 868
795 869 if chain != prev or (end - start + len(cdelta)) > measure * 2:
796 870 # flush our writes here so we can read it in revision
797 dfh.flush()
871 if dfh:
872 dfh.flush()
798 873 ifh.flush()
799 874 text = self.revision(chain)
800 875 text = self.patches(text, [delta])
@@ -803,19 +878,21 b' class revlog(object):'
803 878 raise RevlogError(_("consistency error adding group"))
804 879 measure = len(text)
805 880 else:
806 e = (end, len(cdelta), base, link, p1, p2, node)
881 if self.version == 0:
882 e = (end, len(cdelta), base, link, p1, p2, node)
883 else:
884 e = (self.offset_type(end, 0), len(cdelta), -1, base,
885 link, self.rev(p1), self.rev(p2), node)
807 886 self.index.append(e)
808 887 self.nodemap[node] = r
809 888 dfh.write(cdelta)
810 ifh.write(struct.pack(indexformat, *e))
889 ifh.write(struct.pack(self.indexformat, *e))
811 890
812 891 t, r, chain, prev = r, r + 1, node, node
813 892 base = self.base(t)
814 893 start = self.start(base)
815 894 end = self.end(t)
816 895
817 dfh.close()
818 ifh.close()
819 896 if node is None:
820 897 raise RevlogError(_("group to be added is empty"))
821 898 return node
@@ -824,32 +901,34 b' class revlog(object):'
824 901 if self.count() == 0 or rev >= self.count():
825 902 return
826 903
904 if isinstance(self.index, lazyindex):
905 self.loadindexmap()
906
827 907 # When stripping away a revision, we need to make sure it
828 908 # does not actually belong to an older changeset.
829 909 # The minlink parameter defines the oldest revision
830 910 # we're allowed to strip away.
831 while minlink > self.index[rev][3]:
911 while minlink > self.index[rev][-4]:
832 912 rev += 1
833 913 if rev >= self.count():
834 914 return
835 915
836 916 # first truncate the files on disk
837 917 end = self.start(rev)
838 self.opener(self.datafile, "a").truncate(end)
839 end = rev * struct.calcsize(indexformat)
840 self.opener(self.indexfile, "a").truncate(end)
918 df = self.opener(self.datafile, "a")
919 df.truncate(end)
920 end = rev * struct.calcsize(self.indexformat)
921
922 indexf = self.opener(self.indexfile, "a")
923 indexf.truncate(end)
841 924
842 925 # then reset internal state in memory to forget those revisions
843 926 self.cache = None
844 927 self.chunkcache = None
845 for p in self.index[rev:]:
846 del self.nodemap[p[6]]
847 del self.index[rev:]
928 for x in xrange(rev, self.count()):
929 del self.nodemap[self.node(x)]
848 930
849 # truncating the lazyindex also truncates the lazymap.
850 if isinstance(self.index, lazyindex):
851 self.index.trunc(end)
852
931 del self.index[rev:]
853 932
854 933 def checksize(self):
855 934 expected = 0
@@ -870,7 +949,7 b' class revlog(object):'
870 949 f = self.opener(self.indexfile)
871 950 f.seek(0, 2)
872 951 actual = f.tell()
873 s = struct.calcsize(indexformat)
952 s = struct.calcsize(self.indexformat)
874 953 i = actual / s
875 954 di = actual - (i * s)
876 955 except IOError, inst:
@@ -32,6 +32,7 b' class statichttprepository(localrepo.loc'
32 32 def __init__(self, ui, path):
33 33 self.path = (path + "/.hg")
34 34 self.ui = ui
35 self.revlogversion = 0
35 36 self.opener = opener(self.path)
36 37 self.manifest = manifest.manifest(self.opener)
37 38 self.changelog = changelog.changelog(self.opener)
@@ -29,6 +29,7 b' class ui(object):'
29 29 self.diffcache = None
30 30 self.header = []
31 31 self.prev_header = []
32 self.revlogopts = self.configrevlog()
32 33 else:
33 34 # parentui may point to an ui object which is already a child
34 35 self.parentui = parentui.parentui or parentui
@@ -134,6 +135,12 b' class ui(object):'
134 135 result.append(path)
135 136 return result
136 137
138 def configrevlog(self):
139 ret = {}
140 for x in self.configitems("revlog"):
141 k = x[0].lower()
142 ret[k] = x[1]
143 return ret
137 144 def diffopts(self):
138 145 if self.diffcache:
139 146 return self.diffcache
General Comments 0
You need to be logged in to leave comments. Login now