revlog: speed up chunkcache...
Matt Mackall - r4988:14486eea default
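The visible bundlerepo.py hunk below is a call-site update: bundlerevlog.chunk() stops passing cachelen down to revlog.revlog.chunk(), whose signature changes as part of this commit's chunk-cache rework. The reworked revlog.chunk() body itself is outside the excerpted hunks, so the following is only a minimal sketch of the read-ahead pattern such a cache follows; the chunkcache class name, the (offset, data) cache layout, and the readahead default are illustrative assumptions, not this commit's actual code.

# Hedged sketch only: revlog keeps something similar in self._chunkcache
# (initialized in revlog.__init__ below), but the real implementation of
# this commit is not shown in the excerpt.
class chunkcache(object):
    def __init__(self, fp, readahead=65536):
        self.fp = fp                # revlog data file
        self.readahead = readahead  # how much to over-read on a cache miss
        self.cache = None           # (offset, data) of the last block read

    def read(self, start, length):
        # Serve the request from the cached block when it lies inside it.
        if self.cache:
            off, data = self.cache
            if off <= start and start + length <= off + len(data):
                return data[start - off:start - off + length]
        # On a miss, read one larger block and cache it so that the usual
        # pattern of many small, mostly sequential chunk reads stays cheap.
        self.fp.seek(start)
        data = self.fp.read(max(length, self.readahead))
        self.cache = (start, data)
        return data[:length]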
bundlerepo.py
@@ -1,251 +1,251 @@
"""
bundlerepo.py - repository class for viewing uncompressed bundles

This provides a read-only repository interface to bundles as if
they were part of the actual repository.

Copyright 2006, 2007 Benoit Boissinot <bboissin@gmail.com>

This software may be used and distributed according to the terms
of the GNU General Public License, incorporated herein by reference.
"""

from node import *
from i18n import _
import changegroup, util, os, struct, bz2, tempfile

import localrepo, changelog, manifest, filelog, revlog

class bundlerevlog(revlog.revlog):
    def __init__(self, opener, indexfile, bundlefile,
                 linkmapper=None):
        # How it works:
        # To retrieve a revision, we need to know the offset of the
        # revision in the bundlefile (an opened file).
        #
        # We store this offset in the index (start). To differentiate a
        # rev in the bundle from a rev in the revlog, we check
        # len(index[r]). If the tuple is bigger than 7, it is a bundle
        # entry (bigger because we also store the node the delta is
        # against).
        #
        revlog.revlog.__init__(self, opener, indexfile)
        self.bundlefile = bundlefile
        self.basemap = {}
        def chunkpositer():
            for chunk in changegroup.chunkiter(bundlefile):
                pos = bundlefile.tell()
                yield chunk, pos - len(chunk)
        n = self.count()
        prev = None
        for chunk, start in chunkpositer():
            size = len(chunk)
            if size < 80:
                raise util.Abort("invalid changegroup")
            start += 80
            size -= 80
            node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
            if node in self.nodemap:
                prev = node
                continue
            for p in (p1, p2):
                if not p in self.nodemap:
                    raise revlog.LookupError(_("unknown parent %s") % short(p))
            if linkmapper is None:
                link = n
            else:
                link = linkmapper(cs)

            if not prev:
                prev = p1
            # start, size, base is not used, link, p1, p2, delta ref
            e = (revlog.offset_type(start, 0), size, -1, None, link,
                 self.rev(p1), self.rev(p2), node)
            self.basemap[n] = prev
            self.index.insert(-1, e)
            self.nodemap[node] = n
            prev = node
            n += 1

    def bundle(self, rev):
        """is rev from the bundle"""
        if rev < 0:
            return False
        return rev in self.basemap
    def bundlebase(self, rev): return self.basemap[rev]
    def chunk(self, rev, df=None, cachelen=4096):
        # Warning: in case of bundle, the diff is against bundlebase,
        # not against rev - 1
        # XXX: could use some caching
        if not self.bundle(rev):
-            return revlog.revlog.chunk(self, rev, df, cachelen)
+            return revlog.revlog.chunk(self, rev, df)
        self.bundlefile.seek(self.start(rev))
        return self.bundlefile.read(self.length(rev))

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if self.bundle(rev1) and self.bundle(rev2):
            # hot path for bundle
            revb = self.rev(self.bundlebase(rev2))
            if revb == rev1:
                return self.chunk(rev2)
        elif not self.bundle(rev1) and not self.bundle(rev2):
            return revlog.revlog.revdiff(self, rev1, rev2)

        return self.diff(self.revision(self.node(rev1)),
                         self.revision(self.node(rev2)))

    def revision(self, node):
        """return an uncompressed revision of a given node"""
        if node == nullid: return ""

        text = None
        chain = []
        iter_node = node
        rev = self.rev(iter_node)
        # reconstruct the revision if it is from a changegroup
        while self.bundle(rev):
            if self._cache and self._cache[0] == iter_node:
                text = self._cache[2]
                break
            chain.append(rev)
            iter_node = self.bundlebase(rev)
            rev = self.rev(iter_node)
        if text is None:
            text = revlog.revlog.revision(self, iter_node)

        while chain:
            delta = self.chunk(chain.pop())
            text = self.patches(text, [delta])

        p1, p2 = self.parents(node)
        if node != revlog.hash(text, p1, p2):
            raise revlog.RevlogError(_("integrity check failed on %s:%d")
                                     % (self.datafile, self.rev(node)))

        self._cache = (node, self.rev(node), text)
        return text

    def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
        raise NotImplementedError
    def addgroup(self, revs, linkmapper, transaction, unique=0):
        raise NotImplementedError
    def strip(self, rev, minlink):
        raise NotImplementedError
    def checksize(self):
        raise NotImplementedError

class bundlechangelog(bundlerevlog, changelog.changelog):
    def __init__(self, opener, bundlefile):
        changelog.changelog.__init__(self, opener)
        bundlerevlog.__init__(self, opener, self.indexfile, bundlefile)

class bundlemanifest(bundlerevlog, manifest.manifest):
    def __init__(self, opener, bundlefile, linkmapper):
        manifest.manifest.__init__(self, opener)
        bundlerevlog.__init__(self, opener, self.indexfile, bundlefile,
                              linkmapper)

class bundlefilelog(bundlerevlog, filelog.filelog):
    def __init__(self, opener, path, bundlefile, linkmapper):
        filelog.filelog.__init__(self, opener, path)
        bundlerevlog.__init__(self, opener, self.indexfile, bundlefile,
                              linkmapper)

class bundlerepository(localrepo.localrepository):
    def __init__(self, ui, path, bundlename):
        localrepo.localrepository.__init__(self, ui, path)

        self._url = 'bundle:' + bundlename
        if path: self._url += '+' + path

        self.tempfile = None
        self.bundlefile = open(bundlename, "rb")
        header = self.bundlefile.read(6)
        if not header.startswith("HG"):
            raise util.Abort(_("%s: not a Mercurial bundle file") % bundlename)
        elif not header.startswith("HG10"):
            raise util.Abort(_("%s: unknown bundle version") % bundlename)
        elif header == "HG10BZ":
            fdtemp, temp = tempfile.mkstemp(prefix="hg-bundle-",
                                            suffix=".hg10un", dir=self.path)
            self.tempfile = temp
            fptemp = os.fdopen(fdtemp, 'wb')
            def generator(f):
                zd = bz2.BZ2Decompressor()
                zd.decompress("BZ")
                for chunk in f:
                    yield zd.decompress(chunk)
            gen = generator(util.filechunkiter(self.bundlefile, 4096))

            try:
                fptemp.write("HG10UN")
                for chunk in gen:
                    fptemp.write(chunk)
            finally:
                fptemp.close()
                self.bundlefile.close()

            self.bundlefile = open(self.tempfile, "rb")
            # seek right after the header
            self.bundlefile.seek(6)
        elif header == "HG10UN":
            # nothing to do
            pass
        else:
            raise util.Abort(_("%s: unknown bundle compression type")
                             % bundlename)
        self.changelog = bundlechangelog(self.sopener, self.bundlefile)
        self.manifest = bundlemanifest(self.sopener, self.bundlefile,
                                       self.changelog.rev)
        # dict with the mapping 'filename' -> position in the bundle
        self.bundlefilespos = {}
        while 1:
            f = changegroup.getchunk(self.bundlefile)
            if not f:
                break
            self.bundlefilespos[f] = self.bundlefile.tell()
            for c in changegroup.chunkiter(self.bundlefile):
                pass

    def url(self):
        return self._url

    def dev(self):
        return -1

    def file(self, f):
        if f[0] == '/':
            f = f[1:]
        if f in self.bundlefilespos:
            self.bundlefile.seek(self.bundlefilespos[f])
            return bundlefilelog(self.sopener, f, self.bundlefile,
                                 self.changelog.rev)
        else:
            return filelog.filelog(self.sopener, f)

    def close(self):
        """Close assigned bundle file immediately."""
        self.bundlefile.close()

    def __del__(self):
        bundlefile = getattr(self, 'bundlefile', None)
        if bundlefile and not bundlefile.closed:
            bundlefile.close()
        tempfile = getattr(self, 'tempfile', None)
        if tempfile is not None:
            os.unlink(tempfile)

def instance(ui, path, create):
    if create:
        raise util.Abort(_('cannot create new bundle repository'))
    path = util.drop_scheme('file', path)
    if path.startswith('bundle:'):
        path = util.drop_scheme('bundle', path)
        s = path.split("+", 1)
        if len(s) == 1:
            repopath, bundlename = "", s[0]
        else:
            repopath, bundlename = s
    else:
        repopath, bundlename = '', path
    return bundlerepository(ui, repopath, bundlename)
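bundlerevlog.revision() above rebuilds a revision that exists only in the bundle by following basemap back to a revision whose full text is obtainable, then replaying the bundle deltas forward. A self-contained sketch of that walk; stored, basemap and apply_delta are hypothetical stand-ins for revlog.revision(), self.chunk() and self.patches():

def reconstruct(node, basemap, stored, apply_delta):
    """Return the full text for node.

    stored maps a node to ('full', text) when its text is available
    locally, or to ('delta', delta) when it only exists in the bundle;
    basemap maps a bundle node to the node its delta is based on.
    """
    chain = []
    n = node
    # Walk backwards until a full text is found, mirroring the
    # `while self.bundle(rev)` loop in bundlerevlog.revision().
    while stored[n][0] == 'delta':
        chain.append(n)
        n = basemap[n]
    text = stored[n][1]
    # Replay the deltas in forward order, mirroring `while chain: ...`.
    while chain:
        text = apply_delta(text, stored[chain.pop()][1])
    return text

# Toy usage: "deltas" here are full replacement strings applied by a
# trivial lambda, purely to exercise the control flow.
stored = {'a': ('full', 'base text'),
          'b': ('delta', 'base text v2'),
          'c': ('delta', 'base text v3')}
basemap = {'b': 'a', 'c': 'b'}
assert reconstruct('c', basemap, stored, lambda old, d: d) == 'base text v3'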
revlog.py
@@ -1,1253 +1,1257 @@
"""
revlog.py - storage back-end for mercurial

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches

Copyright 2005-2007 Matt Mackall <mpm@selenic.com>

This software may be used and distributed according to the terms
of the GNU General Public License, incorporated herein by reference.
"""

from node import *
from i18n import _
import binascii, changegroup, errno, ancestor, mdiff, os
import sha, struct, util, zlib

# revlog flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS

class RevlogError(Exception):
    pass
class LookupError(RevlogError):
    pass

def getoffset(q):
    if q & 0xFFFF:
        raise RevlogError(_('incompatible revision flag %x') % q)
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    return long(long(offset) << 16 | type)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    l = [p1, p2]
    l.sort()
    s = sha.new(l[0])
    s.update(l[1])
    s.update(text)
    return s.digest()

def compress(text):
    """ generate a possibly-compressed representation of text """
    if not text:
        return ("", text)
    if len(text) < 44:
        if text[0] == '\0':
            return ("", text)
        return ('u', text)
    bin = zlib.compress(text)
    if len(bin) > len(text):
        if text[0] == '\0':
            return ("", text)
        return ('u', text)
    return ("", bin)

def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    t = bin[0]
    if t == '\0':
        return bin
    if t == 'x':
        return zlib.decompress(bin)
    if t == 'u':
        return bin[1:]
    raise RevlogError(_("unknown compression type %r") % t)

class lazyparser(object):
    """
    this class avoids the need to parse the entirety of large indices
    """

    # lazyparser is not safe to use on windows if the win32 extensions are
    # not available. it keeps the file handle open, which makes it not
    # possible to break hardlinks on local cloned repos.
    safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
                                      hasattr(util, 'win32api'))

    def __init__(self, dataf, size):
        self.dataf = dataf
        self.s = struct.calcsize(indexformatng)
        self.datasize = size
        self.l = size/self.s
        self.index = [None] * self.l
        self.map = {nullid: nullrev}
        self.allmap = 0
        self.all = 0
        self.mapfind_count = 0

    def loadmap(self):
        """
        during a commit, we need to make sure the rev being added is
        not a duplicate. This requires loading the entire index,
        which is fairly slow. loadmap can load up just the node map,
        which takes much less time.
        """
        if self.allmap:
            return
        end = self.datasize
        self.allmap = 1
        cur = 0
        count = 0
        blocksize = self.s * 256
        self.dataf.seek(0)
        while cur < end:
            data = self.dataf.read(blocksize)
            off = 0
            for x in xrange(256):
                n = data[off + ngshaoffset:off + ngshaoffset + 20]
                self.map[n] = count
                count += 1
                if count >= self.l:
                    break
                off += self.s
            cur += blocksize

    def loadblock(self, blockstart, blocksize, data=None):
        if self.all:
            return
        if data is None:
            self.dataf.seek(blockstart)
            if blockstart + blocksize > self.datasize:
                # the revlog may have grown since we've started running,
                # but we don't have space in self.index for more entries.
                # limit blocksize so that we don't get too much data.
                blocksize = max(self.datasize - blockstart, 0)
            data = self.dataf.read(blocksize)
        lend = len(data) / self.s
        i = blockstart / self.s
        off = 0
        # lazyindex supports __delitem__
        if lend > len(self.index) - i:
            lend = len(self.index) - i
        for x in xrange(lend):
            if self.index[i + x] == None:
                b = data[off : off + self.s]
                self.index[i + x] = b
                n = b[ngshaoffset:ngshaoffset + 20]
                self.map[n] = i + x
            off += self.s

    def findnode(self, node):
        """search backwards through the index file for a specific node"""
        if self.allmap:
            return None

        # hg log will cause many many searches for the manifest
        # nodes. After we get called a few times, just load the whole
        # thing.
        if self.mapfind_count > 8:
            self.loadmap()
            if node in self.map:
                return node
            return None
        self.mapfind_count += 1
        last = self.l - 1
        while self.index[last] != None:
            if last == 0:
                self.all = 1
                self.allmap = 1
                return None
            last -= 1
        end = (last + 1) * self.s
        blocksize = self.s * 256
        while end >= 0:
            start = max(end - blocksize, 0)
            self.dataf.seek(start)
            data = self.dataf.read(end - start)
            findend = end - start
            while True:
                # we're searching backwards, so we have to make sure
                # we don't find a changeset where this node is a parent
                off = data.rfind(node, 0, findend)
                findend = off
                if off >= 0:
                    i = off / self.s
                    off = i * self.s
                    n = data[off + ngshaoffset:off + ngshaoffset + 20]
                    if n == node:
                        self.map[n] = i + start / self.s
                        return node
                else:
                    break
            end -= blocksize
        return None

    def loadindex(self, i=None, end=None):
        if self.all:
            return
        all = False
        if i == None:
            blockstart = 0
            blocksize = (512 / self.s) * self.s
            end = self.datasize
            all = True
        else:
            if end:
                blockstart = i * self.s
                end = end * self.s
                blocksize = end - blockstart
            else:
                blockstart = (i & ~63) * self.s
                blocksize = self.s * 64
                end = blockstart + blocksize
        while blockstart < end:
            self.loadblock(blockstart, blocksize)
            blockstart += blocksize
        if all:
            self.all = True

class lazyindex(object):
    """a lazy version of the index array"""
    def __init__(self, parser):
        self.p = parser
    def __len__(self):
        return len(self.p.index)
    def load(self, pos):
        if pos < 0:
            pos += len(self.p.index)
        self.p.loadindex(pos)
        return self.p.index[pos]
    def __getitem__(self, pos):
        ret = self.p.index[pos] or self.load(pos)
        if isinstance(ret, str):
            ret = struct.unpack(indexformatng, ret)
        return ret
    def __setitem__(self, pos, item):
        self.p.index[pos] = item
    def __delitem__(self, pos):
        del self.p.index[pos]
    def insert(self, pos, e):
        self.p.index.insert(pos, e)
    def append(self, e):
        self.p.index.append(e)

class lazymap(object):
    """a lazy version of the node map"""
    def __init__(self, parser):
        self.p = parser
    def load(self, key):
        n = self.p.findnode(key)
        if n == None:
            raise KeyError(key)
    def __contains__(self, key):
        if key in self.p.map:
            return True
        self.p.loadmap()
        return key in self.p.map
    def __iter__(self):
        yield nullid
        for i in xrange(self.p.l):
            ret = self.p.index[i]
            if not ret:
                self.p.loadindex(i)
                ret = self.p.index[i]
            if isinstance(ret, str):
                ret = struct.unpack(indexformatng, ret)
            yield ret[7]
    def __getitem__(self, key):
        try:
            return self.p.map[key]
        except KeyError:
            try:
                self.load(key)
                return self.p.map[key]
            except KeyError:
                raise KeyError("node " + hex(key))
    def __setitem__(self, key, val):
        self.p.map[key] = val
    def __delitem__(self, key):
        del self.p.map[key]

indexformatv0 = ">4l20s20s20s"
v0shaoffset = 56

class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, fp, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        data = fp.read()
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = struct.unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap[e[4]], nodemap[e[5]], e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return index, nodemap, None

    def packentry(self, entry, node, version):
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return struct.pack(indexformatv0, *e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
ngshaoffset = 32
versionformat = ">I"

class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, fp, inline):
        try:
            size = util.fstat(fp).st_size
        except AttributeError:
            size = 0

        if lazyparser.safe_to_use and not inline and size > 1000000:
            # big index, let's parse it on demand
            parser = lazyparser(fp, size)
            index = lazyindex(parser)
            nodemap = lazymap(parser)
            e = list(index[0])
            type = gettype(e[0])
            e[0] = offset_type(0, type)
            index[0] = e
            return index, nodemap, None

        s = self.size
        cache = None
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        # if we're not using lazymap, always read the whole index
        data = fp.read()
        l = len(data)
        if inline:
            cache = (0, data)
        while off + s <= l:
            e = struct.unpack(indexformatng, data[off:off + s])
            index.append(e)
            nodemap[e[7]] = n
            n += 1
            off += s
            if inline:
                if e[1] < 0:
                    break
                off += e[1]

        e = list(index[0])
        type = gettype(e[0])
        e[0] = offset_type(0, type)
        index[0] = e

        return index, nodemap, cache

    def packentry(self, entry, node, version):
        p = struct.pack(indexformatng, *entry)
        if not entry[3] and not getoffset(entry[0]) and entry[5] == nullrev:
            p = struct.pack(versionformat, version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.
    """
    def __init__(self, opener, indexfile):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        self._cache = None
        self._chunkcache = None
        self.nodemap = {nullid: nullrev}
        self.index = []

        v = REVLOG_DEFAULT_VERSION
        if hasattr(opener, "defversion"):
            v = opener.defversion
            if v & REVLOGNG:
                v |= REVLOGNGINLINEDATA

        i = ""
        try:
            f = self.opener(self.indexfile)
            i = f.read(4)
            f.seek(0)
            if len(i) > 0:
                v = struct.unpack(versionformat, i)[0]
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        if i:
            d = self._io.parseindex(f, self._inline)
            self.index, self.nodemap, self._chunkcache = d

        # add the magic null revision at -1
        self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))

    def _loadindex(self, start, end):
        """load a block of indexes all at once from the lazy parser"""
        if isinstance(self.index, lazyindex):
            self.index.p.loadindex(start, end)

    def _loadindexmap(self):
        """loads both the map and the index from the lazy parser"""
        if isinstance(self.index, lazyindex):
            p = self.index.p
            p.loadindex()
            self.nodemap = p.map

    def _loadmap(self):
        """loads the map from the lazy parser"""
        if isinstance(self.nodemap, lazymap):
            self.nodemap.p.loadmap()
            self.nodemap = self.nodemap.p.map

    def tip(self):
        return self.node(len(self.index) - 2)
    def count(self):
        return len(self.index) - 1

    def rev(self, node):
        try:
            return self.nodemap[node]
        except KeyError:
            raise LookupError(_('%s: no node %s') % (self.indexfile, hex(node)))
    def node(self, rev):
        return self.index[rev][7]
    def linkrev(self, node):
        return self.index[self.rev(node)][4]
    def parents(self, node):
        d = self.index[self.rev(node)][5:7]
        return (self.node(d[0]), self.node(d[1]))
    def parentrevs(self, rev):
        return self.index[rev][5:7]
    def start(self, rev):
        return getoffset(self.index[rev][0])
    def end(self, rev):
        return self.start(rev) + self.length(rev)
    def length(self, rev):
        return self.index[rev][1]
    def base(self, rev):
        return self.index[rev][3]

    def size(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(self.node(rev))
        return len(t)

        # Alternate implementation. The advantage of this code is that it
        # will be faster for a single revision. But the results are not
        # cached, so finding the size of every revision will be slower.
        """
        if self.cache and self.cache[1] == rev:
            return len(self.cache[2])

        base = self.base(rev)
        if self.cache and self.cache[1] >= base and self.cache[1] < rev:
            base = self.cache[1]
            text = self.cache[2]
        else:
            text = self.revision(self.node(base))

        l = len(text)
        for x in xrange(base + 1, rev + 1):
            l = mdiff.patchedsize(l, self.chunk(x))
        return l
        """

    def reachable(self, node, stop=None):
        """return a hash of all nodes ancestral to a given node, including
        the node itself, stopping when stop is matched"""
        reachable = {}
        visit = [node]
        reachable[node] = 1
        if stop:
            stopn = self.rev(stop)
        else:
            stopn = 0
        while visit:
            n = visit.pop(0)
            if n == stop:
                continue
            if n == nullid:
                continue
            for p in self.parents(n):
                if self.rev(p) < stopn:
                    continue
                if p not in reachable:
                    reachable[p] = 1
                    visit.append(p)
        return reachable

570 def nodesbetween(self, roots=None, heads=None):
570 def nodesbetween(self, roots=None, heads=None):
571 """Return a tuple containing three elements. Elements 1 and 2 contain
571 """Return a tuple containing three elements. Elements 1 and 2 contain
572 a final list bases and heads after all the unreachable ones have been
572 a final list bases and heads after all the unreachable ones have been
573 pruned. Element 0 contains a topologically sorted list of all
573 pruned. Element 0 contains a topologically sorted list of all
574
574
575 nodes that satisfy these constraints:
575 nodes that satisfy these constraints:
576 1. All nodes must be descended from a node in roots (the nodes on
576 1. All nodes must be descended from a node in roots (the nodes on
577 roots are considered descended from themselves).
577 roots are considered descended from themselves).
578 2. All nodes must also be ancestors of a node in heads (the nodes in
578 2. All nodes must also be ancestors of a node in heads (the nodes in
579 heads are considered to be their own ancestors).
579 heads are considered to be their own ancestors).
580
580
581 If roots is unspecified, nullid is assumed as the only root.
581 If roots is unspecified, nullid is assumed as the only root.
582 If heads is unspecified, it is taken to be the output of the
582 If heads is unspecified, it is taken to be the output of the
583 heads method (i.e. a list of all nodes in the repository that
583 heads method (i.e. a list of all nodes in the repository that
584 have no children)."""
584 have no children)."""
585 nonodes = ([], [], [])
585 nonodes = ([], [], [])
586 if roots is not None:
586 if roots is not None:
587 roots = list(roots)
587 roots = list(roots)
588 if not roots:
588 if not roots:
589 return nonodes
589 return nonodes
590 lowestrev = min([self.rev(n) for n in roots])
590 lowestrev = min([self.rev(n) for n in roots])
591 else:
591 else:
592 roots = [nullid] # Everybody's a descendent of nullid
592 roots = [nullid] # Everybody's a descendent of nullid
593 lowestrev = nullrev
593 lowestrev = nullrev
594 if (lowestrev == nullrev) and (heads is None):
594 if (lowestrev == nullrev) and (heads is None):
595 # We want _all_ the nodes!
595 # We want _all_ the nodes!
596 return ([self.node(r) for r in xrange(0, self.count())],
596 return ([self.node(r) for r in xrange(0, self.count())],
597 [nullid], list(self.heads()))
597 [nullid], list(self.heads()))
598 if heads is None:
598 if heads is None:
599 # All nodes are ancestors, so the latest ancestor is the last
599 # All nodes are ancestors, so the latest ancestor is the last
600 # node.
600 # node.
601 highestrev = self.count() - 1
601 highestrev = self.count() - 1
602 # Set ancestors to None to signal that every node is an ancestor.
602 # Set ancestors to None to signal that every node is an ancestor.
603 ancestors = None
603 ancestors = None
604 # Set heads to an empty dictionary for later discovery of heads
604 # Set heads to an empty dictionary for later discovery of heads
605 heads = {}
605 heads = {}
606 else:
606 else:
607 heads = list(heads)
607 heads = list(heads)
608 if not heads:
608 if not heads:
609 return nonodes
609 return nonodes
610 ancestors = {}
610 ancestors = {}
611 # Turn heads into a dictionary so we can remove 'fake' heads.
611 # Turn heads into a dictionary so we can remove 'fake' heads.
612 # Also, later we will be using it to filter out the heads we can't
612 # Also, later we will be using it to filter out the heads we can't
613 # find from roots.
613 # find from roots.
614 heads = dict.fromkeys(heads, 0)
614 heads = dict.fromkeys(heads, 0)
615 # Start at the top and keep marking parents until we're done.
615 # Start at the top and keep marking parents until we're done.
616 nodestotag = heads.keys()
616 nodestotag = heads.keys()
617 # Remember where the top was so we can use it as a limit later.
617 # Remember where the top was so we can use it as a limit later.
618 highestrev = max([self.rev(n) for n in nodestotag])
618 highestrev = max([self.rev(n) for n in nodestotag])
619 while nodestotag:
619 while nodestotag:
620 # grab a node to tag
620 # grab a node to tag
621 n = nodestotag.pop()
621 n = nodestotag.pop()
622 # Never tag nullid
622 # Never tag nullid
623 if n == nullid:
623 if n == nullid:
624 continue
624 continue
625 # A node's revision number represents its place in a
625 # A node's revision number represents its place in a
626 # topologically sorted list of nodes.
626 # topologically sorted list of nodes.
627 r = self.rev(n)
627 r = self.rev(n)
628 if r >= lowestrev:
628 if r >= lowestrev:
629 if n not in ancestors:
629 if n not in ancestors:
630 # If we are possibly a descendent of one of the roots
630 # If we are possibly a descendent of one of the roots
631 # and we haven't already been marked as an ancestor
631 # and we haven't already been marked as an ancestor
632 ancestors[n] = 1 # Mark as ancestor
632 ancestors[n] = 1 # Mark as ancestor
633 # Add non-nullid parents to list of nodes to tag.
633 # Add non-nullid parents to list of nodes to tag.
634 nodestotag.extend([p for p in self.parents(n) if
634 nodestotag.extend([p for p in self.parents(n) if
635 p != nullid])
635 p != nullid])
636 elif n in heads: # We've seen it before, is it a fake head?
636 elif n in heads: # We've seen it before, is it a fake head?
637 # So it is, real heads should not be the ancestors of
637 # So it is, real heads should not be the ancestors of
638 # any other heads.
638 # any other heads.
639 heads.pop(n)
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [n for n in roots if n in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(n) for n in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a 'set' (i.e. a dictionary where the
        # values don't matter).
        descendents = dict.fromkeys(roots, 1)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendents.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendents, empty parents will look like
        # they're descendents.
        for r in xrange(max(lowestrev, 0), highestrev + 1):
            n = self.node(r)
            isdescendent = False
            if lowestrev == nullrev:  # Everybody is a descendent of nullid
                isdescendent = True
            elif n in descendents:
                # n is already a descendent
                isdescendent = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendents before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendents, it's not a root.
                    if (p[0] in descendents) or (p[1] in descendents):
                        roots.pop(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendent if either of its parents are
                # descendents. (We seeded the descendents list with the roots
                # up there, remember?)
                if (p[0] in descendents) or (p[1] in descendents):
                    descendents[n] = 1
                    isdescendent = True
            if isdescendent and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendents and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = 1
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = 1
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [n for n in heads.iterkeys() if heads[n] != 0]
        roots = roots.keys()
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

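    # Example (hypothetical history): if the revlog forks at rev 1 into two
    # branches, heads() returns both branch tips, while
    # heads(start=self.node(3)) returns only the tips descended from rev 3.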
    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = dict.fromkeys([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev: 1}
        heads = {startrev: 1}

        parentrevs = self.parentrevs
        for r in xrange(startrev + 1, self.count()):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable[r] = 1
                    heads[r] = 1
                if p in heads and p not in stoprevs:
                    del heads[p]

        return [self.node(r) for r in heads]

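    # children() has no reverse index to consult, so it scans every rev
    # after the given node and collects those that list it as a parent;
    # the nullrev branch makes parentless revs count as children of the
    # null revision.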
    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in range(p + 1, self.count()):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

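    # _match resolves an exact identifier only: an integer rev, a 20-byte
    # binary node, a decimal rev string (negative values count back from
    # the end), or a 40-digit hex nodeid. For instance, _match("-1") maps
    # to self.node(self.count() - 1), i.e. the tip.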
    def _match(self, id):
        if isinstance(id, (long, int)):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                r = self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = self.count() + rev
            if rev < 0 or rev >= self.count():
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                r = self.rev(node)
                return node
            except TypeError:
                pass

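    # Only an even number of hex digits converts cleanly to binary, so
    # _partialmatch compares nodes against bin(id[:len(id) & ~1]) first,
    # then checks the full (possibly odd-length) prefix against hex(n).
    # E.g. for id "e83" it matches on bin("e8"), then on
    # hex(n).startswith("e83").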
    def _partialmatch(self, id):
        if len(id) < 40:
            try:
                # hex(node)[:...]
                bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
                node = None
                for n in self.nodemap:
                    if n.startswith(bin_id) and hex(n).startswith(id):
                        if node is not None:
                            raise LookupError(_("Ambiguous identifier"))
                        node = n
                if node is not None:
                    return node
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(_("No match found"))

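    # Note the sense of cmp(): it returns True when text does NOT match
    # the stored revision (like C's strcmp, where 0/False means equal).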
    def cmp(self, node, text):
        """compare text with a given file revision"""
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def diff(self, a, b):
        """return a delta between two revisions"""
        return mdiff.textdiff(a, b)

    def patches(self, t, pl):
        """apply a list of patches to a string"""
        return mdiff.patches(t, pl)

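    # chunk() serves decompressed revision chunks out of a sliding cache,
    # self._chunkcache = (file offset, raw bytes). Reads pull at least
    # 64KB at a time, so the sequential chunk() calls made while
    # reassembling a delta chain mostly hit the cache instead of issuing
    # one file read per revision.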
    def chunk(self, rev, df=None):
        start, length = self.start(rev), self.length(rev)
        if self._inline:
            start += (rev + 1) * self._io.size
        end = start + length
        def loadcache(df):
            cache_length = max(65536, length)
            if not df:
                if self._inline:
                    df = self.opener(self.indexfile)
                else:
                    df = self.opener(self.datafile)
            df.seek(start)
            self._chunkcache = (start, df.read(cache_length))

        if not self._chunkcache:
            loadcache(df)

        cache_start = self._chunkcache[0]
        cache_end = cache_start + len(self._chunkcache[1])
        if start >= cache_start and end <= cache_end:
            # it is cached
            offset = start - cache_start
        else:
            loadcache(df)
            offset = 0

        # avoid copying large chunks
        c = self._chunkcache[1]
        if len(c) > length:
            c = c[offset:offset + length]

        return decompress(c)

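    # revdiff() can hand back the stored chunk directly when rev2's delta
    # already targets rev1 (same chain base and consecutive revs);
    # otherwise it recomputes a diff from the two full texts.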
    def delta(self, node):
        """return or calculate a delta between a node and its predecessor"""
        r = self.rev(node)
        return self.revdiff(r - 1, r)

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        b1 = self.base(rev1)
        b2 = self.base(rev2)
        if b1 == b2 and rev1 + 1 == rev2:
            return self.chunk(rev2)
        else:
            return self.diff(self.revision(self.node(rev1)),
                             self.revision(self.node(rev2)))

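    # revision() rebuilds a full text by starting from the chain base (or
    # from the cached text, when it lies on the same chain) and patching
    # forward. E.g. with base(rev) == 10 and rev == 13, it applies the
    # chunks for revs 11, 12 and 13 on top of the text of rev 10.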
    def revision(self, node):
        """return an uncompressed revision of a given node"""
        if node == nullid:
            return ""
        if self._cache and self._cache[0] == node:
            return self._cache[2]

        # look up what we need to read
        text = None
        rev = self.rev(node)
        base = self.base(rev)

        if self._inline:
            # we probably have the whole chunk cached
            df = None
        else:
            df = self.opener(self.datafile)

        # do we have useful data cached?
        if self._cache and self._cache[1] >= base and self._cache[1] < rev:
            base = self._cache[1]
            text = self._cache[2]
            self._loadindex(base, rev + 1)
        else:
            self._loadindex(base, rev + 1)
            text = self.chunk(base, df=df)

        bins = []
        for r in xrange(base + 1, rev + 1):
            bins.append(self.chunk(r, df=df))

        text = self.patches(text, bins)

        p1, p2 = self.parents(node)
        if node != hash(text, p1, p2):
            raise RevlogError(_("integrity check failed on %s:%d")
                              % (self.datafile, rev))

        self._cache = (node, rev, text)
        return text

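    # An inline revlog keeps revision data interleaved with the index in
    # the .i file; once that file passes 128KB (131072 bytes),
    # checkinlinesize() migrates the data out to a separate .d file and
    # rewrites the index entries without the inline flag.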
    def checkinlinesize(self, tr, fp=None):
        if not self._inline:
            return
        if not fp:
            fp = self.opener(self.indexfile, 'r')
            fp.seek(0, 2)
        size = fp.tell()
        if size < 131072:
            return
        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)
        df = self.opener(self.datafile, 'w')
        calc = self._io.size
        for r in xrange(self.count()):
            start = self.start(r) + (r + 1) * calc
            length = self.length(r)
            fp.seek(start)
            d = fp.read(length)
            df.write(d)
        fp.close()
        df.close()
        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in xrange(self.count()):
            e = self._io.packentry(self.index[i], self.node, self.version)
            fp.write(e)

        # if we don't call rename, the temp file will never replace the
        # real index
        fp.rename()

        tr.replace(self.indexfile, trindex * calc)
        self._chunkcache = None

    def addrevision(self, text, transaction, link, p1, p2, d=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        d - an optional precomputed delta
        """
        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)

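    # The dist > len(text) * 2 test below bounds delta-chain cost: once
    # the bytes spanning the chain base through the new delta exceed
    # twice the uncompressed text size, a full snapshot is stored instead
    # of another delta.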
    def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
        node = hash(text, p1, p2)
        if node in self.nodemap:
            return node

        curr = self.count()
        prev = curr - 1
        base = self.base(prev)
        offset = self.end(prev)

        if curr:
            if not d:
                ptext = self.revision(self.node(prev))
                d = self.diff(ptext, text)
            data = compress(d)
            l = len(data[1]) + len(data[0])
            dist = l + offset - self.start(base)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if not curr or dist > len(text) * 2:
            data = compress(text)
            l = len(data[1]) + len(data[0])
            base = curr

        e = (offset_type(offset, 0), l, len(text),
             base, link, self.rev(p1), self.rev(p2), node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version)
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            ifh.seek(0, 2)
            transaction.add(self.indexfile, ifh.tell(), prev)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

        self._cache = (node, curr, text)
        return node

    def ancestor(self, a, b):
        """calculate the least common ancestor of nodes a and b"""

        def parents(rev):
            return [p for p in self.parentrevs(rev) if p != nullrev]

        c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
        if c is None:
            return nullid

        return self.node(c)

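    # Wire format: each chunk yielded by group() is 80 bytes of metadata
    # (node, p1, p2, link node, 20 bytes each) followed by the delta
    # against the previous rev in the list; addgroup() unpacks the same
    # layout on the receiving side.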
    def group(self, nodelist, lookup, infocollect=None):
        """calculate a delta group

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
        have this parent as it has all history before these
        changesets. parent is parent[0]
        """
        revs = [self.rev(n) for n in nodelist]

        # if we don't have any revisions touched by these changesets, bail
        if not revs:
            yield changegroup.closechunk()
            return

        # add the parent of the first rev
        p = self.parents(self.node(revs[0]))[0]
        revs.insert(0, self.rev(p))

        # build deltas
        for d in xrange(0, len(revs) - 1):
            a, b = revs[d], revs[d + 1]
            nb = self.node(b)

            if infocollect is not None:
                infocollect(nb)

            d = self.revdiff(a, b)
            p = self.parents(nb)
            meta = nb + p[0] + p[1] + lookup(nb)
            yield changegroup.genchunk("%s%s" % (meta, d))

        yield changegroup.closechunk()

    def addgroup(self, revs, linkmapper, transaction, unique=0):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.
        """

        # track the base of the current delta log
        r = self.count()
        t = r - 1
        node = None

        base = prev = nullrev
        start = end = textlen = 0
        if r:
            end = self.end(t)

        ifh = self.opener(self.indexfile, "a+")
        ifh.seek(0, 2)
        transaction.add(self.indexfile, ifh.tell(), self.count())
        if self._inline:
            dfh = None
        else:
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        # loop through our set of deltas
        chain = None
        for chunk in revs:
            node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
            link = linkmapper(cs)
            if node in self.nodemap:
                # this can happen if two branches make the same change
                # if unique:
                #    raise RevlogError(_("already have %s") % hex(node[:4]))
                chain = node
                continue
            delta = chunk[80:]

            for p in (p1, p2):
                if not p in self.nodemap:
                    raise LookupError(_("unknown parent %s") % short(p))

            if not chain:
                # retrieve the parent revision of the delta chain
                chain = p1
                if not chain in self.nodemap:
                    raise LookupError(_("unknown base %s") % short(chain[:4]))

            # full versions are inserted when the needed deltas become
            # comparable to the uncompressed text or when the previous
            # version is not the one we have a delta against. We use
            # the size of the previous full rev as a proxy for the
            # current size.

            if chain == prev:
                tempd = compress(delta)
                cdelta = tempd[0] + tempd[1]
                textlen = mdiff.patchedsize(textlen, delta)

            if chain != prev or (end - start + len(cdelta)) > textlen * 2:
                # flush our writes here so we can read it in revision
                if dfh:
                    dfh.flush()
                ifh.flush()
                text = self.revision(chain)
                text = self.patches(text, [delta])
                chk = self._addrevision(text, transaction, link, p1, p2, None,
                                        ifh, dfh)
                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    dfh = self.opener(self.datafile, "a")
                    ifh = self.opener(self.indexfile, "a")
                if chk != node:
                    raise RevlogError(_("consistency error adding group"))
                textlen = len(text)
            else:
                e = (offset_type(end, 0), len(cdelta), textlen, base,
                     link, self.rev(p1), self.rev(p2), node)
                self.index.insert(-1, e)
                self.nodemap[node] = r
                entry = self._io.packentry(e, self.node, self.version)
                if self._inline:
                    ifh.write(entry)
                    ifh.write(cdelta)
                    self.checkinlinesize(transaction, ifh)
                    if not self._inline:
                        dfh = self.opener(self.datafile, "a")
                        ifh = self.opener(self.indexfile, "a")
                else:
                    dfh.write(cdelta)
                    ifh.write(entry)

            t, r, chain, prev = r, r + 1, node, node
            base = self.base(t)
            start = self.start(base)
            end = self.end(t)

        return node

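    # Truncation math for strip(): index entries have a fixed size,
    # self._io.size, so a separate-data revlog cuts the index at
    # rev * self._io.size; an inline revlog also keeps the data bytes
    # preceding rev, hence start(rev) + rev * self._io.size.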
    def strip(self, rev, minlink):
        if self.count() == 0 or rev >= self.count():
            return

        if isinstance(self.index, lazyindex):
            self._loadindexmap()

        # When stripping away a revision, we need to make sure it
        # does not actually belong to an older changeset.
        # The minlink parameter defines the oldest revision
        # we're allowed to strip away.
        while minlink > self.index[rev][4]:
            rev += 1
            if rev >= self.count():
                return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            df = self.opener(self.datafile, "a")
            df.truncate(end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        indexf = self.opener(self.indexfile, "a")
        indexf.truncate(end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chunkcache = None
        for x in xrange(rev, self.count()):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

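    # checksize() returns (dd, di): the difference between the actual and
    # expected sizes of the data file and the index file, respectively;
    # both are 0 for a consistent revlog.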
    def checksize(self):
        expected = 0
        if self.count():
            expected = self.end(self.count() - 1)

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            s = self._io.size
            i = actual / s
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in xrange(self.count()):
                    databytes += self.length(r)
                dd = 0
                di = actual - self.count() * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)