revlog: mark cache private
Matt Mackall
r4984:b4066fcb default
--- a/mercurial/bundlerepo.py
+++ b/mercurial/bundlerepo.py
@@ -1,251 +1,251 @@
1 """
1 """
2 bundlerepo.py - repository class for viewing uncompressed bundles
2 bundlerepo.py - repository class for viewing uncompressed bundles
3
3
4 This provides a read-only repository interface to bundles as if
4 This provides a read-only repository interface to bundles as if
5 they were part of the actual repository.
5 they were part of the actual repository.
6
6
7 Copyright 2006, 2007 Benoit Boissinot <bboissin@gmail.com>
7 Copyright 2006, 2007 Benoit Boissinot <bboissin@gmail.com>
8
8
9 This software may be used and distributed according to the terms
9 This software may be used and distributed according to the terms
10 of the GNU General Public License, incorporated herein by reference.
10 of the GNU General Public License, incorporated herein by reference.
11 """
11 """
12
12
13 from node import *
13 from node import *
14 from i18n import _
14 from i18n import _
15 import changegroup, util, os, struct, bz2, tempfile
15 import changegroup, util, os, struct, bz2, tempfile
16
16
17 import localrepo, changelog, manifest, filelog, revlog
17 import localrepo, changelog, manifest, filelog, revlog
18
18
19 class bundlerevlog(revlog.revlog):
19 class bundlerevlog(revlog.revlog):
20 def __init__(self, opener, indexfile, bundlefile,
20 def __init__(self, opener, indexfile, bundlefile,
21 linkmapper=None):
21 linkmapper=None):
22 # How it works:
22 # How it works:
23 # to retrieve a revision, we need to know the offset of
23 # to retrieve a revision, we need to know the offset of
24 # the revision in the bundlefile (an opened file).
24 # the revision in the bundlefile (an opened file).
25 #
25 #
26 # We store this offset in the index (start), to differentiate a
26 # We store this offset in the index (start), to differentiate a
27 # rev in the bundle and from a rev in the revlog, we check
27 # rev in the bundle and from a rev in the revlog, we check
28 # len(index[r]). If the tuple is bigger than 7, it is a bundle
28 # len(index[r]). If the tuple is bigger than 7, it is a bundle
29 # (it is bigger since we store the node to which the delta is)
29 # (it is bigger since we store the node to which the delta is)
30 #
30 #
31 revlog.revlog.__init__(self, opener, indexfile)
31 revlog.revlog.__init__(self, opener, indexfile)
32 self.bundlefile = bundlefile
32 self.bundlefile = bundlefile
33 self.basemap = {}
33 self.basemap = {}
34 def chunkpositer():
34 def chunkpositer():
35 for chunk in changegroup.chunkiter(bundlefile):
35 for chunk in changegroup.chunkiter(bundlefile):
36 pos = bundlefile.tell()
36 pos = bundlefile.tell()
37 yield chunk, pos - len(chunk)
37 yield chunk, pos - len(chunk)
38 n = self.count()
38 n = self.count()
39 prev = None
39 prev = None
40 for chunk, start in chunkpositer():
40 for chunk, start in chunkpositer():
41 size = len(chunk)
41 size = len(chunk)
42 if size < 80:
42 if size < 80:
43 raise util.Abort("invalid changegroup")
43 raise util.Abort("invalid changegroup")
44 start += 80
44 start += 80
45 size -= 80
45 size -= 80
46 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
46 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
47 if node in self.nodemap:
47 if node in self.nodemap:
48 prev = node
48 prev = node
49 continue
49 continue
50 for p in (p1, p2):
50 for p in (p1, p2):
51 if not p in self.nodemap:
51 if not p in self.nodemap:
52 raise revlog.LookupError(_("unknown parent %s") % short(p1))
52 raise revlog.LookupError(_("unknown parent %s") % short(p1))
53 if linkmapper is None:
53 if linkmapper is None:
54 link = n
54 link = n
55 else:
55 else:
56 link = linkmapper(cs)
56 link = linkmapper(cs)
57
57
58 if not prev:
58 if not prev:
59 prev = p1
59 prev = p1
60 # start, size, base is not used, link, p1, p2, delta ref
60 # start, size, base is not used, link, p1, p2, delta ref
61 e = (revlog.offset_type(start, 0), size, -1, None, link,
61 e = (revlog.offset_type(start, 0), size, -1, None, link,
62 self.rev(p1), self.rev(p2), node)
62 self.rev(p1), self.rev(p2), node)
63 self.basemap[n] = prev
63 self.basemap[n] = prev
64 self.index.insert(-1, e)
64 self.index.insert(-1, e)
65 self.nodemap[node] = n
65 self.nodemap[node] = n
66 prev = node
66 prev = node
67 n += 1
67 n += 1
68
68
69 def bundle(self, rev):
69 def bundle(self, rev):
70 """is rev from the bundle"""
70 """is rev from the bundle"""
71 if rev < 0:
71 if rev < 0:
72 return False
72 return False
73 return rev in self.basemap
73 return rev in self.basemap
74 def bundlebase(self, rev): return self.basemap[rev]
74 def bundlebase(self, rev): return self.basemap[rev]
75 def chunk(self, rev, df=None, cachelen=4096):
75 def chunk(self, rev, df=None, cachelen=4096):
76 # Warning: in case of bundle, the diff is against bundlebase,
76 # Warning: in case of bundle, the diff is against bundlebase,
77 # not against rev - 1
77 # not against rev - 1
78 # XXX: could use some caching
78 # XXX: could use some caching
79 if not self.bundle(rev):
79 if not self.bundle(rev):
80 return revlog.revlog.chunk(self, rev, df, cachelen)
80 return revlog.revlog.chunk(self, rev, df, cachelen)
81 self.bundlefile.seek(self.start(rev))
81 self.bundlefile.seek(self.start(rev))
82 return self.bundlefile.read(self.length(rev))
82 return self.bundlefile.read(self.length(rev))
83
83
84 def revdiff(self, rev1, rev2):
84 def revdiff(self, rev1, rev2):
85 """return or calculate a delta between two revisions"""
85 """return or calculate a delta between two revisions"""
86 if self.bundle(rev1) and self.bundle(rev2):
86 if self.bundle(rev1) and self.bundle(rev2):
87 # hot path for bundle
87 # hot path for bundle
88 revb = self.rev(self.bundlebase(rev2))
88 revb = self.rev(self.bundlebase(rev2))
89 if revb == rev1:
89 if revb == rev1:
90 return self.chunk(rev2)
90 return self.chunk(rev2)
91 elif not self.bundle(rev1) and not self.bundle(rev2):
91 elif not self.bundle(rev1) and not self.bundle(rev2):
92 return revlog.revlog.revdiff(self, rev1, rev2)
92 return revlog.revlog.revdiff(self, rev1, rev2)
93
93
94 return self.diff(self.revision(self.node(rev1)),
94 return self.diff(self.revision(self.node(rev1)),
95 self.revision(self.node(rev2)))
95 self.revision(self.node(rev2)))
96
96
97 def revision(self, node):
97 def revision(self, node):
98 """return an uncompressed revision of a given"""
98 """return an uncompressed revision of a given"""
99 if node == nullid: return ""
99 if node == nullid: return ""
100
100
101 text = None
101 text = None
102 chain = []
102 chain = []
103 iter_node = node
103 iter_node = node
104 rev = self.rev(iter_node)
104 rev = self.rev(iter_node)
105 # reconstruct the revision if it is from a changegroup
105 # reconstruct the revision if it is from a changegroup
106 while self.bundle(rev):
106 while self.bundle(rev):
107 if self.cache and self.cache[0] == iter_node:
107 if self._cache and self._cache[0] == iter_node:
108 text = self.cache[2]
108 text = self._cache[2]
109 break
109 break
110 chain.append(rev)
110 chain.append(rev)
111 iter_node = self.bundlebase(rev)
111 iter_node = self.bundlebase(rev)
112 rev = self.rev(iter_node)
112 rev = self.rev(iter_node)
113 if text is None:
113 if text is None:
114 text = revlog.revlog.revision(self, iter_node)
114 text = revlog.revlog.revision(self, iter_node)
115
115
116 while chain:
116 while chain:
117 delta = self.chunk(chain.pop())
117 delta = self.chunk(chain.pop())
118 text = self.patches(text, [delta])
118 text = self.patches(text, [delta])
119
119
120 p1, p2 = self.parents(node)
120 p1, p2 = self.parents(node)
121 if node != revlog.hash(text, p1, p2):
121 if node != revlog.hash(text, p1, p2):
122 raise revlog.RevlogError(_("integrity check failed on %s:%d")
122 raise revlog.RevlogError(_("integrity check failed on %s:%d")
123 % (self.datafile, self.rev(node)))
123 % (self.datafile, self.rev(node)))
124
124
125 self.cache = (node, self.rev(node), text)
125 self._cache = (node, self.rev(node), text)
126 return text
126 return text
127
127
128 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
128 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
129 raise NotImplementedError
129 raise NotImplementedError
130 def addgroup(self, revs, linkmapper, transaction, unique=0):
130 def addgroup(self, revs, linkmapper, transaction, unique=0):
131 raise NotImplementedError
131 raise NotImplementedError
132 def strip(self, rev, minlink):
132 def strip(self, rev, minlink):
133 raise NotImplementedError
133 raise NotImplementedError
134 def checksize(self):
134 def checksize(self):
135 raise NotImplementedError
135 raise NotImplementedError
136
136
137 class bundlechangelog(bundlerevlog, changelog.changelog):
137 class bundlechangelog(bundlerevlog, changelog.changelog):
138 def __init__(self, opener, bundlefile):
138 def __init__(self, opener, bundlefile):
139 changelog.changelog.__init__(self, opener)
139 changelog.changelog.__init__(self, opener)
140 bundlerevlog.__init__(self, opener, self.indexfile, bundlefile)
140 bundlerevlog.__init__(self, opener, self.indexfile, bundlefile)
141
141
142 class bundlemanifest(bundlerevlog, manifest.manifest):
142 class bundlemanifest(bundlerevlog, manifest.manifest):
143 def __init__(self, opener, bundlefile, linkmapper):
143 def __init__(self, opener, bundlefile, linkmapper):
144 manifest.manifest.__init__(self, opener)
144 manifest.manifest.__init__(self, opener)
145 bundlerevlog.__init__(self, opener, self.indexfile, bundlefile,
145 bundlerevlog.__init__(self, opener, self.indexfile, bundlefile,
146 linkmapper)
146 linkmapper)
147
147
148 class bundlefilelog(bundlerevlog, filelog.filelog):
148 class bundlefilelog(bundlerevlog, filelog.filelog):
149 def __init__(self, opener, path, bundlefile, linkmapper):
149 def __init__(self, opener, path, bundlefile, linkmapper):
150 filelog.filelog.__init__(self, opener, path)
150 filelog.filelog.__init__(self, opener, path)
151 bundlerevlog.__init__(self, opener, self.indexfile, bundlefile,
151 bundlerevlog.__init__(self, opener, self.indexfile, bundlefile,
152 linkmapper)
152 linkmapper)
153
153
154 class bundlerepository(localrepo.localrepository):
154 class bundlerepository(localrepo.localrepository):
155 def __init__(self, ui, path, bundlename):
155 def __init__(self, ui, path, bundlename):
156 localrepo.localrepository.__init__(self, ui, path)
156 localrepo.localrepository.__init__(self, ui, path)
157
157
158 self._url = 'bundle:' + bundlename
158 self._url = 'bundle:' + bundlename
159 if path: self._url += '+' + path
159 if path: self._url += '+' + path
160
160
161 self.tempfile = None
161 self.tempfile = None
162 self.bundlefile = open(bundlename, "rb")
162 self.bundlefile = open(bundlename, "rb")
163 header = self.bundlefile.read(6)
163 header = self.bundlefile.read(6)
164 if not header.startswith("HG"):
164 if not header.startswith("HG"):
165 raise util.Abort(_("%s: not a Mercurial bundle file") % bundlename)
165 raise util.Abort(_("%s: not a Mercurial bundle file") % bundlename)
166 elif not header.startswith("HG10"):
166 elif not header.startswith("HG10"):
167 raise util.Abort(_("%s: unknown bundle version") % bundlename)
167 raise util.Abort(_("%s: unknown bundle version") % bundlename)
168 elif header == "HG10BZ":
168 elif header == "HG10BZ":
169 fdtemp, temp = tempfile.mkstemp(prefix="hg-bundle-",
169 fdtemp, temp = tempfile.mkstemp(prefix="hg-bundle-",
170 suffix=".hg10un", dir=self.path)
170 suffix=".hg10un", dir=self.path)
171 self.tempfile = temp
171 self.tempfile = temp
172 fptemp = os.fdopen(fdtemp, 'wb')
172 fptemp = os.fdopen(fdtemp, 'wb')
173 def generator(f):
173 def generator(f):
174 zd = bz2.BZ2Decompressor()
174 zd = bz2.BZ2Decompressor()
175 zd.decompress("BZ")
175 zd.decompress("BZ")
176 for chunk in f:
176 for chunk in f:
177 yield zd.decompress(chunk)
177 yield zd.decompress(chunk)
178 gen = generator(util.filechunkiter(self.bundlefile, 4096))
178 gen = generator(util.filechunkiter(self.bundlefile, 4096))
179
179
180 try:
180 try:
181 fptemp.write("HG10UN")
181 fptemp.write("HG10UN")
182 for chunk in gen:
182 for chunk in gen:
183 fptemp.write(chunk)
183 fptemp.write(chunk)
184 finally:
184 finally:
185 fptemp.close()
185 fptemp.close()
186 self.bundlefile.close()
186 self.bundlefile.close()
187
187
188 self.bundlefile = open(self.tempfile, "rb")
188 self.bundlefile = open(self.tempfile, "rb")
189 # seek right after the header
189 # seek right after the header
190 self.bundlefile.seek(6)
190 self.bundlefile.seek(6)
191 elif header == "HG10UN":
191 elif header == "HG10UN":
192 # nothing to do
192 # nothing to do
193 pass
193 pass
194 else:
194 else:
195 raise util.Abort(_("%s: unknown bundle compression type")
195 raise util.Abort(_("%s: unknown bundle compression type")
196 % bundlename)
196 % bundlename)
197 self.changelog = bundlechangelog(self.sopener, self.bundlefile)
197 self.changelog = bundlechangelog(self.sopener, self.bundlefile)
198 self.manifest = bundlemanifest(self.sopener, self.bundlefile,
198 self.manifest = bundlemanifest(self.sopener, self.bundlefile,
199 self.changelog.rev)
199 self.changelog.rev)
200 # dict with the mapping 'filename' -> position in the bundle
200 # dict with the mapping 'filename' -> position in the bundle
201 self.bundlefilespos = {}
201 self.bundlefilespos = {}
202 while 1:
202 while 1:
203 f = changegroup.getchunk(self.bundlefile)
203 f = changegroup.getchunk(self.bundlefile)
204 if not f:
204 if not f:
205 break
205 break
206 self.bundlefilespos[f] = self.bundlefile.tell()
206 self.bundlefilespos[f] = self.bundlefile.tell()
207 for c in changegroup.chunkiter(self.bundlefile):
207 for c in changegroup.chunkiter(self.bundlefile):
208 pass
208 pass
209
209
210 def url(self):
210 def url(self):
211 return self._url
211 return self._url
212
212
213 def dev(self):
213 def dev(self):
214 return -1
214 return -1
215
215
216 def file(self, f):
216 def file(self, f):
217 if f[0] == '/':
217 if f[0] == '/':
218 f = f[1:]
218 f = f[1:]
219 if f in self.bundlefilespos:
219 if f in self.bundlefilespos:
220 self.bundlefile.seek(self.bundlefilespos[f])
220 self.bundlefile.seek(self.bundlefilespos[f])
221 return bundlefilelog(self.sopener, f, self.bundlefile,
221 return bundlefilelog(self.sopener, f, self.bundlefile,
222 self.changelog.rev)
222 self.changelog.rev)
223 else:
223 else:
224 return filelog.filelog(self.sopener, f)
224 return filelog.filelog(self.sopener, f)
225
225
226 def close(self):
226 def close(self):
227 """Close assigned bundle file immediately."""
227 """Close assigned bundle file immediately."""
228 self.bundlefile.close()
228 self.bundlefile.close()
229
229
230 def __del__(self):
230 def __del__(self):
231 bundlefile = getattr(self, 'bundlefile', None)
231 bundlefile = getattr(self, 'bundlefile', None)
232 if bundlefile and not bundlefile.closed:
232 if bundlefile and not bundlefile.closed:
233 bundlefile.close()
233 bundlefile.close()
234 tempfile = getattr(self, 'tempfile', None)
234 tempfile = getattr(self, 'tempfile', None)
235 if tempfile is not None:
235 if tempfile is not None:
236 os.unlink(tempfile)
236 os.unlink(tempfile)
237
237
238 def instance(ui, path, create):
238 def instance(ui, path, create):
239 if create:
239 if create:
240 raise util.Abort(_('cannot create new bundle repository'))
240 raise util.Abort(_('cannot create new bundle repository'))
241 path = util.drop_scheme('file', path)
241 path = util.drop_scheme('file', path)
242 if path.startswith('bundle:'):
242 if path.startswith('bundle:'):
243 path = util.drop_scheme('bundle', path)
243 path = util.drop_scheme('bundle', path)
244 s = path.split("+", 1)
244 s = path.split("+", 1)
245 if len(s) == 1:
245 if len(s) == 1:
246 repopath, bundlename = "", s[0]
246 repopath, bundlename = "", s[0]
247 else:
247 else:
248 repopath, bundlename = s
248 repopath, bundlename = s
249 else:
249 else:
250 repopath, bundlename = '', path
250 repopath, bundlename = '', path
251 return bundlerepository(ui, repopath, bundlename)
251 return bundlerepository(ui, repopath, bundlename)
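
The whole changeset is the rename of the one-entry revision cache from self.cache to self._cache, marking it as internal to revlog and its subclasses. The sketch below illustrates the caching pattern revision() uses above: the last reconstructed text is kept as a (node, rev, text) tuple and served back when the same node is requested again. It is a minimal sketch in plain Python, not Mercurial code; the cachedstore class and _rebuild helper are hypothetical names invented for illustration.

class cachedstore(object):
    def __init__(self):
        self._cache = None  # (node, rev, text) of the last lookup, or None

    def _rebuild(self, node):
        # stand-in for the delta-chain walk done by bundlerevlog.revision()
        return "text of %s" % node

    def revision(self, node):
        if self._cache and self._cache[0] == node:
            return self._cache[2]          # cache hit: skip reconstruction
        text = self._rebuild(node)
        self._cache = (node, 0, text)      # rev number elided in this sketch
        return text

store = cachedstore()
assert store.revision("n1") == store.revision("n1")  # 2nd call hits _cache

Keeping a single tuple rather than a dict bounds memory to one revision while still making the common case, reading the same revision twice in a row, cheap.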
--- a/mercurial/revlog.py
+++ b/mercurial/revlog.py
@@ -1,1278 +1,1277 @@
1 """
1 """
2 revlog.py - storage back-end for mercurial
2 revlog.py - storage back-end for mercurial
3
3
4 This provides efficient delta storage with O(1) retrieve and append
4 This provides efficient delta storage with O(1) retrieve and append
5 and O(changes) merge between branches
5 and O(changes) merge between branches
6
6
7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
8
8
9 This software may be used and distributed according to the terms
9 This software may be used and distributed according to the terms
10 of the GNU General Public License, incorporated herein by reference.
10 of the GNU General Public License, incorporated herein by reference.
11 """
11 """
12
12
13 from node import *
13 from node import *
14 from i18n import _
14 from i18n import _
15 import binascii, changegroup, errno, ancestor, mdiff, os
15 import binascii, changegroup, errno, ancestor, mdiff, os
16 import sha, struct, util, zlib
16 import sha, struct, util, zlib
17
17
18 # revlog version strings
18 # revlog version strings
19 REVLOGV0 = 0
19 REVLOGV0 = 0
20 REVLOGNG = 1
20 REVLOGNG = 1
21
21
22 # revlog flags
22 # revlog flags
23 REVLOGNGINLINEDATA = (1 << 16)
23 REVLOGNGINLINEDATA = (1 << 16)
24 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
24 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
25
25
26 REVLOG_DEFAULT_FORMAT = REVLOGNG
26 REVLOG_DEFAULT_FORMAT = REVLOGNG
27 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
27 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
28
28
29 def hash(text, p1, p2):
29 def hash(text, p1, p2):
30 """generate a hash from the given text and its parent hashes
30 """generate a hash from the given text and its parent hashes
31
31
32 This hash combines both the current file contents and its history
32 This hash combines both the current file contents and its history
33 in a manner that makes it easy to distinguish nodes with the same
33 in a manner that makes it easy to distinguish nodes with the same
34 content in the revision graph.
34 content in the revision graph.
35 """
35 """
36 l = [p1, p2]
36 l = [p1, p2]
37 l.sort()
37 l.sort()
38 s = sha.new(l[0])
38 s = sha.new(l[0])
39 s.update(l[1])
39 s.update(l[1])
40 s.update(text)
40 s.update(text)
41 return s.digest()
41 return s.digest()
42
42
43 def compress(text):
43 def compress(text):
44 """ generate a possibly-compressed representation of text """
44 """ generate a possibly-compressed representation of text """
45 if not text:
45 if not text:
46 return ("", text)
46 return ("", text)
47 if len(text) < 44:
47 if len(text) < 44:
48 if text[0] == '\0':
48 if text[0] == '\0':
49 return ("", text)
49 return ("", text)
50 return ('u', text)
50 return ('u', text)
51 bin = zlib.compress(text)
51 bin = zlib.compress(text)
52 if len(bin) > len(text):
52 if len(bin) > len(text):
53 if text[0] == '\0':
53 if text[0] == '\0':
54 return ("", text)
54 return ("", text)
55 return ('u', text)
55 return ('u', text)
56 return ("", bin)
56 return ("", bin)
57
57
58 def decompress(bin):
58 def decompress(bin):
59 """ decompress the given input """
59 """ decompress the given input """
60 if not bin:
60 if not bin:
61 return bin
61 return bin
62 t = bin[0]
62 t = bin[0]
63 if t == '\0':
63 if t == '\0':
64 return bin
64 return bin
65 if t == 'x':
65 if t == 'x':
66 return zlib.decompress(bin)
66 return zlib.decompress(bin)
67 if t == 'u':
67 if t == 'u':
68 return bin[1:]
68 return bin[1:]
69 raise RevlogError(_("unknown compression type %r") % t)
69 raise RevlogError(_("unknown compression type %r") % t)
70
70
71 indexformatv0 = ">4l20s20s20s"
71 indexformatv0 = ">4l20s20s20s"
72 v0shaoffset = 56
72 v0shaoffset = 56
73 # index ng:
73 # index ng:
74 # 6 bytes offset
74 # 6 bytes offset
75 # 2 bytes flags
75 # 2 bytes flags
76 # 4 bytes compressed length
76 # 4 bytes compressed length
77 # 4 bytes uncompressed length
77 # 4 bytes uncompressed length
78 # 4 bytes: base rev
78 # 4 bytes: base rev
79 # 4 bytes link rev
79 # 4 bytes link rev
80 # 4 bytes parent 1 rev
80 # 4 bytes parent 1 rev
81 # 4 bytes parent 2 rev
81 # 4 bytes parent 2 rev
82 # 32 bytes: nodeid
82 # 32 bytes: nodeid
83 indexformatng = ">Qiiiiii20s12x"
83 indexformatng = ">Qiiiiii20s12x"
84 ngshaoffset = 32
84 ngshaoffset = 32
85 versionformat = ">I"
85 versionformat = ">I"
86
86
87 class lazyparser(object):
87 class lazyparser(object):
88 """
88 """
89 this class avoids the need to parse the entirety of large indices
89 this class avoids the need to parse the entirety of large indices
90 """
90 """
91
91
92 # lazyparser is not safe to use on windows if win32 extensions not
92 # lazyparser is not safe to use on windows if win32 extensions not
93 # available. it keeps file handle open, which make it not possible
93 # available. it keeps file handle open, which make it not possible
94 # to break hardlinks on local cloned repos.
94 # to break hardlinks on local cloned repos.
95 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
95 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
96 hasattr(util, 'win32api'))
96 hasattr(util, 'win32api'))
97
97
98 def __init__(self, dataf, size):
98 def __init__(self, dataf, size):
99 self.dataf = dataf
99 self.dataf = dataf
100 self.s = struct.calcsize(indexformatng)
100 self.s = struct.calcsize(indexformatng)
101 self.datasize = size
101 self.datasize = size
102 self.l = size/self.s
102 self.l = size/self.s
103 self.index = [None] * self.l
103 self.index = [None] * self.l
104 self.map = {nullid: nullrev}
104 self.map = {nullid: nullrev}
105 self.allmap = 0
105 self.allmap = 0
106 self.all = 0
106 self.all = 0
107 self.mapfind_count = 0
107 self.mapfind_count = 0
108
108
109 def loadmap(self):
109 def loadmap(self):
110 """
110 """
111 during a commit, we need to make sure the rev being added is
111 during a commit, we need to make sure the rev being added is
112 not a duplicate. This requires loading the entire index,
112 not a duplicate. This requires loading the entire index,
113 which is fairly slow. loadmap can load up just the node map,
113 which is fairly slow. loadmap can load up just the node map,
114 which takes much less time.
114 which takes much less time.
115 """
115 """
116 if self.allmap:
116 if self.allmap:
117 return
117 return
118 end = self.datasize
118 end = self.datasize
119 self.allmap = 1
119 self.allmap = 1
120 cur = 0
120 cur = 0
121 count = 0
121 count = 0
122 blocksize = self.s * 256
122 blocksize = self.s * 256
123 self.dataf.seek(0)
123 self.dataf.seek(0)
124 while cur < end:
124 while cur < end:
125 data = self.dataf.read(blocksize)
125 data = self.dataf.read(blocksize)
126 off = 0
126 off = 0
127 for x in xrange(256):
127 for x in xrange(256):
128 n = data[off + ngshaoffset:off + ngshaoffset + 20]
128 n = data[off + ngshaoffset:off + ngshaoffset + 20]
129 self.map[n] = count
129 self.map[n] = count
130 count += 1
130 count += 1
131 if count >= self.l:
131 if count >= self.l:
132 break
132 break
133 off += self.s
133 off += self.s
134 cur += blocksize
134 cur += blocksize
135
135
136 def loadblock(self, blockstart, blocksize, data=None):
136 def loadblock(self, blockstart, blocksize, data=None):
137 if self.all:
137 if self.all:
138 return
138 return
139 if data is None:
139 if data is None:
140 self.dataf.seek(blockstart)
140 self.dataf.seek(blockstart)
141 if blockstart + blocksize > self.datasize:
141 if blockstart + blocksize > self.datasize:
142 # the revlog may have grown since we've started running,
142 # the revlog may have grown since we've started running,
143 # but we don't have space in self.index for more entries.
143 # but we don't have space in self.index for more entries.
144 # limit blocksize so that we don't get too much data.
144 # limit blocksize so that we don't get too much data.
145 blocksize = max(self.datasize - blockstart, 0)
145 blocksize = max(self.datasize - blockstart, 0)
146 data = self.dataf.read(blocksize)
146 data = self.dataf.read(blocksize)
147 lend = len(data) / self.s
147 lend = len(data) / self.s
148 i = blockstart / self.s
148 i = blockstart / self.s
149 off = 0
149 off = 0
150 # lazyindex supports __delitem__
150 # lazyindex supports __delitem__
151 if lend > len(self.index) - i:
151 if lend > len(self.index) - i:
152 lend = len(self.index) - i
152 lend = len(self.index) - i
153 for x in xrange(lend):
153 for x in xrange(lend):
154 if self.index[i + x] == None:
154 if self.index[i + x] == None:
155 b = data[off : off + self.s]
155 b = data[off : off + self.s]
156 self.index[i + x] = b
156 self.index[i + x] = b
157 n = b[ngshaoffset:ngshaoffset + 20]
157 n = b[ngshaoffset:ngshaoffset + 20]
158 self.map[n] = i + x
158 self.map[n] = i + x
159 off += self.s
159 off += self.s
160
160
161 def findnode(self, node):
161 def findnode(self, node):
162 """search backwards through the index file for a specific node"""
162 """search backwards through the index file for a specific node"""
163 if self.allmap:
163 if self.allmap:
164 return None
164 return None
165
165
166 # hg log will cause many many searches for the manifest
166 # hg log will cause many many searches for the manifest
167 # nodes. After we get called a few times, just load the whole
167 # nodes. After we get called a few times, just load the whole
168 # thing.
168 # thing.
169 if self.mapfind_count > 8:
169 if self.mapfind_count > 8:
170 self.loadmap()
170 self.loadmap()
171 if node in self.map:
171 if node in self.map:
172 return node
172 return node
173 return None
173 return None
174 self.mapfind_count += 1
174 self.mapfind_count += 1
175 last = self.l - 1
175 last = self.l - 1
176 while self.index[last] != None:
176 while self.index[last] != None:
177 if last == 0:
177 if last == 0:
178 self.all = 1
178 self.all = 1
179 self.allmap = 1
179 self.allmap = 1
180 return None
180 return None
181 last -= 1
181 last -= 1
182 end = (last + 1) * self.s
182 end = (last + 1) * self.s
183 blocksize = self.s * 256
183 blocksize = self.s * 256
184 while end >= 0:
184 while end >= 0:
185 start = max(end - blocksize, 0)
185 start = max(end - blocksize, 0)
186 self.dataf.seek(start)
186 self.dataf.seek(start)
187 data = self.dataf.read(end - start)
187 data = self.dataf.read(end - start)
188 findend = end - start
188 findend = end - start
189 while True:
189 while True:
190 # we're searching backwards, so weh have to make sure
190 # we're searching backwards, so weh have to make sure
191 # we don't find a changeset where this node is a parent
191 # we don't find a changeset where this node is a parent
192 off = data.rfind(node, 0, findend)
192 off = data.rfind(node, 0, findend)
193 findend = off
193 findend = off
194 if off >= 0:
194 if off >= 0:
195 i = off / self.s
195 i = off / self.s
196 off = i * self.s
196 off = i * self.s
197 n = data[off + ngshaoffset:off + ngshaoffset + 20]
197 n = data[off + ngshaoffset:off + ngshaoffset + 20]
198 if n == node:
198 if n == node:
199 self.map[n] = i + start / self.s
199 self.map[n] = i + start / self.s
200 return node
200 return node
201 else:
201 else:
202 break
202 break
203 end -= blocksize
203 end -= blocksize
204 return None
204 return None
205
205
206 def loadindex(self, i=None, end=None):
206 def loadindex(self, i=None, end=None):
207 if self.all:
207 if self.all:
208 return
208 return
209 all = False
209 all = False
210 if i == None:
210 if i == None:
211 blockstart = 0
211 blockstart = 0
212 blocksize = (512 / self.s) * self.s
212 blocksize = (512 / self.s) * self.s
213 end = self.datasize
213 end = self.datasize
214 all = True
214 all = True
215 else:
215 else:
216 if end:
216 if end:
217 blockstart = i * self.s
217 blockstart = i * self.s
218 end = end * self.s
218 end = end * self.s
219 blocksize = end - blockstart
219 blocksize = end - blockstart
220 else:
220 else:
221 blockstart = (i & ~63) * self.s
221 blockstart = (i & ~63) * self.s
222 blocksize = self.s * 64
222 blocksize = self.s * 64
223 end = blockstart + blocksize
223 end = blockstart + blocksize
224 while blockstart < end:
224 while blockstart < end:
225 self.loadblock(blockstart, blocksize)
225 self.loadblock(blockstart, blocksize)
226 blockstart += blocksize
226 blockstart += blocksize
227 if all:
227 if all:
228 self.all = True
228 self.all = True
229
229
230 class lazyindex(object):
230 class lazyindex(object):
231 """a lazy version of the index array"""
231 """a lazy version of the index array"""
232 def __init__(self, parser):
232 def __init__(self, parser):
233 self.p = parser
233 self.p = parser
234 def __len__(self):
234 def __len__(self):
235 return len(self.p.index)
235 return len(self.p.index)
236 def load(self, pos):
236 def load(self, pos):
237 if pos < 0:
237 if pos < 0:
238 pos += len(self.p.index)
238 pos += len(self.p.index)
239 self.p.loadindex(pos)
239 self.p.loadindex(pos)
240 return self.p.index[pos]
240 return self.p.index[pos]
241 def __getitem__(self, pos):
241 def __getitem__(self, pos):
242 ret = self.p.index[pos] or self.load(pos)
242 ret = self.p.index[pos] or self.load(pos)
243 if isinstance(ret, str):
243 if isinstance(ret, str):
244 ret = struct.unpack(indexformatng, ret)
244 ret = struct.unpack(indexformatng, ret)
245 return ret
245 return ret
246 def __setitem__(self, pos, item):
246 def __setitem__(self, pos, item):
247 self.p.index[pos] = item
247 self.p.index[pos] = item
248 def __delitem__(self, pos):
248 def __delitem__(self, pos):
249 del self.p.index[pos]
249 del self.p.index[pos]
250 def insert(self, pos, e):
250 def insert(self, pos, e):
251 self.p.index.insert(pos, e)
251 self.p.index.insert(pos, e)
252 def append(self, e):
252 def append(self, e):
253 self.p.index.append(e)
253 self.p.index.append(e)
254
254
255 class lazymap(object):
255 class lazymap(object):
256 """a lazy version of the node map"""
256 """a lazy version of the node map"""
257 def __init__(self, parser):
257 def __init__(self, parser):
258 self.p = parser
258 self.p = parser
259 def load(self, key):
259 def load(self, key):
260 n = self.p.findnode(key)
260 n = self.p.findnode(key)
261 if n == None:
261 if n == None:
262 raise KeyError(key)
262 raise KeyError(key)
263 def __contains__(self, key):
263 def __contains__(self, key):
264 if key in self.p.map:
264 if key in self.p.map:
265 return True
265 return True
266 self.p.loadmap()
266 self.p.loadmap()
267 return key in self.p.map
267 return key in self.p.map
268 def __iter__(self):
268 def __iter__(self):
269 yield nullid
269 yield nullid
270 for i in xrange(self.p.l):
270 for i in xrange(self.p.l):
271 ret = self.p.index[i]
271 ret = self.p.index[i]
272 if not ret:
272 if not ret:
273 self.p.loadindex(i)
273 self.p.loadindex(i)
274 ret = self.p.index[i]
274 ret = self.p.index[i]
275 if isinstance(ret, str):
275 if isinstance(ret, str):
276 ret = struct.unpack(indexformatng, ret)
276 ret = struct.unpack(indexformatng, ret)
277 yield ret[7]
277 yield ret[7]
278 def __getitem__(self, key):
278 def __getitem__(self, key):
279 try:
279 try:
280 return self.p.map[key]
280 return self.p.map[key]
281 except KeyError:
281 except KeyError:
282 try:
282 try:
283 self.load(key)
283 self.load(key)
284 return self.p.map[key]
284 return self.p.map[key]
285 except KeyError:
285 except KeyError:
286 raise KeyError("node " + hex(key))
286 raise KeyError("node " + hex(key))
287 def __setitem__(self, key, val):
287 def __setitem__(self, key, val):
288 self.p.map[key] = val
288 self.p.map[key] = val
289 def __delitem__(self, key):
289 def __delitem__(self, key):
290 del self.p.map[key]
290 del self.p.map[key]
291
291
292 class RevlogError(Exception):
292 class RevlogError(Exception):
293 pass
293 pass
294 class LookupError(RevlogError):
294 class LookupError(RevlogError):
295 pass
295 pass
296
296
297 def getoffset(q):
297 def getoffset(q):
298 if q & 0xFFFF:
298 if q & 0xFFFF:
299 raise RevlogError(_('incompatible revision flag %x') % q)
299 raise RevlogError(_('incompatible revision flag %x') % q)
300 return int(q >> 16)
300 return int(q >> 16)
301
301
302 def gettype(q):
302 def gettype(q):
303 return int(q & 0xFFFF)
303 return int(q & 0xFFFF)
304
304
305 def offset_type(offset, type):
305 def offset_type(offset, type):
306 return long(long(offset) << 16 | type)
306 return long(long(offset) << 16 | type)
307
307
308 class revlogoldio(object):
308 class revlogoldio(object):
309 def __init__(self):
309 def __init__(self):
310 self.size = struct.calcsize(indexformatv0)
310 self.size = struct.calcsize(indexformatv0)
311
311
312 def parseindex(self, fp, st, inline):
312 def parseindex(self, fp, st, inline):
313 s = self.size
313 s = self.size
314 index = []
314 index = []
315 nodemap = {nullid: nullrev}
315 nodemap = {nullid: nullrev}
316 n = off = 0
316 n = off = 0
317 data = fp.read()
317 data = fp.read()
318 l = len(data)
318 l = len(data)
319 while off + s <= l:
319 while off + s <= l:
320 cur = data[off:off + s]
320 cur = data[off:off + s]
321 off += s
321 off += s
322 e = struct.unpack(indexformatv0, cur)
322 e = struct.unpack(indexformatv0, cur)
323 # transform to revlogv1 format
323 # transform to revlogv1 format
324 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
324 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
325 nodemap[e[4]], nodemap[e[5]], e[6])
325 nodemap[e[4]], nodemap[e[5]], e[6])
326 index.append(e2)
326 index.append(e2)
327 nodemap[e[6]] = n
327 nodemap[e[6]] = n
328 n += 1
328 n += 1
329
329
330 return index, nodemap, None
330 return index, nodemap, None
331
331
332 class revlogio(object):
332 class revlogio(object):
333 def __init__(self):
333 def __init__(self):
334 self.size = struct.calcsize(indexformatng)
334 self.size = struct.calcsize(indexformatng)
335
335
336 def parseindex(self, fp, st, inline):
336 def parseindex(self, fp, st, inline):
337 if (lazyparser.safe_to_use and not inline and
337 if (lazyparser.safe_to_use and not inline and
338 st and st.st_size > 1000000):
338 st and st.st_size > 1000000):
339 # big index, let's parse it on demand
339 # big index, let's parse it on demand
340 parser = lazyparser(fp, st.st_size)
340 parser = lazyparser(fp, st.st_size)
341 index = lazyindex(parser)
341 index = lazyindex(parser)
342 nodemap = lazymap(parser)
342 nodemap = lazymap(parser)
343 e = list(index[0])
343 e = list(index[0])
344 type = gettype(e[0])
344 type = gettype(e[0])
345 e[0] = offset_type(0, type)
345 e[0] = offset_type(0, type)
346 index[0] = e
346 index[0] = e
347 return index, nodemap, None
347 return index, nodemap, None
348
348
349 s = self.size
349 s = self.size
350 cache = None
350 cache = None
351 index = []
351 index = []
352 nodemap = {nullid: nullrev}
352 nodemap = {nullid: nullrev}
353 n = off = 0
353 n = off = 0
354 # if we're not using lazymap, always read the whole index
354 # if we're not using lazymap, always read the whole index
355 data = fp.read()
355 data = fp.read()
356 l = len(data)
356 l = len(data)
357 if inline:
357 if inline:
358 cache = (0, data)
358 cache = (0, data)
359 while off + s <= l:
359 while off + s <= l:
360 e = struct.unpack(indexformatng, data[off:off + s])
360 e = struct.unpack(indexformatng, data[off:off + s])
361 index.append(e)
361 index.append(e)
362 nodemap[e[7]] = n
362 nodemap[e[7]] = n
363 n += 1
363 n += 1
364 off += s
364 off += s
365 if inline:
365 if inline:
366 if e[1] < 0:
366 if e[1] < 0:
367 break
367 break
368 off += e[1]
368 off += e[1]
369
369
370 e = list(index[0])
370 e = list(index[0])
371 type = gettype(e[0])
371 type = gettype(e[0])
372 e[0] = offset_type(0, type)
372 e[0] = offset_type(0, type)
373 index[0] = e
373 index[0] = e
374
374
375 return index, nodemap, cache
375 return index, nodemap, cache
376
376
377 class revlog(object):
377 class revlog(object):
378 """
378 """
379 the underlying revision storage object
379 the underlying revision storage object
380
380
381 A revlog consists of two parts, an index and the revision data.
381 A revlog consists of two parts, an index and the revision data.
382
382
383 The index is a file with a fixed record size containing
383 The index is a file with a fixed record size containing
384 information on each revision, includings its nodeid (hash), the
384 information on each revision, includings its nodeid (hash), the
385 nodeids of its parents, the position and offset of its data within
385 nodeids of its parents, the position and offset of its data within
386 the data file, and the revision it's based on. Finally, each entry
386 the data file, and the revision it's based on. Finally, each entry
387 contains a linkrev entry that can serve as a pointer to external
387 contains a linkrev entry that can serve as a pointer to external
388 data.
388 data.
389
389
390 The revision data itself is a linear collection of data chunks.
390 The revision data itself is a linear collection of data chunks.
391 Each chunk represents a revision and is usually represented as a
391 Each chunk represents a revision and is usually represented as a
392 delta against the previous chunk. To bound lookup time, runs of
392 delta against the previous chunk. To bound lookup time, runs of
393 deltas are limited to about 2 times the length of the original
393 deltas are limited to about 2 times the length of the original
394 version data. This makes retrieval of a version proportional to
394 version data. This makes retrieval of a version proportional to
395 its size, or O(1) relative to the number of revisions.
395 its size, or O(1) relative to the number of revisions.
396
396
397 Both pieces of the revlog are written to in an append-only
397 Both pieces of the revlog are written to in an append-only
398 fashion, which means we never need to rewrite a file to insert or
398 fashion, which means we never need to rewrite a file to insert or
399 remove data, and can use some simple techniques to avoid the need
399 remove data, and can use some simple techniques to avoid the need
400 for locking while reading.
400 for locking while reading.
401 """
401 """
402 def __init__(self, opener, indexfile):
402 def __init__(self, opener, indexfile):
403 """
403 """
404 create a revlog object
404 create a revlog object
405
405
406 opener is a function that abstracts the file opening operation
406 opener is a function that abstracts the file opening operation
407 and can be used to implement COW semantics or the like.
407 and can be used to implement COW semantics or the like.
408 """
408 """
409 self.indexfile = indexfile
409 self.indexfile = indexfile
410 self.datafile = indexfile[:-2] + ".d"
410 self.datafile = indexfile[:-2] + ".d"
411 self.opener = opener
411 self.opener = opener
412
413 self.indexstat = None
412 self.indexstat = None
414 self.cache = None
413 self._cache = None
415 self._chunkcache = None
414 self._chunkcache = None
416 self.defversion = REVLOG_DEFAULT_VERSION
415 self.defversion = REVLOG_DEFAULT_VERSION
417 if hasattr(opener, "defversion"):
416 if hasattr(opener, "defversion"):
418 self.defversion = opener.defversion
417 self.defversion = opener.defversion
419 if self.defversion & REVLOGNG:
418 if self.defversion & REVLOGNG:
420 self.defversion |= REVLOGNGINLINEDATA
419 self.defversion |= REVLOGNGINLINEDATA
421 self._load()
420 self._load()
422
421
423 def _load(self):
422 def _load(self):
424 v = self.defversion
423 v = self.defversion
425 try:
424 try:
426 f = self.opener(self.indexfile)
425 f = self.opener(self.indexfile)
427 i = f.read(4)
426 i = f.read(4)
428 f.seek(0)
427 f.seek(0)
429 if len(i) > 0:
428 if len(i) > 0:
430 v = struct.unpack(versionformat, i)[0]
429 v = struct.unpack(versionformat, i)[0]
431 except IOError, inst:
430 except IOError, inst:
432 if inst.errno != errno.ENOENT:
431 if inst.errno != errno.ENOENT:
433 raise
432 raise
434 i = ""
433 i = ""
435 else:
434 else:
436 try:
435 try:
437 st = util.fstat(f)
436 st = util.fstat(f)
438 except AttributeError, inst:
437 except AttributeError, inst:
439 st = None
438 st = None
440 else:
439 else:
441 oldst = self.indexstat
440 oldst = self.indexstat
442 if (oldst and st.st_dev == oldst.st_dev
441 if (oldst and st.st_dev == oldst.st_dev
443 and st.st_ino == oldst.st_ino
442 and st.st_ino == oldst.st_ino
444 and st.st_mtime == oldst.st_mtime
443 and st.st_mtime == oldst.st_mtime
445 and st.st_ctime == oldst.st_ctime
444 and st.st_ctime == oldst.st_ctime
446 and st.st_size == oldst.st_size):
445 and st.st_size == oldst.st_size):
447 return
446 return
448 self.indexstat = st
447 self.indexstat = st
449 flags = v & ~0xFFFF
448 flags = v & ~0xFFFF
450 fmt = v & 0xFFFF
449 fmt = v & 0xFFFF
451 if fmt == REVLOGV0:
450 if fmt == REVLOGV0:
452 if flags:
451 if flags:
453 raise RevlogError(_("index %s unknown flags %#04x for format v0")
452 raise RevlogError(_("index %s unknown flags %#04x for format v0")
454 % (self.indexfile, flags >> 16))
453 % (self.indexfile, flags >> 16))
455 elif fmt == REVLOGNG:
454 elif fmt == REVLOGNG:
456 if flags & ~REVLOGNGINLINEDATA:
455 if flags & ~REVLOGNGINLINEDATA:
457 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
456 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
458 % (self.indexfile, flags >> 16))
457 % (self.indexfile, flags >> 16))
459 else:
458 else:
460 raise RevlogError(_("index %s unknown format %d")
459 raise RevlogError(_("index %s unknown format %d")
461 % (self.indexfile, fmt))
460 % (self.indexfile, fmt))
462 self.version = v
461 self.version = v
463 self._inline = v & REVLOGNGINLINEDATA
462 self._inline = v & REVLOGNGINLINEDATA
464 self.nodemap = {nullid: nullrev}
463 self.nodemap = {nullid: nullrev}
465 self.index = []
464 self.index = []
466 self._io = revlogio()
465 self._io = revlogio()
467 if self.version == REVLOGV0:
466 if self.version == REVLOGV0:
468 self._io = revlogoldio()
467 self._io = revlogoldio()
469 if i:
468 if i:
470 d = self._io.parseindex(f, st, self._inline)
469 d = self._io.parseindex(f, st, self._inline)
471 self.index, self.nodemap, self._chunkcache = d
470 self.index, self.nodemap, self._chunkcache = d
472 # add the magic null revision at -1
471 # add the magic null revision at -1
473 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
472 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
474
473
475 def _loadindex(self, start, end):
474 def _loadindex(self, start, end):
476 """load a block of indexes all at once from the lazy parser"""
475 """load a block of indexes all at once from the lazy parser"""
477 if isinstance(self.index, lazyindex):
476 if isinstance(self.index, lazyindex):
478 self.index.p.loadindex(start, end)
477 self.index.p.loadindex(start, end)
479
478
480 def _loadindexmap(self):
479 def _loadindexmap(self):
481 """loads both the map and the index from the lazy parser"""
480 """loads both the map and the index from the lazy parser"""
482 if isinstance(self.index, lazyindex):
481 if isinstance(self.index, lazyindex):
483 p = self.index.p
482 p = self.index.p
484 p.loadindex()
483 p.loadindex()
485 self.nodemap = p.map
484 self.nodemap = p.map
486
485
487 def _loadmap(self):
486 def _loadmap(self):
488 """loads the map from the lazy parser"""
487 """loads the map from the lazy parser"""
489 if isinstance(self.nodemap, lazymap):
488 if isinstance(self.nodemap, lazymap):
490 self.nodemap.p.loadmap()
489 self.nodemap.p.loadmap()
491 self.nodemap = self.nodemap.p.map
490 self.nodemap = self.nodemap.p.map
492
491
493 def tip(self):
492 def tip(self):
494 return self.node(len(self.index) - 2)
493 return self.node(len(self.index) - 2)
495 def count(self):
494 def count(self):
496 return len(self.index) - 1
495 return len(self.index) - 1
497
496
498 def rev(self, node):
497 def rev(self, node):
499 try:
498 try:
500 return self.nodemap[node]
499 return self.nodemap[node]
501 except KeyError:
500 except KeyError:
502 raise LookupError(_('%s: no node %s') % (self.indexfile, hex(node)))
501 raise LookupError(_('%s: no node %s') % (self.indexfile, hex(node)))
503 def node(self, rev):
502 def node(self, rev):
504 return self.index[rev][7]
503 return self.index[rev][7]
505 def linkrev(self, node):
504 def linkrev(self, node):
506 return self.index[self.rev(node)][4]
505 return self.index[self.rev(node)][4]
507 def parents(self, node):
506 def parents(self, node):
508 d = self.index[self.rev(node)][5:7]
507 d = self.index[self.rev(node)][5:7]
509 return (self.node(d[0]), self.node(d[1]))
508 return (self.node(d[0]), self.node(d[1]))
510 def parentrevs(self, rev):
509 def parentrevs(self, rev):
511 return self.index[rev][5:7]
510 return self.index[rev][5:7]
512 def start(self, rev):
511 def start(self, rev):
513 return getoffset(self.index[rev][0])
512 return getoffset(self.index[rev][0])
514 def end(self, rev):
513 def end(self, rev):
515 return self.start(rev) + self.length(rev)
514 return self.start(rev) + self.length(rev)
516 def length(self, rev):
515 def length(self, rev):
517 return self.index[rev][1]
516 return self.index[rev][1]
518 def base(self, rev):
517 def base(self, rev):
519 return self.index[rev][3]
518 return self.index[rev][3]
520
519
521 def size(self, rev):
520 def size(self, rev):
522 """return the length of the uncompressed text for a given revision"""
521 """return the length of the uncompressed text for a given revision"""
523 l = self.index[rev][2]
522 l = self.index[rev][2]
524 if l >= 0:
523 if l >= 0:
525 return l
524 return l
526
525
527 t = self.revision(self.node(rev))
526 t = self.revision(self.node(rev))
528 return len(t)
527 return len(t)
529
528
530 # alternate implementation, The advantage to this code is it
529 # alternate implementation, The advantage to this code is it
531 # will be faster for a single revision. But, the results are not
530 # will be faster for a single revision. But, the results are not
532 # cached, so finding the size of every revision will be slower.
531 # cached, so finding the size of every revision will be slower.
533 """
532 """
534 if self.cache and self.cache[1] == rev:
533 if self.cache and self.cache[1] == rev:
535 return len(self.cache[2])
534 return len(self.cache[2])
536
535
537 base = self.base(rev)
536 base = self.base(rev)
538 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
537 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
539 base = self.cache[1]
538 base = self.cache[1]
540 text = self.cache[2]
539 text = self.cache[2]
541 else:
540 else:
542 text = self.revision(self.node(base))
541 text = self.revision(self.node(base))
543
542
544 l = len(text)
543 l = len(text)
545 for x in xrange(base + 1, rev + 1):
544 for x in xrange(base + 1, rev + 1):
546 l = mdiff.patchedsize(l, self.chunk(x))
545 l = mdiff.patchedsize(l, self.chunk(x))
547 return l
546 return l
548 """
547 """
549
548
550 def reachable(self, node, stop=None):
549 def reachable(self, node, stop=None):
551 """return a hash of all nodes ancestral to a given node, including
550 """return a hash of all nodes ancestral to a given node, including
552 the node itself, stopping when stop is matched"""
551 the node itself, stopping when stop is matched"""
553 reachable = {}
552 reachable = {}
554 visit = [node]
553 visit = [node]
555 reachable[node] = 1
554 reachable[node] = 1
556 if stop:
555 if stop:
557 stopn = self.rev(stop)
556 stopn = self.rev(stop)
558 else:
557 else:
559 stopn = 0
558 stopn = 0
560 while visit:
559 while visit:
561 n = visit.pop(0)
560 n = visit.pop(0)
562 if n == stop:
561 if n == stop:
563 continue
562 continue
564 if n == nullid:
563 if n == nullid:
565 continue
564 continue
566 for p in self.parents(n):
565 for p in self.parents(n):
567 if self.rev(p) < stopn:
566 if self.rev(p) < stopn:
568 continue
567 continue
569 if p not in reachable:
568 if p not in reachable:
570 reachable[p] = 1
569 reachable[p] = 1
571 visit.append(p)
570 visit.append(p)
572 return reachable
571 return reachable
573
572
574 def nodesbetween(self, roots=None, heads=None):
573 def nodesbetween(self, roots=None, heads=None):
575 """Return a tuple containing three elements. Elements 1 and 2 contain
574 """Return a tuple containing three elements. Elements 1 and 2 contain
576 a final list bases and heads after all the unreachable ones have been
575 a final list bases and heads after all the unreachable ones have been
577 pruned. Element 0 contains a topologically sorted list of all
576 pruned. Element 0 contains a topologically sorted list of all
578
577
579 nodes that satisfy these constraints:
578 nodes that satisfy these constraints:
580 1. All nodes must be descended from a node in roots (the nodes on
579 1. All nodes must be descended from a node in roots (the nodes on
581 roots are considered descended from themselves).
580 roots are considered descended from themselves).
582 2. All nodes must also be ancestors of a node in heads (the nodes in
581 2. All nodes must also be ancestors of a node in heads (the nodes in
583 heads are considered to be their own ancestors).
582 heads are considered to be their own ancestors).
584
583
585 If roots is unspecified, nullid is assumed as the only root.
584 If roots is unspecified, nullid is assumed as the only root.
586 If heads is unspecified, it is taken to be the output of the
585 If heads is unspecified, it is taken to be the output of the
587 heads method (i.e. a list of all nodes in the repository that
586 heads method (i.e. a list of all nodes in the repository that
588 have no children)."""
587 have no children)."""
589 nonodes = ([], [], [])
588 nonodes = ([], [], [])
590 if roots is not None:
589 if roots is not None:
591 roots = list(roots)
590 roots = list(roots)
592 if not roots:
591 if not roots:
593 return nonodes
592 return nonodes
594 lowestrev = min([self.rev(n) for n in roots])
593 lowestrev = min([self.rev(n) for n in roots])
595 else:
594 else:
596 roots = [nullid] # Everybody's a descendent of nullid
595 roots = [nullid] # Everybody's a descendent of nullid
597 lowestrev = nullrev
596 lowestrev = nullrev
598 if (lowestrev == nullrev) and (heads is None):
597 if (lowestrev == nullrev) and (heads is None):
599 # We want _all_ the nodes!
598 # We want _all_ the nodes!
600 return ([self.node(r) for r in xrange(0, self.count())],
599 return ([self.node(r) for r in xrange(0, self.count())],
601 [nullid], list(self.heads()))
600 [nullid], list(self.heads()))
602 if heads is None:
601 if heads is None:
603 # All nodes are ancestors, so the latest ancestor is the last
602 # All nodes are ancestors, so the latest ancestor is the last
604 # node.
603 # node.
605 highestrev = self.count() - 1
604 highestrev = self.count() - 1
606 # Set ancestors to None to signal that every node is an ancestor.
605 # Set ancestors to None to signal that every node is an ancestor.
607 ancestors = None
606 ancestors = None
608 # Set heads to an empty dictionary for later discovery of heads
607 # Set heads to an empty dictionary for later discovery of heads
609 heads = {}
608 heads = {}
610 else:
609 else:
611 heads = list(heads)
610 heads = list(heads)
612 if not heads:
611 if not heads:
613 return nonodes
612 return nonodes
614 ancestors = {}
613 ancestors = {}
615 # Turn heads into a dictionary so we can remove 'fake' heads.
614 # Turn heads into a dictionary so we can remove 'fake' heads.
616 # Also, later we will be using it to filter out the heads we can't
615 # Also, later we will be using it to filter out the heads we can't
617 # find from roots.
616 # find from roots.
618 heads = dict.fromkeys(heads, 0)
617 heads = dict.fromkeys(heads, 0)
619 # Start at the top and keep marking parents until we're done.
618 # Start at the top and keep marking parents until we're done.
620 nodestotag = heads.keys()
619 nodestotag = heads.keys()
621 # Remember where the top was so we can use it as a limit later.
620 # Remember where the top was so we can use it as a limit later.
622 highestrev = max([self.rev(n) for n in nodestotag])
621 highestrev = max([self.rev(n) for n in nodestotag])
623 while nodestotag:
622 while nodestotag:
624 # grab a node to tag
623 # grab a node to tag
625 n = nodestotag.pop()
624 n = nodestotag.pop()
626 # Never tag nullid
625 # Never tag nullid
627 if n == nullid:
626 if n == nullid:
628 continue
627 continue
629 # A node's revision number represents its place in a
628 # A node's revision number represents its place in a
630 # topologically sorted list of nodes.
629 # topologically sorted list of nodes.
631 r = self.rev(n)
630 r = self.rev(n)
632 if r >= lowestrev:
631 if r >= lowestrev:
633 if n not in ancestors:
632 if n not in ancestors:
634 # If we are possibly a descendent of one of the roots
633 # If we are possibly a descendent of one of the roots
635 # and we haven't already been marked as an ancestor
634 # and we haven't already been marked as an ancestor
636 ancestors[n] = 1 # Mark as ancestor
635 ancestors[n] = 1 # Mark as ancestor
637 # Add non-nullid parents to list of nodes to tag.
636 # Add non-nullid parents to list of nodes to tag.
638 nodestotag.extend([p for p in self.parents(n) if
637 nodestotag.extend([p for p in self.parents(n) if
639 p != nullid])
638 p != nullid])
640 elif n in heads: # We've seen it before, is it a fake head?
639 elif n in heads: # We've seen it before, is it a fake head?
641 # So it is, real heads should not be the ancestors of
640 # So it is, real heads should not be the ancestors of
642 # any other heads.
641 # any other heads.
643 heads.pop(n)
642 heads.pop(n)
644 if not ancestors:
643 if not ancestors:
645 return nonodes
644 return nonodes
646 # Now that we have our set of ancestors, we want to remove any
645 # Now that we have our set of ancestors, we want to remove any
647 # roots that are not ancestors.
646 # roots that are not ancestors.
648
647
649 # If one of the roots was nullid, everything is included anyway.
648 # If one of the roots was nullid, everything is included anyway.
650 if lowestrev > nullrev:
649 if lowestrev > nullrev:
651 # But, since we weren't, let's recompute the lowest rev to not
650 # But, since we weren't, let's recompute the lowest rev to not
652 # include roots that aren't ancestors.
651 # include roots that aren't ancestors.
653
652
654 # Filter out roots that aren't ancestors of heads
653 # Filter out roots that aren't ancestors of heads
655 roots = [n for n in roots if n in ancestors]
654 roots = [n for n in roots if n in ancestors]
656 # Recompute the lowest revision
655 # Recompute the lowest revision
657 if roots:
656 if roots:
658 lowestrev = min([self.rev(n) for n in roots])
657 lowestrev = min([self.rev(n) for n in roots])
659 else:
658 else:
660 # No more roots? Return empty list
659 # No more roots? Return empty list
661 return nonodes
660 return nonodes
662 else:
661 else:
663 # We are descending from nullid, and don't need to care about
662 # We are descending from nullid, and don't need to care about
664 # any other roots.
663 # any other roots.
665 lowestrev = nullrev
664 lowestrev = nullrev
666 roots = [nullid]
665 roots = [nullid]
667 # Transform our roots list into a 'set' (i.e. a dictionary where the
666 # Transform our roots list into a 'set' (i.e. a dictionary where the
668 # values don't matter.
667 # values don't matter.
669 descendents = dict.fromkeys(roots, 1)
668 descendents = dict.fromkeys(roots, 1)
670 # Also, keep the original roots so we can filter out roots that aren't
669 # Also, keep the original roots so we can filter out roots that aren't
671 # 'real' roots (i.e. are descended from other roots).
670 # 'real' roots (i.e. are descended from other roots).
672 roots = descendents.copy()
671 roots = descendents.copy()
673 # Our topologically sorted list of output nodes.
672 # Our topologically sorted list of output nodes.
674 orderedout = []
673 orderedout = []
675 # Don't start at nullid since we don't want nullid in our output list,
674 # Don't start at nullid since we don't want nullid in our output list,
676 # and if nullid shows up in descedents, empty parents will look like
675 # and if nullid shows up in descedents, empty parents will look like
677 # they're descendents.
676 # they're descendents.
678 for r in xrange(max(lowestrev, 0), highestrev + 1):
677 for r in xrange(max(lowestrev, 0), highestrev + 1):
679 n = self.node(r)
678 n = self.node(r)
680 isdescendent = False
679 isdescendent = False
681 if lowestrev == nullrev: # Everybody is a descendent of nullid
680 if lowestrev == nullrev: # Everybody is a descendent of nullid
682 isdescendent = True
681 isdescendent = True
683 elif n in descendents:
682 elif n in descendents:
684 # n is already a descendent
683 # n is already a descendent
685 isdescendent = True
684 isdescendent = True
686 # This check only needs to be done here because all the roots
685 # This check only needs to be done here because all the roots
687 # will start being marked is descendents before the loop.
686 # will start being marked is descendents before the loop.
688 if n in roots:
687 if n in roots:
689 # If n was a root, check if it's a 'real' root.
688 # If n was a root, check if it's a 'real' root.
690 p = tuple(self.parents(n))
689 p = tuple(self.parents(n))
691 # If any of its parents are descendents, it's not a root.
690 # If any of its parents are descendents, it's not a root.
692 if (p[0] in descendents) or (p[1] in descendents):
691 if (p[0] in descendents) or (p[1] in descendents):
693 roots.pop(n)
692 roots.pop(n)
694 else:
693 else:
695 p = tuple(self.parents(n))
694 p = tuple(self.parents(n))
696 # A node is a descendent if either of its parents are
695 # A node is a descendent if either of its parents are
697 # descendents. (We seeded the descendents list with the roots
696 # descendents. (We seeded the descendents list with the roots
698 # up there, remember?)
697 # up there, remember?)
699 if (p[0] in descendents) or (p[1] in descendents):
698 if (p[0] in descendents) or (p[1] in descendents):
700 descendents[n] = 1
699 descendents[n] = 1
701 isdescendent = True
700 isdescendent = True
702 if isdescendent and ((ancestors is None) or (n in ancestors)):
701 if isdescendent and ((ancestors is None) or (n in ancestors)):
703 # Only include nodes that are both descendents and ancestors.
702 # Only include nodes that are both descendents and ancestors.
704 orderedout.append(n)
703 orderedout.append(n)
705 if (ancestors is not None) and (n in heads):
704 if (ancestors is not None) and (n in heads):
706 # We're trying to figure out which heads are reachable
705 # We're trying to figure out which heads are reachable
707 # from roots.
706 # from roots.
708 # Mark this head as having been reached
707 # Mark this head as having been reached
709 heads[n] = 1
708 heads[n] = 1
710 elif ancestors is None:
709 elif ancestors is None:
711 # Otherwise, we're trying to discover the heads.
710 # Otherwise, we're trying to discover the heads.
712 # Assume this is a head because if it isn't, the next step
711 # Assume this is a head because if it isn't, the next step
713 # will eventually remove it.
712 # will eventually remove it.
714 heads[n] = 1
713 heads[n] = 1
715 # But, obviously its parents aren't.
714 # But, obviously its parents aren't.
716 for p in self.parents(n):
715 for p in self.parents(n):
717 heads.pop(p, None)
716 heads.pop(p, None)
718 heads = [n for n in heads.iterkeys() if heads[n] != 0]
717 heads = [n for n in heads.iterkeys() if heads[n] != 0]
719 roots = roots.keys()
718 roots = roots.keys()
720 assert orderedout
719 assert orderedout
721 assert roots
720 assert roots
722 assert heads
721 assert heads
723 return (orderedout, roots, heads)
722 return (orderedout, roots, heads)
724
723
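# Editor's note: the walk above is the tail of this file's nodesbetween(),
# which returns a topologically sorted node list plus the roots and heads
# it actually used.  A minimal, hedged usage sketch -- "rl" stands for any
# revlog instance from this module, and the helper name is illustrative:

def _example_nodesbetween(rl, roots, heads):
    nodes, outroots, outheads = rl.nodesbetween(roots, heads)
    # parents always precede their children in "nodes"
    return nodes, outroots, outheads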
725 def heads(self, start=None, stop=None):
724 def heads(self, start=None, stop=None):
726 """return the list of all nodes that have no children
725 """return the list of all nodes that have no children
727
726
728 if start is specified, only heads that are descendants of
727 if start is specified, only heads that are descendants of
729 start will be returned.
728 start will be returned.
730 if stop is specified, the revs listed in stop are treated
729 if stop is specified, the revs listed in stop are treated
731 as if they had no children
730 as if they had no children
732 """
731 """
733 if start is None:
732 if start is None:
734 start = nullid
733 start = nullid
735 if stop is None:
734 if stop is None:
736 stop = []
735 stop = []
737 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
736 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
738 startrev = self.rev(start)
737 startrev = self.rev(start)
739 reachable = {startrev: 1}
738 reachable = {startrev: 1}
740 heads = {startrev: 1}
739 heads = {startrev: 1}
741
740
742 parentrevs = self.parentrevs
741 parentrevs = self.parentrevs
743 for r in xrange(startrev + 1, self.count()):
742 for r in xrange(startrev + 1, self.count()):
744 for p in parentrevs(r):
743 for p in parentrevs(r):
745 if p in reachable:
744 if p in reachable:
746 if r not in stoprevs:
745 if r not in stoprevs:
747 reachable[r] = 1
746 reachable[r] = 1
748 heads[r] = 1
747 heads[r] = 1
749 if p in heads and p not in stoprevs:
748 if p in heads and p not in stoprevs:
750 del heads[p]
749 del heads[p]
751
750
752 return [self.node(r) for r in heads]
751 return [self.node(r) for r in heads]
753
752
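# Editor's sketch (not part of this changeset): heads() with no arguments
# returns every childless node; "start" restricts the result to that
# node's descendants.  Assumes a non-empty revlog "rl":

def _example_heads(rl):
    all_heads = rl.heads()                     # every node with no children
    from_rev0 = rl.heads(start=rl.node(0))     # heads descending from rev 0
    return all_heads, from_rev0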
754 def children(self, node):
753 def children(self, node):
755 """find the children of a given node"""
754 """find the children of a given node"""
756 c = []
755 c = []
757 p = self.rev(node)
756 p = self.rev(node)
758 for r in range(p + 1, self.count()):
757 for r in range(p + 1, self.count()):
759 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
758 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
760 if prevs:
759 if prevs:
761 for pr in prevs:
760 for pr in prevs:
762 if pr == p:
761 if pr == p:
763 c.append(self.node(r))
762 c.append(self.node(r))
764 elif p == nullrev:
763 elif p == nullrev:
765 c.append(self.node(r))
764 c.append(self.node(r))
766 return c
765 return c
767
766
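# Editor's sketch: children() has no reverse index to consult, so it scans
# every revision after rev(node) and the cost grows with the revlog size.
# "rl" is assumed to be any revlog instance; short() is the module-level
# helper already used elsewhere in this file:

def _example_children(rl, node):
    return [short(c) for c in rl.children(node)]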
768 def _match(self, id):
767 def _match(self, id):
769 if isinstance(id, (long, int)):
768 if isinstance(id, (long, int)):
770 # rev
769 # rev
771 return self.node(id)
770 return self.node(id)
772 if len(id) == 20:
771 if len(id) == 20:
773 # possibly a binary node
772 # possibly a binary node
774 # odds of a binary node being all hex in ASCII are 1 in 10**25
773 # odds of a binary node being all hex in ASCII are 1 in 10**25
775 try:
774 try:
776 node = id
775 node = id
777 r = self.rev(node) # quick search the index
776 r = self.rev(node) # quick search the index
778 return node
777 return node
779 except LookupError:
778 except LookupError:
780 pass # may be partial hex id
779 pass # may be partial hex id
781 try:
780 try:
782 # str(rev)
781 # str(rev)
783 rev = int(id)
782 rev = int(id)
784 if str(rev) != id:
783 if str(rev) != id:
785 raise ValueError
784 raise ValueError
786 if rev < 0:
785 if rev < 0:
787 rev = self.count() + rev
786 rev = self.count() + rev
788 if rev < 0 or rev >= self.count():
787 if rev < 0 or rev >= self.count():
789 raise ValueError
788 raise ValueError
790 return self.node(rev)
789 return self.node(rev)
791 except (ValueError, OverflowError):
790 except (ValueError, OverflowError):
792 pass
791 pass
793 if len(id) == 40:
792 if len(id) == 40:
794 try:
793 try:
795 # a full hex nodeid?
794 # a full hex nodeid?
796 node = bin(id)
795 node = bin(id)
797 r = self.rev(node)
796 r = self.rev(node)
798 return node
797 return node
799 except TypeError:
798 except TypeError:
800 pass
799 pass
801
800
802 def _partialmatch(self, id):
801 def _partialmatch(self, id):
803 if len(id) < 40:
802 if len(id) < 40:
804 try:
803 try:
805 # hex(node)[:...]
804 # hex(node)[:...]
806 bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
805 bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
807 node = None
806 node = None
808 for n in self.nodemap:
807 for n in self.nodemap:
809 if n.startswith(bin_id) and hex(n).startswith(id):
808 if n.startswith(bin_id) and hex(n).startswith(id):
810 if node is not None:
809 if node is not None:
811 raise LookupError(_("Ambiguous identifier"))
810 raise LookupError(_("Ambiguous identifier"))
812 node = n
811 node = n
813 if node is not None:
812 if node is not None:
814 return node
813 return node
815 except TypeError:
814 except TypeError:
816 pass
815 pass
817
816
818 def lookup(self, id):
817 def lookup(self, id):
819 """locate a node based on:
818 """locate a node based on:
820 - revision number or str(revision number)
819 - revision number or str(revision number)
821 - nodeid or subset of hex nodeid
820 - nodeid or subset of hex nodeid
822 """
821 """
823 n = self._match(id)
822 n = self._match(id)
824 if n is not None:
823 if n is not None:
825 return n
824 return n
826 n = self._partialmatch(id)
825 n = self._partialmatch(id)
827 if n:
826 if n:
828 return n
827 return n
829
828
830 raise LookupError(_("No match found"))
829 raise LookupError(_("No match found"))
831
830
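# Editor's sketch of the lookup() contract documented above.  The prefix
# case assumes the abbreviation is unambiguous; otherwise _partialmatch()
# raises LookupError("Ambiguous identifier"):

def _example_lookup(rl):
    n = rl.lookup(0)                    # revision number
    assert rl.lookup("0") == n          # str(revision number)
    assert rl.lookup(hex(n)) == n       # full 40-char hex nodeid
    assert rl.lookup(hex(n)[:12]) == n  # unambiguous hex prefix
    return n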
832 def cmp(self, node, text):
831 def cmp(self, node, text):
833 """compare text with a given file revision"""
832 """compare text with a given file revision"""
834 p1, p2 = self.parents(node)
833 p1, p2 = self.parents(node)
835 return hash(text, p1, p2) != node
834 return hash(text, p1, p2) != node
836
835
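# Editor's note: despite the name, cmp() is a mismatch test, not a
# three-way comparison -- it returns True when "text" does NOT hash to
# "node" together with the revision's parents.  Hedged sketch:

def _example_cmp(rl, node):
    text = rl.revision(node)
    return rl.cmp(node, text)   # False: the text matches this revision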
837 def diff(self, a, b):
836 def diff(self, a, b):
838 """return a delta between two revisions"""
837 """return a delta between two revisions"""
839 return mdiff.textdiff(a, b)
838 return mdiff.textdiff(a, b)
840
839
841 def patches(self, t, pl):
840 def patches(self, t, pl):
842 """apply a list of patches to a string"""
841 """apply a list of patches to a string"""
843 return mdiff.patches(t, pl)
842 return mdiff.patches(t, pl)
844
843
845 def chunk(self, rev, df=None, cachelen=4096):
844 def chunk(self, rev, df=None, cachelen=4096):
846 start, length = self.start(rev), self.length(rev)
845 start, length = self.start(rev), self.length(rev)
847 inline = self._inline
846 inline = self._inline
848 if inline:
847 if inline:
849 start += (rev + 1) * self._io.size
848 start += (rev + 1) * self._io.size
850 end = start + length
849 end = start + length
851 def loadcache(df):
850 def loadcache(df):
852 cache_length = max(cachelen, length) # at least cachelen (4k default)
851 cache_length = max(cachelen, length) # at least cachelen (4k default)
853 if not df:
852 if not df:
854 if inline:
853 if inline:
855 df = self.opener(self.indexfile)
854 df = self.opener(self.indexfile)
856 else:
855 else:
857 df = self.opener(self.datafile)
856 df = self.opener(self.datafile)
858 df.seek(start)
857 df.seek(start)
859 self._chunkcache = (start, df.read(cache_length))
858 self._chunkcache = (start, df.read(cache_length))
860
859
861 if not self._chunkcache:
860 if not self._chunkcache:
862 loadcache(df)
861 loadcache(df)
863
862
864 cache_start = self._chunkcache[0]
863 cache_start = self._chunkcache[0]
865 cache_end = cache_start + len(self._chunkcache[1])
864 cache_end = cache_start + len(self._chunkcache[1])
866 if start >= cache_start and end <= cache_end:
865 if start >= cache_start and end <= cache_end:
867 # it is cached
866 # it is cached
868 offset = start - cache_start
867 offset = start - cache_start
869 else:
868 else:
870 loadcache(df)
869 loadcache(df)
871 offset = 0
870 offset = 0
872
871
873 return decompress(self._chunkcache[1][offset:offset + length])
872 return decompress(self._chunkcache[1][offset:offset + length])
874
873
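# Editor's sketch of the read cache above: loadcache() pulls at least
# cachelen bytes into self._chunkcache as a (start, data) tuple, so
# decompressing adjacent revisions usually touches the file only once:

def _example_chunks(rl, rev):
    first = rl.chunk(rev)    # may read from disk and fill _chunkcache
    second = rl.chunk(rev)   # served from the cached (start, data) tuple
    return first == second   # True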
875 def delta(self, node):
874 def delta(self, node):
876 """return or calculate a delta between a node and its predecessor"""
875 """return or calculate a delta between a node and its predecessor"""
877 r = self.rev(node)
876 r = self.rev(node)
878 return self.revdiff(r - 1, r)
877 return self.revdiff(r - 1, r)
879
878
880 def revdiff(self, rev1, rev2):
879 def revdiff(self, rev1, rev2):
881 """return or calculate a delta between two revisions"""
880 """return or calculate a delta between two revisions"""
882 b1 = self.base(rev1)
881 b1 = self.base(rev1)
883 b2 = self.base(rev2)
882 b2 = self.base(rev2)
884 if b1 == b2 and rev1 + 1 == rev2:
883 if b1 == b2 and rev1 + 1 == rev2:
885 return self.chunk(rev2)
884 return self.chunk(rev2)
886 else:
885 else:
887 return self.diff(self.revision(self.node(rev1)),
886 return self.diff(self.revision(self.node(rev1)),
888 self.revision(self.node(rev2)))
887 self.revision(self.node(rev2)))
889
888
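# Editor's sketch: when rev2 immediately follows rev1 in the same delta
# chain, revdiff() returns the stored chunk verbatim; otherwise it
# reconstructs both texts and diffs them.  Either way the delta
# round-trips through patches().  Assumes rev + 1 exists:

def _example_revdiff(rl, rev):
    d = rl.revdiff(rev, rev + 1)
    rebuilt = rl.patches(rl.revision(rl.node(rev)), [d])
    return rebuilt == rl.revision(rl.node(rev + 1))   # True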
890 def revision(self, node):
889 def revision(self, node):
891 """return an uncompressed revision of a given"""
890 """return an uncompressed revision of a given"""
892 if node == nullid:
891 if node == nullid:
893 return ""
892 return ""
894 if self.cache and self.cache[0] == node:
893 if self._cache and self._cache[0] == node:
895 return self.cache[2]
894 return self._cache[2]
896
895
897 # look up what we need to read
896 # look up what we need to read
898 text = None
897 text = None
899 rev = self.rev(node)
898 rev = self.rev(node)
900 base = self.base(rev)
899 base = self.base(rev)
901
900
902 if self._inline:
901 if self._inline:
903 # we probably have the whole chunk cached
902 # we probably have the whole chunk cached
904 df = None
903 df = None
905 else:
904 else:
906 df = self.opener(self.datafile)
905 df = self.opener(self.datafile)
907
906
908 # do we have useful data cached?
907 # do we have useful data cached?
909 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
908 if self._cache and self._cache[1] >= base and self._cache[1] < rev:
910 base = self.cache[1]
909 base = self._cache[1]
911 text = self.cache[2]
910 text = self._cache[2]
912 self._loadindex(base, rev + 1)
911 self._loadindex(base, rev + 1)
913 else:
912 else:
914 self._loadindex(base, rev + 1)
913 self._loadindex(base, rev + 1)
915 text = self.chunk(base, df=df)
914 text = self.chunk(base, df=df)
916
915
917 bins = []
916 bins = []
918 for r in xrange(base + 1, rev + 1):
917 for r in xrange(base + 1, rev + 1):
919 bins.append(self.chunk(r, df=df))
918 bins.append(self.chunk(r, df=df))
920
919
921 text = self.patches(text, bins)
920 text = self.patches(text, bins)
922
921
923 p1, p2 = self.parents(node)
922 p1, p2 = self.parents(node)
924 if node != hash(text, p1, p2):
923 if node != hash(text, p1, p2):
925 raise RevlogError(_("integrity check failed on %s:%d")
924 raise RevlogError(_("integrity check failed on %s:%d")
926 % (self.datafile, rev))
925 % (self.datafile, rev))
927
926
928 self.cache = (node, rev, text)
927 self._cache = (node, rev, text)
929 return text
928 return text
930
929
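# Editor's sketch of the caching this changeset makes private: the
# reconstructed text is kept in the _cache tuple (node, rev, text), so a
# repeated read of the same node skips the delta chain entirely:

def _example_revision(rl, node):
    t1 = rl.revision(node)   # walks base + deltas, verifies the hash
    t2 = rl.revision(node)   # answered from rl._cache
    return t1 == t2          # True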
931 def checkinlinesize(self, tr, fp=None):
930 def checkinlinesize(self, tr, fp=None):
932 if not self._inline:
931 if not self._inline:
933 return
932 return
934 if not fp:
933 if not fp:
935 fp = self.opener(self.indexfile, 'r')
934 fp = self.opener(self.indexfile, 'r')
936 fp.seek(0, 2)
935 fp.seek(0, 2)
937 size = fp.tell()
936 size = fp.tell()
938 if size < 131072:
937 if size < 131072:
939 return
938 return
940 trinfo = tr.find(self.indexfile)
939 trinfo = tr.find(self.indexfile)
941 if trinfo is None:
940 if trinfo is None:
942 raise RevlogError(_("%s not found in the transaction")
941 raise RevlogError(_("%s not found in the transaction")
943 % self.indexfile)
942 % self.indexfile)
944
943
945 trindex = trinfo[2]
944 trindex = trinfo[2]
946 dataoff = self.start(trindex)
945 dataoff = self.start(trindex)
947
946
948 tr.add(self.datafile, dataoff)
947 tr.add(self.datafile, dataoff)
949 df = self.opener(self.datafile, 'w')
948 df = self.opener(self.datafile, 'w')
950 calc = self._io.size
949 calc = self._io.size
951 for r in xrange(self.count()):
950 for r in xrange(self.count()):
952 start = self.start(r) + (r + 1) * calc
951 start = self.start(r) + (r + 1) * calc
953 length = self.length(r)
952 length = self.length(r)
954 fp.seek(start)
953 fp.seek(start)
955 d = fp.read(length)
954 d = fp.read(length)
956 df.write(d)
955 df.write(d)
957 fp.close()
956 fp.close()
958 df.close()
957 df.close()
959 fp = self.opener(self.indexfile, 'w', atomictemp=True)
958 fp = self.opener(self.indexfile, 'w', atomictemp=True)
960 self.version &= ~(REVLOGNGINLINEDATA)
959 self.version &= ~(REVLOGNGINLINEDATA)
961 self._inline = False
960 self._inline = False
962 if self.count():
961 if self.count():
963 x = self.index[0]
962 x = self.index[0]
964 e = struct.pack(indexformatng, *x)[4:]
963 e = struct.pack(indexformatng, *x)[4:]
965 l = struct.pack(versionformat, self.version)
964 l = struct.pack(versionformat, self.version)
966 fp.write(l)
965 fp.write(l)
967 fp.write(e)
966 fp.write(e)
968
967
969 for i in xrange(1, self.count()):
968 for i in xrange(1, self.count()):
970 x = self.index[i]
969 x = self.index[i]
971 e = struct.pack(indexformatng, *x)
970 e = struct.pack(indexformatng, *x)
972 fp.write(e)
971 fp.write(e)
973
972
974 # if we don't call rename, the temp file will never replace the
973 # if we don't call rename, the temp file will never replace the
975 # real index
974 # real index
976 fp.rename()
975 fp.rename()
977
976
978 tr.replace(self.indexfile, trindex * calc)
977 tr.replace(self.indexfile, trindex * calc)
979 self._chunkcache = None
978 self._chunkcache = None
980
979
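# Editor's note (hedged): checkinlinesize() converts an inline revlog
# (data interleaved in the .i file) to the split .i/.d layout once the
# index file passes the 131072-byte threshold checked above.  Clearing
# the inline bit is a plain mask, mirrored here; REVLOGNGINLINEDATA is
# assumed to be the flag constant defined near the top of this file:

def _example_clear_inline(version, REVLOGNGINLINEDATA=(1 << 16)):
    return version & ~REVLOGNGINLINEDATA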
981 def addrevision(self, text, transaction, link, p1, p2, d=None):
980 def addrevision(self, text, transaction, link, p1, p2, d=None):
982 """add a revision to the log
981 """add a revision to the log
983
982
984 text - the revision data to add
983 text - the revision data to add
985 transaction - the transaction object used for rollback
984 transaction - the transaction object used for rollback
986 link - the linkrev data to add
985 link - the linkrev data to add
987 p1, p2 - the parent nodeids of the revision
986 p1, p2 - the parent nodeids of the revision
988 d - an optional precomputed delta
987 d - an optional precomputed delta
989 """
988 """
990 dfh = None
989 dfh = None
991 if not self._inline:
990 if not self._inline:
992 dfh = self.opener(self.datafile, "a")
991 dfh = self.opener(self.datafile, "a")
993 ifh = self.opener(self.indexfile, "a+")
992 ifh = self.opener(self.indexfile, "a+")
994 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
993 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
995
994
996 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
995 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
997 node = hash(text, p1, p2)
996 node = hash(text, p1, p2)
998 if node in self.nodemap:
997 if node in self.nodemap:
999 return node
998 return node
1000
999
1001 curr = self.count()
1000 curr = self.count()
1002 prev = curr - 1
1001 prev = curr - 1
1003 base = self.base(prev)
1002 base = self.base(prev)
1004 offset = self.end(prev)
1003 offset = self.end(prev)
1005
1004
1006 if curr:
1005 if curr:
1007 if not d:
1006 if not d:
1008 ptext = self.revision(self.node(prev))
1007 ptext = self.revision(self.node(prev))
1009 d = self.diff(ptext, text)
1008 d = self.diff(ptext, text)
1010 data = compress(d)
1009 data = compress(d)
1011 l = len(data[1]) + len(data[0])
1010 l = len(data[1]) + len(data[0])
1012 dist = l + offset - self.start(base)
1011 dist = l + offset - self.start(base)
1013
1012
1014 # full versions are inserted when the needed deltas
1013 # full versions are inserted when the needed deltas
1015 # become comparable to the uncompressed text
1014 # become comparable to the uncompressed text
1016 if not curr or dist > len(text) * 2:
1015 if not curr or dist > len(text) * 2:
1017 data = compress(text)
1016 data = compress(text)
1018 l = len(data[1]) + len(data[0])
1017 l = len(data[1]) + len(data[0])
1019 base = curr
1018 base = curr
1020
1019
1021 e = (offset_type(offset, 0), l, len(text),
1020 e = (offset_type(offset, 0), l, len(text),
1022 base, link, self.rev(p1), self.rev(p2), node)
1021 base, link, self.rev(p1), self.rev(p2), node)
1023 self.index.insert(-1, e)
1022 self.index.insert(-1, e)
1024 self.nodemap[node] = curr
1023 self.nodemap[node] = curr
1025
1024
1026 if self.version == REVLOGV0:
1025 if self.version == REVLOGV0:
1027 e = (offset, l, base, link, p1, p2, node)
1026 e = (offset, l, base, link, p1, p2, node)
1028 entry = struct.pack(indexformatv0, *e)
1027 entry = struct.pack(indexformatv0, *e)
1029 else:
1028 else:
1030 entry = struct.pack(indexformatng, *e)
1029 entry = struct.pack(indexformatng, *e)
1031 if not curr:
1030 if not curr:
1032 entry = struct.pack(versionformat, self.version) + entry[4:]
1031 entry = struct.pack(versionformat, self.version) + entry[4:]
1033
1032
1034 if not self._inline:
1033 if not self._inline:
1035 transaction.add(self.datafile, offset)
1034 transaction.add(self.datafile, offset)
1036 transaction.add(self.indexfile, curr * len(entry))
1035 transaction.add(self.indexfile, curr * len(entry))
1037 if data[0]:
1036 if data[0]:
1038 dfh.write(data[0])
1037 dfh.write(data[0])
1039 dfh.write(data[1])
1038 dfh.write(data[1])
1040 dfh.flush()
1039 dfh.flush()
1041 ifh.write(entry)
1040 ifh.write(entry)
1042 else:
1041 else:
1043 ifh.seek(0, 2)
1042 ifh.seek(0, 2)
1044 transaction.add(self.indexfile, ifh.tell(), prev)
1043 transaction.add(self.indexfile, ifh.tell(), prev)
1045 ifh.write(entry)
1044 ifh.write(entry)
1046 ifh.write(data[0])
1045 ifh.write(data[0])
1047 ifh.write(data[1])
1046 ifh.write(data[1])
1048 self.checkinlinesize(transaction, ifh)
1047 self.checkinlinesize(transaction, ifh)
1049
1048
1050 self.cache = (node, curr, text)
1049 self._cache = (node, curr, text)
1051 return node
1050 return node
1052
1051
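# Editor's sketch of the snapshot heuristic above: a delta is only stored
# while the whole chain back to the base stays under twice the size of
# the new text; past that, a fresh full version caps reconstruction cost.
# Uses public accessors only; "rl" is any revlog instance and "prev" a
# revision number:

def _example_store_decision(rl, prev, text, deltalen):
    base = rl.base(prev)
    dist = deltalen + rl.end(prev) - rl.start(base)
    return dist > len(text) * 2 and "fulltext" or "delta"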
1053 def ancestor(self, a, b):
1052 def ancestor(self, a, b):
1054 """calculate the least common ancestor of nodes a and b"""
1053 """calculate the least common ancestor of nodes a and b"""
1055
1054
1056 def parents(rev):
1055 def parents(rev):
1057 return [p for p in self.parentrevs(rev) if p != nullrev]
1056 return [p for p in self.parentrevs(rev) if p != nullrev]
1058
1057
1059 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1058 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1060 if c is None:
1059 if c is None:
1061 return nullid
1060 return nullid
1062
1061
1063 return self.node(c)
1062 return self.node(c)
1064
1063
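# Editor's sketch: ancestor() works on revision numbers internally and
# maps the answer back to a node; nullid comes back when the two nodes
# share no history at all:

def _example_ancestor(rl, a, b):
    anc = rl.ancestor(a, b)
    return anc == nullid or rl.rev(anc) <= min(rl.rev(a), rl.rev(b))  # True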
1065 def group(self, nodelist, lookup, infocollect=None):
1064 def group(self, nodelist, lookup, infocollect=None):
1066 """calculate a delta group
1065 """calculate a delta group
1067
1066
1068 Given a list of changeset revs, return a set of deltas and
1067 Given a list of changeset revs, return a set of deltas and
1069 metadata corresponding to nodes. The first delta is
1068 metadata corresponding to nodes. The first delta is
1070 parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
1069 parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
1071 have this parent, as it has all history before these
1070 have this parent, as it has all history before these
1072 changesets. The delta parent is parents(nodes[0])[0].
1071 changesets. The delta parent is parents(nodes[0])[0].
1073 """
1072 """
1074 revs = [self.rev(n) for n in nodelist]
1073 revs = [self.rev(n) for n in nodelist]
1075
1074
1076 # if we don't have any revisions touched by these changesets, bail
1075 # if we don't have any revisions touched by these changesets, bail
1077 if not revs:
1076 if not revs:
1078 yield changegroup.closechunk()
1077 yield changegroup.closechunk()
1079 return
1078 return
1080
1079
1081 # add the parent of the first rev
1080 # add the parent of the first rev
1082 p = self.parents(self.node(revs[0]))[0]
1081 p = self.parents(self.node(revs[0]))[0]
1083 revs.insert(0, self.rev(p))
1082 revs.insert(0, self.rev(p))
1084
1083
1085 # build deltas
1084 # build deltas
1086 for d in xrange(0, len(revs) - 1):
1085 for d in xrange(0, len(revs) - 1):
1087 a, b = revs[d], revs[d + 1]
1086 a, b = revs[d], revs[d + 1]
1088 nb = self.node(b)
1087 nb = self.node(b)
1089
1088
1090 if infocollect is not None:
1089 if infocollect is not None:
1091 infocollect(nb)
1090 infocollect(nb)
1092
1091
1093 d = self.revdiff(a, b)
1092 d = self.revdiff(a, b)
1094 p = self.parents(nb)
1093 p = self.parents(nb)
1095 meta = nb + p[0] + p[1] + lookup(nb)
1094 meta = nb + p[0] + p[1] + lookup(nb)
1096 yield changegroup.genchunk("%s%s" % (meta, d))
1095 yield changegroup.genchunk("%s%s" % (meta, d))
1097
1096
1098 yield changegroup.closechunk()
1097 yield changegroup.closechunk()
1099
1098
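# Editor's sketch of the chunk framing produced above and consumed by
# addgroup() below: an 80-byte header of four 20-byte nodes (node, p1,
# p2, linked changeset) followed by the delta payload.  struct is the
# module-level import already used in this file:

def _example_parse_chunk(chunk):
    node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
    return node, p1, p2, cs, chunk[80:]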
1100 def addgroup(self, revs, linkmapper, transaction, unique=0):
1099 def addgroup(self, revs, linkmapper, transaction, unique=0):
1101 """
1100 """
1102 add a delta group
1101 add a delta group
1103
1102
1104 given a set of deltas, add them to the revision log. the
1103 given a set of deltas, add them to the revision log. the
1105 first delta is against its parent, which should be in our
1104 first delta is against its parent, which should be in our
1106 log; the rest are against the previous delta.
1105 log; the rest are against the previous delta.
1107 """
1106 """
1108
1107
1109 # track the base of the current delta log
1108 # track the base of the current delta log
1110 r = self.count()
1109 r = self.count()
1111 t = r - 1
1110 t = r - 1
1112 node = None
1111 node = None
1113
1112
1114 base = prev = nullrev
1113 base = prev = nullrev
1115 start = end = textlen = 0
1114 start = end = textlen = 0
1116 if r:
1115 if r:
1117 end = self.end(t)
1116 end = self.end(t)
1118
1117
1119 ifh = self.opener(self.indexfile, "a+")
1118 ifh = self.opener(self.indexfile, "a+")
1120 ifh.seek(0, 2)
1119 ifh.seek(0, 2)
1121 transaction.add(self.indexfile, ifh.tell(), self.count())
1120 transaction.add(self.indexfile, ifh.tell(), self.count())
1122 if self._inline:
1121 if self._inline:
1123 dfh = None
1122 dfh = None
1124 else:
1123 else:
1125 transaction.add(self.datafile, end)
1124 transaction.add(self.datafile, end)
1126 dfh = self.opener(self.datafile, "a")
1125 dfh = self.opener(self.datafile, "a")
1127
1126
1128 # loop through our set of deltas
1127 # loop through our set of deltas
1129 chain = None
1128 chain = None
1130 for chunk in revs:
1129 for chunk in revs:
1131 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1130 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1132 link = linkmapper(cs)
1131 link = linkmapper(cs)
1133 if node in self.nodemap:
1132 if node in self.nodemap:
1134 # this can happen if two branches make the same change
1133 # this can happen if two branches make the same change
1135 # if unique:
1134 # if unique:
1136 # raise RevlogError(_("already have %s") % hex(node[:4]))
1135 # raise RevlogError(_("already have %s") % hex(node[:4]))
1137 chain = node
1136 chain = node
1138 continue
1137 continue
1139 delta = chunk[80:]
1138 delta = chunk[80:]
1140
1139
1141 for p in (p1, p2):
1140 for p in (p1, p2):
1142 if p not in self.nodemap:
1141 if p not in self.nodemap:
1143 raise LookupError(_("unknown parent %s") % short(p))
1142 raise LookupError(_("unknown parent %s") % short(p))
1144
1143
1145 if not chain:
1144 if not chain:
1146 # retrieve the parent revision of the delta chain
1145 # retrieve the parent revision of the delta chain
1147 chain = p1
1146 chain = p1
1148 if chain not in self.nodemap:
1147 if chain not in self.nodemap:
1149 raise LookupError(_("unknown base %s") % short(chain))
1148 raise LookupError(_("unknown base %s") % short(chain))
1150
1149
1151 # full versions are inserted when the needed deltas become
1150 # full versions are inserted when the needed deltas become
1152 # comparable to the uncompressed text or when the previous
1151 # comparable to the uncompressed text or when the previous
1153 # version is not the one we have a delta against. We use
1152 # version is not the one we have a delta against. We use
1154 # the size of the previous full rev as a proxy for the
1153 # the size of the previous full rev as a proxy for the
1155 # current size.
1154 # current size.
1156
1155
1157 if chain == prev:
1156 if chain == prev:
1158 tempd = compress(delta)
1157 tempd = compress(delta)
1159 cdelta = tempd[0] + tempd[1]
1158 cdelta = tempd[0] + tempd[1]
1160 textlen = mdiff.patchedsize(textlen, delta)
1159 textlen = mdiff.patchedsize(textlen, delta)
1161
1160
1162 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1161 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1163 # flush our writes here so we can read it in revision
1162 # flush our writes here so we can read it in revision
1164 if dfh:
1163 if dfh:
1165 dfh.flush()
1164 dfh.flush()
1166 ifh.flush()
1165 ifh.flush()
1167 text = self.revision(chain)
1166 text = self.revision(chain)
1168 text = self.patches(text, [delta])
1167 text = self.patches(text, [delta])
1169 chk = self._addrevision(text, transaction, link, p1, p2, None,
1168 chk = self._addrevision(text, transaction, link, p1, p2, None,
1170 ifh, dfh)
1169 ifh, dfh)
1171 if not dfh and not self._inline:
1170 if not dfh and not self._inline:
1172 # addrevision switched from inline to conventional
1171 # addrevision switched from inline to conventional
1173 # reopen the index
1172 # reopen the index
1174 dfh = self.opener(self.datafile, "a")
1173 dfh = self.opener(self.datafile, "a")
1175 ifh = self.opener(self.indexfile, "a")
1174 ifh = self.opener(self.indexfile, "a")
1176 if chk != node:
1175 if chk != node:
1177 raise RevlogError(_("consistency error adding group"))
1176 raise RevlogError(_("consistency error adding group"))
1178 textlen = len(text)
1177 textlen = len(text)
1179 else:
1178 else:
1180 e = (offset_type(end, 0), len(cdelta), textlen, base,
1179 e = (offset_type(end, 0), len(cdelta), textlen, base,
1181 link, self.rev(p1), self.rev(p2), node)
1180 link, self.rev(p1), self.rev(p2), node)
1182 self.index.insert(-1, e)
1181 self.index.insert(-1, e)
1183 self.nodemap[node] = r
1182 self.nodemap[node] = r
1184 if self._inline:
1183 if self._inline:
1185 ifh.write(struct.pack(indexformatng, *e))
1184 ifh.write(struct.pack(indexformatng, *e))
1186 ifh.write(cdelta)
1185 ifh.write(cdelta)
1187 self.checkinlinesize(transaction, ifh)
1186 self.checkinlinesize(transaction, ifh)
1188 if not self._inline:
1187 if not self._inline:
1189 dfh = self.opener(self.datafile, "a")
1188 dfh = self.opener(self.datafile, "a")
1190 ifh = self.opener(self.indexfile, "a")
1189 ifh = self.opener(self.indexfile, "a")
1191 else:
1190 else:
1192 if self.version == REVLOGV0:
1191 if self.version == REVLOGV0:
1193 e = (end, len(cdelta), base, link, p1, p2, node)
1192 e = (end, len(cdelta), base, link, p1, p2, node)
1194 entry = struct.pack(indexformatv0, *e)
1193 entry = struct.pack(indexformatv0, *e)
1195 else:
1194 else:
1196 entry = struct.pack(indexformatng, *e)
1195 entry = struct.pack(indexformatng, *e)
1197 dfh.write(cdelta)
1196 dfh.write(cdelta)
1198 ifh.write(entry)
1197 ifh.write(entry)
1199
1198
1200 t, r, chain, prev = r, r + 1, node, node
1199 t, r, chain, prev = r, r + 1, node, node
1201 base = self.base(t)
1200 base = self.base(t)
1202 start = self.start(base)
1201 start = self.start(base)
1203 end = self.end(t)
1202 end = self.end(t)
1204
1203
1205 return node
1204 return node
1206
1205
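# Editor's note (hedged): the "linkmapper" argument above translates the
# changeset node in each chunk header to a local linkrev.  For a revlog
# fed alongside a changelog "cl", it is typically just a bound rev():

def _example_linkmapper(cl, cs_node):
    return cl.rev(cs_node)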
1207 def strip(self, rev, minlink):
1206 def strip(self, rev, minlink):
1208 if self.count() == 0 or rev >= self.count():
1207 if self.count() == 0 or rev >= self.count():
1209 return
1208 return
1210
1209
1211 if isinstance(self.index, lazyindex):
1210 if isinstance(self.index, lazyindex):
1212 self._loadindexmap()
1211 self._loadindexmap()
1213
1212
1214 # When stripping away a revision, we need to make sure it
1213 # When stripping away a revision, we need to make sure it
1215 # does not actually belong to an older changeset.
1214 # does not actually belong to an older changeset.
1216 # The minlink parameter defines the oldest revision
1215 # The minlink parameter defines the oldest revision
1217 # we're allowed to strip away.
1216 # we're allowed to strip away.
1218 while minlink > self.index[rev][4]:
1217 while minlink > self.index[rev][4]:
1219 rev += 1
1218 rev += 1
1220 if rev >= self.count():
1219 if rev >= self.count():
1221 return
1220 return
1222
1221
1223 # first truncate the files on disk
1222 # first truncate the files on disk
1224 end = self.start(rev)
1223 end = self.start(rev)
1225 if not self._inline:
1224 if not self._inline:
1226 df = self.opener(self.datafile, "a")
1225 df = self.opener(self.datafile, "a")
1227 df.truncate(end)
1226 df.truncate(end)
1228 end = rev * self._io.size
1227 end = rev * self._io.size
1229 else:
1228 else:
1230 end += rev * self._io.size
1229 end += rev * self._io.size
1231
1230
1232 indexf = self.opener(self.indexfile, "a")
1231 indexf = self.opener(self.indexfile, "a")
1233 indexf.truncate(end)
1232 indexf.truncate(end)
1234
1233
1235 # then reset internal state in memory to forget those revisions
1234 # then reset internal state in memory to forget those revisions
1236 self.cache = None
1235 self._cache = None
1237 self._chunkcache = None
1236 self._chunkcache = None
1238 for x in xrange(rev, self.count()):
1237 for x in xrange(rev, self.count()):
1239 del self.nodemap[self.node(x)]
1238 del self.nodemap[self.node(x)]
1240
1239
1241 del self.index[rev:-1]
1240 del self.index[rev:-1]
1242
1241
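# Editor's sketch (destructive -- illustration only): strip() truncates
# both files on disk and forgets the revisions in memory; minlink guards
# against removing revisions still referenced by older changesets:

def _example_strip(rl, rev):
    before = rl.count()
    rl.strip(rev, 0)            # minlink 0: allowed to strip anything
    return before - rl.count()  # number of revisions removed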
1243 def checksize(self):
1242 def checksize(self):
1244 expected = 0
1243 expected = 0
1245 if self.count():
1244 if self.count():
1246 expected = self.end(self.count() - 1)
1245 expected = self.end(self.count() - 1)
1247
1246
1248 try:
1247 try:
1249 f = self.opener(self.datafile)
1248 f = self.opener(self.datafile)
1250 f.seek(0, 2)
1249 f.seek(0, 2)
1251 actual = f.tell()
1250 actual = f.tell()
1252 dd = actual - expected
1251 dd = actual - expected
1253 except IOError, inst:
1252 except IOError, inst:
1254 if inst.errno != errno.ENOENT:
1253 if inst.errno != errno.ENOENT:
1255 raise
1254 raise
1256 dd = 0
1255 dd = 0
1257
1256
1258 try:
1257 try:
1259 f = self.opener(self.indexfile)
1258 f = self.opener(self.indexfile)
1260 f.seek(0, 2)
1259 f.seek(0, 2)
1261 actual = f.tell()
1260 actual = f.tell()
1262 s = self._io.size
1261 s = self._io.size
1263 i = actual / s
1262 i = actual / s
1264 di = actual - (i * s)
1263 di = actual - (i * s)
1265 if self._inline:
1264 if self._inline:
1266 databytes = 0
1265 databytes = 0
1267 for r in xrange(self.count()):
1266 for r in xrange(self.count()):
1268 databytes += self.length(r)
1267 databytes += self.length(r)
1269 dd = 0
1268 dd = 0
1270 di = actual - self.count() * s - databytes
1269 di = actual - self.count() * s - databytes
1271 except IOError, inst:
1270 except IOError, inst:
1272 if inst.errno != errno.ENOENT:
1271 if inst.errno != errno.ENOENT:
1273 raise
1272 raise
1274 di = 0
1273 di = 0
1275
1274
1276 return (dd, di)
1275 return (dd, di)
1277
1276
1278
1277
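# Editor's sketch: checksize() reports (dd, di) -- unexpected trailing
# bytes in the datafile and indexfile respectively; (0, 0) means the
# on-disk sizes agree with what the index claims:

def _example_checksize(rl):
    dd, di = rl.checksize()
    return dd == 0 and di == 0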