revlog: eliminate diff and patches functions
Matt Mackall
r4989:1aaed3d6 default
@@ -1,251 +1,250 @@
 """
 bundlerepo.py - repository class for viewing uncompressed bundles

 This provides a read-only repository interface to bundles as if
 they were part of the actual repository.

 Copyright 2006, 2007 Benoit Boissinot <bboissin@gmail.com>

 This software may be used and distributed according to the terms
 of the GNU General Public License, incorporated herein by reference.
 """

 from node import *
 from i18n import _
-import changegroup, util, os, struct, bz2, tempfile
-
+import changegroup, util, os, struct, bz2, tempfile, mdiff
 import localrepo, changelog, manifest, filelog, revlog

 class bundlerevlog(revlog.revlog):
     def __init__(self, opener, indexfile, bundlefile,
                  linkmapper=None):
         # How it works:
         # to retrieve a revision, we need to know the offset of
         # the revision in the bundlefile (an opened file).
         #
         # We store this offset in the index (start). To differentiate a
         # rev in the bundle from a rev in the revlog, we check
         # len(index[r]). If the tuple is bigger than 7, it is a bundle
         # (it is bigger since we store the node to which the delta is)
         #
         revlog.revlog.__init__(self, opener, indexfile)
         self.bundlefile = bundlefile
         self.basemap = {}
         def chunkpositer():
             for chunk in changegroup.chunkiter(bundlefile):
                 pos = bundlefile.tell()
                 yield chunk, pos - len(chunk)
         n = self.count()
         prev = None
         for chunk, start in chunkpositer():
             size = len(chunk)
             if size < 80:
                 raise util.Abort("invalid changegroup")
             start += 80
             size -= 80
             node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
             if node in self.nodemap:
                 prev = node
                 continue
             for p in (p1, p2):
                 if not p in self.nodemap:
                     raise revlog.LookupError(_("unknown parent %s") % short(p1))
             if linkmapper is None:
                 link = n
             else:
                 link = linkmapper(cs)

             if not prev:
                 prev = p1
             # start, size, base is not used, link, p1, p2, delta ref
             e = (revlog.offset_type(start, 0), size, -1, None, link,
                  self.rev(p1), self.rev(p2), node)
             self.basemap[n] = prev
             self.index.insert(-1, e)
             self.nodemap[node] = n
             prev = node
             n += 1

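chunkpositer above leans on changegroup.chunkiter, which yields length-prefixed chunks. A minimal reader sketch, assuming the HG10 framing (a 4-byte big-endian length that counts itself, with a length of 4 or less terminating the group; iter_chunks is an illustrative helper, not part of Mercurial):

    import struct

    def iter_chunks(fp):
        # Assumed HG10 changegroup framing: each chunk starts with a
        # 4-byte big-endian length that includes the length field
        # itself; an empty chunk (length <= 4) ends the group.
        while True:
            header = fp.read(4)
            if len(header) < 4:
                break
            l = struct.unpack(">l", header)[0]
            if l <= 4:
                break
            yield fp.read(l - 4)

Each revision chunk then begins with the 80-byte header the constructor unpacks: node, p1, p2 and the changeset link.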
     def bundle(self, rev):
         """is rev from the bundle"""
         if rev < 0:
             return False
         return rev in self.basemap
     def bundlebase(self, rev): return self.basemap[rev]
     def chunk(self, rev, df=None, cachelen=4096):
         # Warning: in case of bundle, the diff is against bundlebase,
         # not against rev - 1
         # XXX: could use some caching
         if not self.bundle(rev):
             return revlog.revlog.chunk(self, rev, df)
         self.bundlefile.seek(self.start(rev))
         return self.bundlefile.read(self.length(rev))

     def revdiff(self, rev1, rev2):
         """return or calculate a delta between two revisions"""
         if self.bundle(rev1) and self.bundle(rev2):
             # hot path for bundle
             revb = self.rev(self.bundlebase(rev2))
             if revb == rev1:
                 return self.chunk(rev2)
         elif not self.bundle(rev1) and not self.bundle(rev2):
             return revlog.revlog.revdiff(self, rev1, rev2)

-        return self.diff(self.revision(self.node(rev1)),
-                         self.revision(self.node(rev2)))
+        return mdiff.textdiff(self.revision(self.node(rev1)),
+                              self.revision(self.node(rev2)))

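This hunk is the point of the commit: revdiff now calls mdiff.textdiff directly instead of the revlog's own diff wrapper. The working assumption is the usual delta invariant, sketched here (needs a Mercurial checkout on the path; plain byte strings, as in the Python 2 era API):

    from mercurial import mdiff

    old = "a\nb\nc\n"
    new = "a\nx\nc\n"
    delta = mdiff.textdiff(old, new)        # binary delta, old -> new
    assert mdiff.patches(old, [delta]) == new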
     def revision(self, node):
         """return an uncompressed revision of a given node"""
         if node == nullid: return ""

         text = None
         chain = []
         iter_node = node
         rev = self.rev(iter_node)
         # reconstruct the revision if it is from a changegroup
         while self.bundle(rev):
             if self._cache and self._cache[0] == iter_node:
                 text = self._cache[2]
                 break
             chain.append(rev)
             iter_node = self.bundlebase(rev)
             rev = self.rev(iter_node)
         if text is None:
             text = revlog.revlog.revision(self, iter_node)

         while chain:
             delta = self.chunk(chain.pop())
-            text = self.patches(text, [delta])
+            text = mdiff.patches(text, [delta])

         p1, p2 = self.parents(node)
         if node != revlog.hash(text, p1, p2):
             raise revlog.RevlogError(_("integrity check failed on %s:%d")
                                      % (self.datafile, self.rev(node)))

         self._cache = (node, self.rev(node), text)
         return text

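revision() walks basemap back to a revision stored outside the bundle, then replays the deltas one chunk at a time. mdiff.patches also accepts the whole list at once, so an equivalent sketch (assuming the chain has been collected base-first) is:

    from mercurial import mdiff

    def reconstruct(base_text, deltas):
        # mdiff.patches folds a list of binary deltas over the base
        # text in order, so a whole delta chain can be replayed in
        # one call instead of the loop above.
        return mdiff.patches(base_text, deltas)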
     def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
         raise NotImplementedError
     def addgroup(self, revs, linkmapper, transaction, unique=0):
         raise NotImplementedError
     def strip(self, rev, minlink):
         raise NotImplementedError
     def checksize(self):
         raise NotImplementedError

 class bundlechangelog(bundlerevlog, changelog.changelog):
     def __init__(self, opener, bundlefile):
         changelog.changelog.__init__(self, opener)
         bundlerevlog.__init__(self, opener, self.indexfile, bundlefile)

 class bundlemanifest(bundlerevlog, manifest.manifest):
     def __init__(self, opener, bundlefile, linkmapper):
         manifest.manifest.__init__(self, opener)
         bundlerevlog.__init__(self, opener, self.indexfile, bundlefile,
                               linkmapper)

 class bundlefilelog(bundlerevlog, filelog.filelog):
     def __init__(self, opener, path, bundlefile, linkmapper):
         filelog.filelog.__init__(self, opener, path)
         bundlerevlog.__init__(self, opener, self.indexfile, bundlefile,
                               linkmapper)

 class bundlerepository(localrepo.localrepository):
     def __init__(self, ui, path, bundlename):
         localrepo.localrepository.__init__(self, ui, path)

         self._url = 'bundle:' + bundlename
         if path: self._url += '+' + path

         self.tempfile = None
         self.bundlefile = open(bundlename, "rb")
         header = self.bundlefile.read(6)
         if not header.startswith("HG"):
             raise util.Abort(_("%s: not a Mercurial bundle file") % bundlename)
         elif not header.startswith("HG10"):
             raise util.Abort(_("%s: unknown bundle version") % bundlename)
         elif header == "HG10BZ":
             fdtemp, temp = tempfile.mkstemp(prefix="hg-bundle-",
                                             suffix=".hg10un", dir=self.path)
             self.tempfile = temp
             fptemp = os.fdopen(fdtemp, 'wb')
             def generator(f):
                 zd = bz2.BZ2Decompressor()
                 zd.decompress("BZ")
                 for chunk in f:
                     yield zd.decompress(chunk)
             gen = generator(util.filechunkiter(self.bundlefile, 4096))

             try:
                 fptemp.write("HG10UN")
                 for chunk in gen:
                     fptemp.write(chunk)
             finally:
                 fptemp.close()
                 self.bundlefile.close()

             self.bundlefile = open(self.tempfile, "rb")
             # seek right after the header
             self.bundlefile.seek(6)
         elif header == "HG10UN":
             # nothing to do
             pass
         else:
             raise util.Abort(_("%s: unknown bundle compression type")
                              % bundlename)
         self.changelog = bundlechangelog(self.sopener, self.bundlefile)
         self.manifest = bundlemanifest(self.sopener, self.bundlefile,
                                        self.changelog.rev)
         # dict with the mapping 'filename' -> position in the bundle
         self.bundlefilespos = {}
         while 1:
             f = changegroup.getchunk(self.bundlefile)
             if not f:
                 break
             self.bundlefilespos[f] = self.bundlefile.tell()
             for c in changegroup.chunkiter(self.bundlefile):
                 pass

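The generator above relies on a detail worth spelling out: the six-byte file header "HG10BZ" overlaps the "BZ" magic that starts every bzip2 stream, so after reading the header the decompressor must be primed by feeding "BZ" back in. A standalone sketch of the same trick (the sample payload is fabricated for illustration):

    import bz2

    data = "some changegroup bytes"
    stream = "HG10" + bz2.compress(data)   # bz2 output starts with "BZ"
    assert stream[:6] == "HG10BZ"

    zd = bz2.BZ2Decompressor()
    zd.decompress("BZ")            # re-feed the magic consumed with the header
    assert zd.decompress(stream[6:]) == data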
     def url(self):
         return self._url

     def dev(self):
         return -1

     def file(self, f):
         if f[0] == '/':
             f = f[1:]
         if f in self.bundlefilespos:
             self.bundlefile.seek(self.bundlefilespos[f])
             return bundlefilelog(self.sopener, f, self.bundlefile,
                                  self.changelog.rev)
         else:
             return filelog.filelog(self.sopener, f)

     def close(self):
         """Close assigned bundle file immediately."""
         self.bundlefile.close()

     def __del__(self):
         bundlefile = getattr(self, 'bundlefile', None)
         if bundlefile and not bundlefile.closed:
             bundlefile.close()
         tempfile = getattr(self, 'tempfile', None)
         if tempfile is not None:
             os.unlink(tempfile)

 def instance(ui, path, create):
     if create:
         raise util.Abort(_('cannot create new bundle repository'))
     path = util.drop_scheme('file', path)
     if path.startswith('bundle:'):
         path = util.drop_scheme('bundle', path)
         s = path.split("+", 1)
         if len(s) == 1:
             repopath, bundlename = "", s[0]
         else:
             repopath, bundlename = s
     else:
         repopath, bundlename = '', path
     return bundlerepository(ui, repopath, bundlename)
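instance() is what the repository lookup calls for bundle: URLs; an optional '+' separates the underlying repo path from the bundle file. A small sketch of the split it performs (the paths below are made up):

    # "bundle:/tmp/incoming.hg"           -> repopath "", bundle "/tmp/incoming.hg"
    # "bundle:/srv/repo+/tmp/incoming.hg" -> repopath "/srv/repo", bundle "/tmp/incoming.hg"
    path = "/srv/repo+/tmp/incoming.hg"
    repopath, bundlename = path.split("+", 1)
    assert (repopath, bundlename) == ("/srv/repo", "/tmp/incoming.hg")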
@@ -1,1257 +1,1245 @@
 """
 revlog.py - storage back-end for mercurial

 This provides efficient delta storage with O(1) retrieve and append
 and O(changes) merge between branches

 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>

 This software may be used and distributed according to the terms
 of the GNU General Public License, incorporated herein by reference.
 """

 from node import *
 from i18n import _
 import binascii, changegroup, errno, ancestor, mdiff, os
 import sha, struct, util, zlib

17
18 # revlog flags
18 # revlog flags
19 REVLOGV0 = 0
19 REVLOGV0 = 0
20 REVLOGNG = 1
20 REVLOGNG = 1
21 REVLOGNGINLINEDATA = (1 << 16)
21 REVLOGNGINLINEDATA = (1 << 16)
22 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
22 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
23 REVLOG_DEFAULT_FORMAT = REVLOGNG
23 REVLOG_DEFAULT_FORMAT = REVLOGNG
24 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
24 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
25
25
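For concreteness, the version word packs the format number into the low 16 bits and the flags above them; the values follow directly from the constants above:

    REVLOGNG = 1
    REVLOGNGINLINEDATA = 1 << 16

    v = REVLOGNG | REVLOGNGINLINEDATA
    assert v == 0x10001             # REVLOG_DEFAULT_VERSION
    assert v & 0xFFFF == 1          # format: RevlogNG
    assert v & ~0xFFFF == 0x10000   # flags: inline data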
 class RevlogError(Exception):
     pass
 class LookupError(RevlogError):
     pass

 def getoffset(q):
     if q & 0xFFFF:
         raise RevlogError(_('incompatible revision flag %x') % q)
     return int(q >> 16)

 def gettype(q):
     return int(q & 0xFFFF)

 def offset_type(offset, type):
     return long(long(offset) << 16 | type)

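getoffset/gettype/offset_type pack a data offset and a 16-bit type into one integer, mirroring the 6-byte offset plus 2-byte flags layout of the on-disk entry. A worked example (dropping Python 2's long(), which only affects boxing):

    def offset_type(offset, type):
        return (offset << 16) | type

    q = offset_type(4096, 0)
    assert q == 4096 << 16
    assert q >> 16 == 4096      # getoffset(q), valid since no flag bits are set
    assert q & 0xFFFF == 0      # gettype(q)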
 def hash(text, p1, p2):
     """generate a hash from the given text and its parent hashes

     This hash combines both the current file contents and its history
     in a manner that makes it easy to distinguish nodes with the same
     content in the revision graph.
     """
     l = [p1, p2]
     l.sort()
     s = sha.new(l[0])
     s.update(l[1])
     s.update(text)
     return s.digest()

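The node hash is SHA-1 over the two parent nodes (sorted) followed by the text; sha.new is the pre-hashlib spelling. An equivalent sketch with hashlib, assuming 20-byte binary parent ids:

    import hashlib

    nullid = "\0" * 20

    def node_hash(text, p1=nullid, p2=nullid):
        # sorted parents first, then the revision text, as in hash() above
        a, b = sorted([p1, p2])
        s = hashlib.sha1(a)
        s.update(b)
        s.update(text)
        return s.digest()

    assert len(node_hash("hello\n")) == 20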
 def compress(text):
     """ generate a possibly-compressed representation of text """
     if not text:
         return ("", text)
     if len(text) < 44:
         if text[0] == '\0':
             return ("", text)
         return ('u', text)
     bin = zlib.compress(text)
     if len(bin) > len(text):
         if text[0] == '\0':
             return ("", text)
         return ('u', text)
     return ("", bin)

 def decompress(bin):
     """ decompress the given input """
     if not bin:
         return bin
     t = bin[0]
     if t == '\0':
         return bin
     if t == 'x':
         return zlib.decompress(bin)
     if t == 'u':
         return bin[1:]
     raise RevlogError(_("unknown compression type %r") % t)

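compress returns a (header, data) pair whose concatenation is what gets stored; decompress dispatches on the first byte: '\0' means raw, 'u' means uncompressed with a marker, and 'x' is a zlib stream (zlib output begins with 0x78, i.e. 'x'). A quick round-trip check, assuming compress/decompress as defined above are in scope:

    text = "x" * 200                  # long enough that zlib wins
    h, d = compress(text)
    assert h == "" and d[0] == "x"    # stored form is a zlib stream
    assert decompress(h + d) == text

    short = "abc"                     # too short to bother compressing
    h, d = compress(short)
    assert (h, d) == ("u", "abc")
    assert decompress(h + d) == short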
 class lazyparser(object):
     """
     this class avoids the need to parse the entirety of large indices
     """

     # lazyparser is not safe to use on windows if win32 extensions are not
     # available. it keeps the file handle open, which makes it impossible
     # to break hardlinks on local cloned repos.
     safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
                                       hasattr(util, 'win32api'))

     def __init__(self, dataf, size):
         self.dataf = dataf
         self.s = struct.calcsize(indexformatng)
         self.datasize = size
         self.l = size/self.s
         self.index = [None] * self.l
         self.map = {nullid: nullrev}
         self.allmap = 0
         self.all = 0
         self.mapfind_count = 0

     def loadmap(self):
         """
         during a commit, we need to make sure the rev being added is
         not a duplicate. This requires loading the entire index,
         which is fairly slow. loadmap can load up just the node map,
         which takes much less time.
         """
         if self.allmap:
             return
         end = self.datasize
         self.allmap = 1
         cur = 0
         count = 0
         blocksize = self.s * 256
         self.dataf.seek(0)
         while cur < end:
             data = self.dataf.read(blocksize)
             off = 0
             for x in xrange(256):
                 n = data[off + ngshaoffset:off + ngshaoffset + 20]
                 self.map[n] = count
                 count += 1
                 if count >= self.l:
                     break
                 off += self.s
             cur += blocksize

     def loadblock(self, blockstart, blocksize, data=None):
         if self.all:
             return
         if data is None:
             self.dataf.seek(blockstart)
             if blockstart + blocksize > self.datasize:
                 # the revlog may have grown since we've started running,
                 # but we don't have space in self.index for more entries.
                 # limit blocksize so that we don't get too much data.
                 blocksize = max(self.datasize - blockstart, 0)
             data = self.dataf.read(blocksize)
         lend = len(data) / self.s
         i = blockstart / self.s
         off = 0
         # lazyindex supports __delitem__
         if lend > len(self.index) - i:
             lend = len(self.index) - i
         for x in xrange(lend):
             if self.index[i + x] == None:
                 b = data[off : off + self.s]
                 self.index[i + x] = b
                 n = b[ngshaoffset:ngshaoffset + 20]
                 self.map[n] = i + x
             off += self.s

     def findnode(self, node):
         """search backwards through the index file for a specific node"""
         if self.allmap:
             return None

         # hg log will cause many many searches for the manifest
         # nodes. After we get called a few times, just load the whole
         # thing.
         if self.mapfind_count > 8:
             self.loadmap()
             if node in self.map:
                 return node
             return None
         self.mapfind_count += 1
         last = self.l - 1
         while self.index[last] != None:
             if last == 0:
                 self.all = 1
                 self.allmap = 1
                 return None
             last -= 1
         end = (last + 1) * self.s
         blocksize = self.s * 256
         while end >= 0:
             start = max(end - blocksize, 0)
             self.dataf.seek(start)
             data = self.dataf.read(end - start)
             findend = end - start
             while True:
                 # we're searching backwards, so we have to make sure
                 # we don't find a changeset where this node is a parent
                 off = data.rfind(node, 0, findend)
                 findend = off
                 if off >= 0:
                     i = off / self.s
                     off = i * self.s
                     n = data[off + ngshaoffset:off + ngshaoffset + 20]
                     if n == node:
                         self.map[n] = i + start / self.s
                         return node
                 else:
                     break
             end -= blocksize
         return None

202
203 def loadindex(self, i=None, end=None):
203 def loadindex(self, i=None, end=None):
204 if self.all:
204 if self.all:
205 return
205 return
206 all = False
206 all = False
207 if i == None:
207 if i == None:
208 blockstart = 0
208 blockstart = 0
209 blocksize = (512 / self.s) * self.s
209 blocksize = (512 / self.s) * self.s
210 end = self.datasize
210 end = self.datasize
211 all = True
211 all = True
212 else:
212 else:
213 if end:
213 if end:
214 blockstart = i * self.s
214 blockstart = i * self.s
215 end = end * self.s
215 end = end * self.s
216 blocksize = end - blockstart
216 blocksize = end - blockstart
217 else:
217 else:
218 blockstart = (i & ~63) * self.s
218 blockstart = (i & ~63) * self.s
219 blocksize = self.s * 64
219 blocksize = self.s * 64
220 end = blockstart + blocksize
220 end = blockstart + blocksize
221 while blockstart < end:
221 while blockstart < end:
222 self.loadblock(blockstart, blocksize)
222 self.loadblock(blockstart, blocksize)
223 blockstart += blocksize
223 blockstart += blocksize
224 if all:
224 if all:
225 self.all = True
225 self.all = True
226
226
 class lazyindex(object):
     """a lazy version of the index array"""
     def __init__(self, parser):
         self.p = parser
     def __len__(self):
         return len(self.p.index)
     def load(self, pos):
         if pos < 0:
             pos += len(self.p.index)
         self.p.loadindex(pos)
         return self.p.index[pos]
     def __getitem__(self, pos):
         ret = self.p.index[pos] or self.load(pos)
         if isinstance(ret, str):
             ret = struct.unpack(indexformatng, ret)
         return ret
     def __setitem__(self, pos, item):
         self.p.index[pos] = item
     def __delitem__(self, pos):
         del self.p.index[pos]
     def insert(self, pos, e):
         self.p.index.insert(pos, e)
     def append(self, e):
         self.p.index.append(e)

 class lazymap(object):
     """a lazy version of the node map"""
     def __init__(self, parser):
         self.p = parser
     def load(self, key):
         n = self.p.findnode(key)
         if n == None:
             raise KeyError(key)
     def __contains__(self, key):
         if key in self.p.map:
             return True
         self.p.loadmap()
         return key in self.p.map
     def __iter__(self):
         yield nullid
         for i in xrange(self.p.l):
             ret = self.p.index[i]
             if not ret:
                 self.p.loadindex(i)
                 ret = self.p.index[i]
             if isinstance(ret, str):
                 ret = struct.unpack(indexformatng, ret)
             yield ret[7]
     def __getitem__(self, key):
         try:
             return self.p.map[key]
         except KeyError:
             try:
                 self.load(key)
                 return self.p.map[key]
             except KeyError:
                 raise KeyError("node " + hex(key))
     def __setitem__(self, key, val):
         self.p.map[key] = val
     def __delitem__(self, key):
         del self.p.map[key]

 indexformatv0 = ">4l20s20s20s"
 v0shaoffset = 56

 class revlogoldio(object):
     def __init__(self):
         self.size = struct.calcsize(indexformatv0)

     def parseindex(self, fp, inline):
         s = self.size
         index = []
         nodemap = {nullid: nullrev}
         n = off = 0
         data = fp.read()
         l = len(data)
         while off + s <= l:
             cur = data[off:off + s]
             off += s
             e = struct.unpack(indexformatv0, cur)
             # transform to revlogv1 format
             e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                   nodemap[e[4]], nodemap[e[5]], e[6])
             index.append(e2)
             nodemap[e[6]] = n
             n += 1

         return index, nodemap, None

     def packentry(self, entry, node, version):
         e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
               node(entry[5]), node(entry[6]), entry[7])
         return struct.pack(indexformatv0, *e2)

 # index ng:
 #  6 bytes: offset
 #  2 bytes: flags
 #  4 bytes: compressed length
 #  4 bytes: uncompressed length
 #  4 bytes: base rev
 #  4 bytes: link rev
 #  4 bytes: parent 1 rev
 #  4 bytes: parent 2 rev
 # 32 bytes: nodeid
 indexformatng = ">Qiiiiii20s12x"
 ngshaoffset = 32
 versionformat = ">I"

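The struct format strings pin down the on-disk entry sizes: a v0 entry is four 4-byte ints plus three 20-byte nodes, and a RevlogNG entry is the packed offset/flags quad-word, six 4-byte ints, a 20-byte node and 12 bytes of padding. A quick check:

    import struct

    indexformatv0 = ">4l20s20s20s"
    indexformatng = ">Qiiiiii20s12x"

    assert struct.calcsize(indexformatv0) == 76   # v0 entry
    assert struct.calcsize(indexformatng) == 64   # RevlogNG entry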
 class revlogio(object):
     def __init__(self):
         self.size = struct.calcsize(indexformatng)

     def parseindex(self, fp, inline):
         try:
             size = util.fstat(fp).st_size
         except AttributeError:
             size = 0

         if lazyparser.safe_to_use and not inline and size > 1000000:
             # big index, let's parse it on demand
             parser = lazyparser(fp, size)
             index = lazyindex(parser)
             nodemap = lazymap(parser)
             e = list(index[0])
             type = gettype(e[0])
             e[0] = offset_type(0, type)
             index[0] = e
             return index, nodemap, None

         s = self.size
         cache = None
         index = []
         nodemap = {nullid: nullrev}
         n = off = 0
         # if we're not using lazymap, always read the whole index
         data = fp.read()
         l = len(data)
         if inline:
             cache = (0, data)
         while off + s <= l:
             e = struct.unpack(indexformatng, data[off:off + s])
             index.append(e)
             nodemap[e[7]] = n
             n += 1
             off += s
             if inline:
                 if e[1] < 0:
                     break
                 off += e[1]

         e = list(index[0])
         type = gettype(e[0])
         e[0] = offset_type(0, type)
         index[0] = e

         return index, nodemap, cache

     def packentry(self, entry, node, version):
         p = struct.pack(indexformatng, *entry)
         if not entry[3] and not getoffset(entry[0]) and entry[5] == nullrev:
             p = struct.pack(versionformat, version) + p[4:]
         return p

 class revlog(object):
     """
     the underlying revision storage object

     A revlog consists of two parts, an index and the revision data.

     The index is a file with a fixed record size containing
     information on each revision, including its nodeid (hash), the
     nodeids of its parents, the position and offset of its data within
     the data file, and the revision it's based on. Finally, each entry
     contains a linkrev entry that can serve as a pointer to external
     data.

     The revision data itself is a linear collection of data chunks.
     Each chunk represents a revision and is usually represented as a
     delta against the previous chunk. To bound lookup time, runs of
     deltas are limited to about 2 times the length of the original
     version data. This makes retrieval of a version proportional to
     its size, or O(1) relative to the number of revisions.

     Both pieces of the revlog are written to in an append-only
     fashion, which means we never need to rewrite a file to insert or
     remove data, and can use some simple techniques to avoid the need
     for locking while reading.
     """
     def __init__(self, opener, indexfile):
         """
         create a revlog object

         opener is a function that abstracts the file opening operation
         and can be used to implement COW semantics or the like.
         """
         self.indexfile = indexfile
         self.datafile = indexfile[:-2] + ".d"
         self.opener = opener
         self._cache = None
         self._chunkcache = None
         self.nodemap = {nullid: nullrev}
         self.index = []

         v = REVLOG_DEFAULT_VERSION
         if hasattr(opener, "defversion"):
             v = opener.defversion
             if v & REVLOGNG:
                 v |= REVLOGNGINLINEDATA

         i = ""
         try:
             f = self.opener(self.indexfile)
             i = f.read(4)
             f.seek(0)
             if len(i) > 0:
                 v = struct.unpack(versionformat, i)[0]
         except IOError, inst:
             if inst.errno != errno.ENOENT:
                 raise

         self.version = v
         self._inline = v & REVLOGNGINLINEDATA
         flags = v & ~0xFFFF
         fmt = v & 0xFFFF
         if fmt == REVLOGV0 and flags:
             raise RevlogError(_("index %s unknown flags %#04x for format v0")
                               % (self.indexfile, flags >> 16))
         elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
             raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                               % (self.indexfile, flags >> 16))
         elif fmt > REVLOGNG:
             raise RevlogError(_("index %s unknown format %d")
                               % (self.indexfile, fmt))

         self._io = revlogio()
         if self.version == REVLOGV0:
             self._io = revlogoldio()
         if i:
             d = self._io.parseindex(f, self._inline)
             self.index, self.nodemap, self._chunkcache = d

         # add the magic null revision at -1
         self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))

     def _loadindex(self, start, end):
         """load a block of indexes all at once from the lazy parser"""
         if isinstance(self.index, lazyindex):
             self.index.p.loadindex(start, end)

     def _loadindexmap(self):
         """loads both the map and the index from the lazy parser"""
         if isinstance(self.index, lazyindex):
             p = self.index.p
             p.loadindex()
             self.nodemap = p.map

     def _loadmap(self):
         """loads the map from the lazy parser"""
         if isinstance(self.nodemap, lazymap):
             self.nodemap.p.loadmap()
             self.nodemap = self.nodemap.p.map

     def tip(self):
         return self.node(len(self.index) - 2)
     def count(self):
         return len(self.index) - 1

     def rev(self, node):
         try:
             return self.nodemap[node]
         except KeyError:
             raise LookupError(_('%s: no node %s') % (self.indexfile, hex(node)))
     def node(self, rev):
         return self.index[rev][7]
     def linkrev(self, node):
         return self.index[self.rev(node)][4]
     def parents(self, node):
         d = self.index[self.rev(node)][5:7]
         return (self.node(d[0]), self.node(d[1]))
     def parentrevs(self, rev):
         return self.index[rev][5:7]
     def start(self, rev):
         return getoffset(self.index[rev][0])
     def end(self, rev):
         return self.start(rev) + self.length(rev)
     def length(self, rev):
         return self.index[rev][1]
     def base(self, rev):
         return self.index[rev][3]

     def size(self, rev):
         """return the length of the uncompressed text for a given revision"""
         l = self.index[rev][2]
         if l >= 0:
             return l

         t = self.revision(self.node(rev))
         return len(t)

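These accessors double as documentation of the in-memory entry layout: each index entry is an 8-tuple. A sketch naming the positions (the field names are mine; the positions follow the accessors above):

    # positions per node()/linkrev()/parents()/start()/length()/base()/size()
    OFFSET_FLAGS, COMP_LEN, RAW_LEN, BASE, LINKREV, P1REV, P2REV, NODE = range(8)

    entry = (0, 0, 0, -1, -1, -1, -1, "\0" * 20)   # the magic null entry
    assert entry[NODE] == "\0" * 20
    assert entry[P1REV] == entry[P2REV] == -1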
     # alternate implementation: the advantage of this code is that it
     # will be faster for a single revision. But, the results are not
     # cached, so finding the size of every revision will be slower.
     """
     if self.cache and self.cache[1] == rev:
         return len(self.cache[2])

     base = self.base(rev)
     if self.cache and self.cache[1] >= base and self.cache[1] < rev:
         base = self.cache[1]
         text = self.cache[2]
     else:
         text = self.revision(self.node(base))

     l = len(text)
     for x in xrange(base + 1, rev + 1):
         l = mdiff.patchedsize(l, self.chunk(x))
     return l
     """

     def reachable(self, node, stop=None):
         """return a hash of all nodes ancestral to a given node, including
         the node itself, stopping when stop is matched"""
         reachable = {}
         visit = [node]
         reachable[node] = 1
         if stop:
             stopn = self.rev(stop)
         else:
             stopn = 0
         while visit:
             n = visit.pop(0)
             if n == stop:
                 continue
             if n == nullid:
                 continue
             for p in self.parents(n):
                 if self.rev(p) < stopn:
                     continue
                 if p not in reachable:
                     reachable[p] = 1
                     visit.append(p)
         return reachable

570 def nodesbetween(self, roots=None, heads=None):
570 def nodesbetween(self, roots=None, heads=None):
571 """Return a tuple containing three elements. Elements 1 and 2 contain
571 """Return a tuple containing three elements. Elements 1 and 2 contain
572 a final list bases and heads after all the unreachable ones have been
572 a final list bases and heads after all the unreachable ones have been
573 pruned. Element 0 contains a topologically sorted list of all
573 pruned. Element 0 contains a topologically sorted list of all
574
574
575 nodes that satisfy these constraints:
575 nodes that satisfy these constraints:
576 1. All nodes must be descended from a node in roots (the nodes on
576 1. All nodes must be descended from a node in roots (the nodes on
577 roots are considered descended from themselves).
577 roots are considered descended from themselves).
578 2. All nodes must also be ancestors of a node in heads (the nodes in
578 2. All nodes must also be ancestors of a node in heads (the nodes in
579 heads are considered to be their own ancestors).
579 heads are considered to be their own ancestors).
580
580
581 If roots is unspecified, nullid is assumed as the only root.
581 If roots is unspecified, nullid is assumed as the only root.
582 If heads is unspecified, it is taken to be the output of the
582 If heads is unspecified, it is taken to be the output of the
583 heads method (i.e. a list of all nodes in the repository that
583 heads method (i.e. a list of all nodes in the repository that
584 have no children)."""
584 have no children)."""
585 nonodes = ([], [], [])
585 nonodes = ([], [], [])
586 if roots is not None:
586 if roots is not None:
587 roots = list(roots)
587 roots = list(roots)
588 if not roots:
588 if not roots:
589 return nonodes
589 return nonodes
590 lowestrev = min([self.rev(n) for n in roots])
590 lowestrev = min([self.rev(n) for n in roots])
591 else:
591 else:
592 roots = [nullid] # Everybody's a descendent of nullid
592 roots = [nullid] # Everybody's a descendent of nullid
593 lowestrev = nullrev
593 lowestrev = nullrev
594 if (lowestrev == nullrev) and (heads is None):
594 if (lowestrev == nullrev) and (heads is None):
595 # We want _all_ the nodes!
595 # We want _all_ the nodes!
596 return ([self.node(r) for r in xrange(0, self.count())],
596 return ([self.node(r) for r in xrange(0, self.count())],
597 [nullid], list(self.heads()))
597 [nullid], list(self.heads()))
598 if heads is None:
598 if heads is None:
599 # All nodes are ancestors, so the latest ancestor is the last
599 # All nodes are ancestors, so the latest ancestor is the last
600 # node.
600 # node.
601 highestrev = self.count() - 1
601 highestrev = self.count() - 1
602 # Set ancestors to None to signal that every node is an ancestor.
602 # Set ancestors to None to signal that every node is an ancestor.
603 ancestors = None
603 ancestors = None
604 # Set heads to an empty dictionary for later discovery of heads
604 # Set heads to an empty dictionary for later discovery of heads
605 heads = {}
605 heads = {}
606 else:
606 else:
607 heads = list(heads)
607 heads = list(heads)
608 if not heads:
608 if not heads:
609 return nonodes
609 return nonodes
610 ancestors = {}
610 ancestors = {}
611 # Turn heads into a dictionary so we can remove 'fake' heads.
611 # Turn heads into a dictionary so we can remove 'fake' heads.
612 # Also, later we will be using it to filter out the heads we can't
612 # Also, later we will be using it to filter out the heads we can't
613 # find from roots.
613 # find from roots.
614 heads = dict.fromkeys(heads, 0)
614 heads = dict.fromkeys(heads, 0)
615 # Start at the top and keep marking parents until we're done.
615 # Start at the top and keep marking parents until we're done.
616 nodestotag = heads.keys()
616 nodestotag = heads.keys()
617 # Remember where the top was so we can use it as a limit later.
617 # Remember where the top was so we can use it as a limit later.
618 highestrev = max([self.rev(n) for n in nodestotag])
618 highestrev = max([self.rev(n) for n in nodestotag])
619 while nodestotag:
619 while nodestotag:
620 # grab a node to tag
620 # grab a node to tag
621 n = nodestotag.pop()
621 n = nodestotag.pop()
622 # Never tag nullid
622 # Never tag nullid
623 if n == nullid:
623 if n == nullid:
624 continue
624 continue
625 # A node's revision number represents its place in a
625 # A node's revision number represents its place in a
626 # topologically sorted list of nodes.
626 # topologically sorted list of nodes.
627 r = self.rev(n)
627 r = self.rev(n)
628 if r >= lowestrev:
628 if r >= lowestrev:
629 if n not in ancestors:
629 if n not in ancestors:
630 # If we are possibly a descendent of one of the roots
630 # If we are possibly a descendent of one of the roots
631 # and we haven't already been marked as an ancestor
631 # and we haven't already been marked as an ancestor
632 ancestors[n] = 1 # Mark as ancestor
632 ancestors[n] = 1 # Mark as ancestor
633 # Add non-nullid parents to list of nodes to tag.
633 # Add non-nullid parents to list of nodes to tag.
634 nodestotag.extend([p for p in self.parents(n) if
634 nodestotag.extend([p for p in self.parents(n) if
635 p != nullid])
635 p != nullid])
636 elif n in heads: # We've seen it before, is it a fake head?
636 elif n in heads: # We've seen it before, is it a fake head?
637 # So it is, real heads should not be the ancestors of
637 # So it is, real heads should not be the ancestors of
638 # any other heads.
638 # any other heads.
639 heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [n for n in roots if n in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(n) for n in roots])
                else:
                    # No more roots?  Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a 'set' (i.e. a dictionary where the
        # values don't matter).
        descendents = dict.fromkeys(roots, 1)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendents.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendents, empty parents will look like
        # they're descendents.
        for r in xrange(max(lowestrev, 0), highestrev + 1):
            n = self.node(r)
            isdescendent = False
            if lowestrev == nullrev:  # Everybody is a descendent of nullid
                isdescendent = True
            elif n in descendents:
                # n is already a descendent
                isdescendent = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendents before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendents, it's not a root.
                    if (p[0] in descendents) or (p[1] in descendents):
                        roots.pop(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendent if either of its parents are
                # descendents.  (We seeded the descendents dict with the roots
                # up there, remember?)
                if (p[0] in descendents) or (p[1] in descendents):
                    descendents[n] = 1
                    isdescendent = True
            if isdescendent and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendents and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = 1
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = 1
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [n for n in heads.iterkeys() if heads[n] != 0]
        roots = roots.keys()
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)
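
A minimal standalone sketch of the marking pass above: visit revisions in
increasing order and flag a rev as a descendent as soon as either of its
parents is one. Toy parent table in place of revlog internals; all names
here are illustrative, not Mercurial's API.

nullrev = -1
# parentrevs[r] = (p1, p2); rev numbers only grow along parent links
parentrevs = {0: (nullrev, nullrev), 1: (0, nullrev),
              2: (1, nullrev), 3: (1, nullrev)}

def descendents_of(roots):
    isdesc = dict.fromkeys(roots, 1)
    for r in sorted(parentrevs):
        if r in isdesc:
            continue
        p1, p2 = parentrevs[r]
        # a rev is a descendent if either of its parents already is one
        if p1 in isdesc or p2 in isdesc:
            isdesc[r] = 1
    return sorted(isdesc)

print descendents_of([1])   # -> [1, 2, 3]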

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = dict.fromkeys([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev: 1}
        heads = {startrev: 1}

        parentrevs = self.parentrevs
        for r in xrange(startrev + 1, self.count()):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable[r] = 1
                        heads[r] = 1
                    if p in heads and p not in stoprevs:
                        del heads[p]

        return [self.node(r) for r in heads]
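
The same forward scan drives heads(): every reachable rev enters as a
candidate head and is evicted as soon as one of its children appears. A
self-contained toy version of that single pass (stoprevs omitted):

def toy_heads(parentrevs, startrev):
    reachable = {startrev: 1}
    heads = {startrev: 1}
    for r in sorted(parentrevs):
        if r <= startrev:
            continue
        for p in parentrevs[r]:
            if p in reachable:
                reachable[r] = 1
                heads[r] = 1
                if p in heads:       # p has a child now, so it's no head
                    del heads[p]
    return sorted(heads)

print toy_heads({0: (-1, -1), 1: (0, -1), 2: (1, -1), 3: (1, -1)}, 0)
# -> [2, 3]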

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in range(p + 1, self.count()):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def _match(self, id):
        if isinstance(id, (long, int)):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                r = self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = self.count() + rev
            if rev < 0 or rev >= self.count():
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                r = self.rev(node)
                return node
            except TypeError:
                pass

    def _partialmatch(self, id):
        if len(id) < 40:
            try:
                # hex(node)[:...]
                bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
                node = None
                for n in self.nodemap:
                    if n.startswith(bin_id) and hex(n).startswith(id):
                        if node is not None:
                            raise LookupError(_("Ambiguous identifier"))
                        node = n
                if node is not None:
                    return node
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(_("No match found"))
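
A sketch of the prefix trick _partialmatch() relies on: convert only an
even number of hex digits (so they map to whole bytes), then confirm the
full, possibly odd-length prefix against the hex form of each candidate.
binascii stands in here for Mercurial's bin()/hex() helpers.

from binascii import hexlify, unhexlify

def partialmatch(nodemap, id):
    bin_id = unhexlify(id[:len(id) & ~1])  # round down to even digits
    match = None
    for n in nodemap:
        if n.startswith(bin_id) and hexlify(n).startswith(id):
            if match is not None:
                raise KeyError("ambiguous identifier: " + id)
            match = n
    return match  # None when nothing matched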

    def cmp(self, node, text):
        """compare text with a given file revision"""
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def chunk(self, rev, df=None):
        start, length = self.start(rev), self.length(rev)
        if self._inline:
            start += (rev + 1) * self._io.size
        end = start + length
        def loadcache(df):
            cache_length = max(65536, length)
            if not df:
                if self._inline:
                    df = self.opener(self.indexfile)
                else:
                    df = self.opener(self.datafile)
            df.seek(start)
            self._chunkcache = (start, df.read(cache_length))

        if not self._chunkcache:
            loadcache(df)

        cache_start = self._chunkcache[0]
        cache_end = cache_start + len(self._chunkcache[1])
        if start >= cache_start and end <= cache_end:
            # it is cached
            offset = start - cache_start
        else:
            loadcache(df)
            offset = 0

        # avoid copying large chunks
        c = self._chunkcache[1]
        if len(c) > length:
            c = c[offset:offset + length]

        return decompress(c)
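
The caching policy in chunk() boils down to one contiguous read-ahead
window: serve a request from the window when it fits, otherwise reload a
window of at least 64k starting at the request. A generic sketch over any
seekable file object, not the revlog code itself:

class windowcache(object):
    def __init__(self, fp, minsize=65536):
        self.fp = fp
        self.minsize = minsize
        self.window = None          # (start, data)

    def read(self, start, length):
        if self.window:
            wstart, data = self.window
            if start >= wstart and start + length <= wstart + len(data):
                offset = start - wstart       # hit: slice out of the window
                return data[offset:offset + length]
        self.fp.seek(start)                   # miss: reload the window
        data = self.fp.read(max(self.minsize, length))
        self.window = (start, data)
        return data[:length]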

    def delta(self, node):
        """return or calculate a delta between a node and its predecessor"""
        r = self.rev(node)
        return self.revdiff(r - 1, r)

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        b1 = self.base(rev1)
        b2 = self.base(rev2)
        if b1 == b2 and rev1 + 1 == rev2:
            return self.chunk(rev2)
        else:
            return mdiff.textdiff(self.revision(self.node(rev1)),
                                  self.revision(self.node(rev2)))

    def revision(self, node):
        """return an uncompressed revision of a given node"""
        if node == nullid:
            return ""
        if self._cache and self._cache[0] == node:
            return self._cache[2]

        # look up what we need to read
        text = None
        rev = self.rev(node)
        base = self.base(rev)

        if self._inline:
            # we probably have the whole chunk cached
            df = None
        else:
            df = self.opener(self.datafile)

        # do we have useful data cached?
        if self._cache and self._cache[1] >= base and self._cache[1] < rev:
            base = self._cache[1]
            text = self._cache[2]
            self._loadindex(base, rev + 1)
        else:
            self._loadindex(base, rev + 1)
            text = self.chunk(base, df=df)

        bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
        text = mdiff.patches(text, bins)
        p1, p2 = self.parents(node)
        if node != hash(text, p1, p2):
            raise RevlogError(_("integrity check failed on %s:%d")
                              % (self.datafile, rev))

        self._cache = (node, rev, text)
        return text
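
Reconstruction in revision() is a fold over the delta chain: one full
snapshot at base(rev), then every stored delta up to rev applied in order.
Schematically, with chunk and patchall as stand-ins for self.chunk() and
mdiff.patches():

def reconstruct(base, rev, chunk, patchall):
    text = chunk(base)                                   # full snapshot
    bins = [chunk(r) for r in xrange(base + 1, rev + 1)] # deltas in order
    return patchall(text, bins)                          # fold them in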

    def checkinlinesize(self, tr, fp=None):
        if not self._inline:
            return
        if not fp:
            fp = self.opener(self.indexfile, 'r')
            fp.seek(0, 2)
        size = fp.tell()
        if size < 131072:
            return
        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)
        df = self.opener(self.datafile, 'w')
        calc = self._io.size
        for r in xrange(self.count()):
            start = self.start(r) + (r + 1) * calc
            length = self.length(r)
            fp.seek(start)
            d = fp.read(length)
            df.write(d)
        fp.close()
        df.close()
        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in xrange(self.count()):
            e = self._io.packentry(self.index[i], self.node, self.version)
            fp.write(e)

        # if we don't call rename, the temp file will never replace the
        # real index
        fp.rename()

        tr.replace(self.indexfile, trindex * calc)
        self._chunkcache = None

    def addrevision(self, text, transaction, link, p1, p2, d=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        d - an optional precomputed delta
        """
        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)

    def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
        node = hash(text, p1, p2)
        if node in self.nodemap:
            return node

        curr = self.count()
        prev = curr - 1
        base = self.base(prev)
        offset = self.end(prev)

        if curr:
            if not d:
                ptext = self.revision(self.node(prev))
                d = mdiff.textdiff(ptext, text)
            data = compress(d)
            l = len(data[1]) + len(data[0])
            dist = l + offset - self.start(base)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if not curr or dist > len(text) * 2:
            data = compress(text)
            l = len(data[1]) + len(data[0])
            base = curr

        e = (offset_type(offset, 0), l, len(text),
             base, link, self.rev(p1), self.rev(p2), node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version)
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            ifh.seek(0, 2)
            transaction.add(self.indexfile, ifh.tell(), prev)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

        self._cache = (node, curr, text)
        return node
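
The storage decision in _addrevision() reduces to a size test: keep a
delta while the compressed chain from the base snapshot stays under twice
the new text, otherwise store a fresh full version. A sketch of just that
arithmetic, with hypothetical names:

def choose_storage(chainbytes, deltabytes, textlen, first):
    # chainbytes: compressed bytes from the base snapshot through prev rev
    dist = chainbytes + deltabytes
    if first or dist > textlen * 2:
        return "full"     # start a new delta chain here
    return "delta"        # extend the existing chain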

    def ancestor(self, a, b):
        """calculate the least common ancestor of nodes a and b"""

        def parents(rev):
            return [p for p in self.parentrevs(rev) if p != nullrev]

        c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
        if c is None:
            return nullid

        return self.node(c)
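
A toy stand-in for the ancestor.ancestor() call above: collect the
ancestor sets of both revs and take the highest-numbered rev they share.
(The real algorithm weighs depth rather than rev numbers, but the shape
is the same.) parents maps each rev to its non-null parent revs:

def toy_ancestor(a, b, parents):
    def ancestors(r):
        seen, stack = {r: 1}, [r]
        while stack:
            for p in parents(stack.pop()):
                if p not in seen:
                    seen[p] = 1
                    stack.append(p)
        return seen
    ancs = ancestors(a)
    common = [r for r in ancestors(b) if r in ancs]
    return max(common) if common else None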

    def group(self, nodelist, lookup, infocollect=None):
        """calculate a delta group

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes.  The first delta is
        parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
        have this parent, as it has all history before these
        changesets.  The parent is parent[0].
        """
        revs = [self.rev(n) for n in nodelist]

        # if we don't have any revisions touched by these changesets, bail
        if not revs:
            yield changegroup.closechunk()
            return

        # add the parent of the first rev
        p = self.parents(self.node(revs[0]))[0]
        revs.insert(0, self.rev(p))

        # build deltas
        for d in xrange(0, len(revs) - 1):
            a, b = revs[d], revs[d + 1]
            nb = self.node(b)

            if infocollect is not None:
                infocollect(nb)

            d = self.revdiff(a, b)
            p = self.parents(nb)
            meta = nb + p[0] + p[1] + lookup(nb)
            yield changegroup.genchunk("%s%s" % (meta, d))

        yield changegroup.closechunk()
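
The wire form group() produces is a sequence of length-prefixed chunks:
80 bytes of node/p1/p2/cs metadata followed by the delta payload, with a
zero-length chunk closing the stream. A framing sketch along the lines of
changegroup.genchunk()/closechunk() (byte layout assumed, not quoted):

import struct

def genchunk(data):
    # the 4-byte big-endian length counts itself as well as the payload
    return struct.pack(">l", len(data) + 4) + data

def closechunk():
    return struct.pack(">l", 0)   # terminator: a zero-length chunk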

    def addgroup(self, revs, linkmapper, transaction, unique=0):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log; the rest are against the previous delta.
        """

        # track the base of the current delta log
        r = self.count()
        t = r - 1
        node = None

        base = prev = nullrev
        start = end = textlen = 0
        if r:
            end = self.end(t)

        ifh = self.opener(self.indexfile, "a+")
        ifh.seek(0, 2)
        transaction.add(self.indexfile, ifh.tell(), self.count())
        if self._inline:
            dfh = None
        else:
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        # loop through our set of deltas
        chain = None
        for chunk in revs:
            node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
            link = linkmapper(cs)
            if node in self.nodemap:
                # this can happen if two branches make the same change
                # if unique:
                #    raise RevlogError(_("already have %s") % hex(node[:4]))
                chain = node
                continue
            delta = chunk[80:]

            for p in (p1, p2):
                if not p in self.nodemap:
                    raise LookupError(_("unknown parent %s") % short(p))

            if not chain:
                # retrieve the parent revision of the delta chain
                chain = p1
                if not chain in self.nodemap:
                    raise LookupError(_("unknown base %s") % short(chain[:4]))

            # full versions are inserted when the needed deltas become
            # comparable to the uncompressed text or when the previous
            # version is not the one we have a delta against. We use
            # the size of the previous full rev as a proxy for the
            # current size.

            if chain == prev:
                tempd = compress(delta)
                cdelta = tempd[0] + tempd[1]
                textlen = mdiff.patchedsize(textlen, delta)

            if chain != prev or (end - start + len(cdelta)) > textlen * 2:
                # flush our writes here so we can read it in revision
                if dfh:
                    dfh.flush()
                ifh.flush()
                text = self.revision(chain)
                text = mdiff.patches(text, [delta])
                chk = self._addrevision(text, transaction, link, p1, p2, None,
                                        ifh, dfh)
                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    dfh = self.opener(self.datafile, "a")
                    ifh = self.opener(self.indexfile, "a")
                if chk != node:
                    raise RevlogError(_("consistency error adding group"))
                textlen = len(text)
            else:
                e = (offset_type(end, 0), len(cdelta), textlen, base,
                     link, self.rev(p1), self.rev(p2), node)
                self.index.insert(-1, e)
                self.nodemap[node] = r
                entry = self._io.packentry(e, self.node, self.version)
                if self._inline:
                    ifh.write(entry)
                    ifh.write(cdelta)
                    self.checkinlinesize(transaction, ifh)
                    if not self._inline:
                        dfh = self.opener(self.datafile, "a")
                        ifh = self.opener(self.indexfile, "a")
                else:
                    dfh.write(cdelta)
                    ifh.write(entry)

            t, r, chain, prev = r, r + 1, node, node
            base = self.base(t)
            start = self.start(base)
            end = self.end(t)

        return node

    def strip(self, rev, minlink):
        if self.count() == 0 or rev >= self.count():
            return

        if isinstance(self.index, lazyindex):
            self._loadindexmap()

        # When stripping away a revision, we need to make sure it
        # does not actually belong to an older changeset.
        # The minlink parameter defines the oldest revision
        # we're allowed to strip away.
        while minlink > self.index[rev][4]:
            rev += 1
            if rev >= self.count():
                return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            df = self.opener(self.datafile, "a")
            df.truncate(end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        indexf = self.opener(self.indexfile, "a")
        indexf.truncate(end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chunkcache = None
        for x in xrange(rev, self.count()):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
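
The truncation points in strip() differ only in where index and data
live. A sketch of that offset arithmetic, assuming datastart is
start(rev) for the first stripped rev and entrysize the fixed index
entry size (names hypothetical):

def truncate_points(datastart, rev, entrysize, inline):
    if inline:
        # one .i file holds index entries and data interleaved
        return {"index": datastart + rev * entrysize}
    # split revlog: cut the .d file and the .i file separately
    return {"data": datastart, "index": rev * entrysize}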

    def checksize(self):
        expected = 0
        if self.count():
            expected = self.end(self.count() - 1)

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            s = self._io.size
            i = actual / s
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in xrange(self.count()):
                    databytes += self.length(r)
                dd = 0
                di = actual - self.count() * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)
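
What checksize() reports, in plain terms: dd is data-file bytes beyond the
last indexed chunk, di is index-file bytes that don't round to whole
entries (for inline revlogs, data drift is folded into di instead). A
sketch of the non-inline case only:

def drift(datasize, indexsize, expected_data, entrysize):
    dd = datasize - expected_data                         # data surplus
    di = indexsize - (indexsize / entrysize) * entrysize  # partial entry
    return dd, di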