revlog: speed up chunkcache...
Matt Mackall
r4988:14486eea default
@@ -1,251 +1,251 @@ bundlerepo.py
1 1 """
2 2 bundlerepo.py - repository class for viewing uncompressed bundles
3 3
4 4 This provides a read-only repository interface to bundles as if
5 5 they were part of the actual repository.
6 6
7 7 Copyright 2006, 2007 Benoit Boissinot <bboissin@gmail.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import _
15 15 import changegroup, util, os, struct, bz2, tempfile
16 16
17 17 import localrepo, changelog, manifest, filelog, revlog
18 18
19 19 class bundlerevlog(revlog.revlog):
20 20 def __init__(self, opener, indexfile, bundlefile,
21 21 linkmapper=None):
22 22 # How it works:
23 23 # to retrieve a revision, we need to know the offset of
24 24 # the revision in the bundlefile (an opened file).
25 25 #
26 26 # We store this offset in the index (start). To differentiate a
27 27 # rev in the bundle from a rev in the revlog, we check
28 28 # len(index[r]). If the tuple is bigger than 7, it is a bundle
29 29 # (it is bigger since we store the node the delta is against)
30 30 #
31 31 revlog.revlog.__init__(self, opener, indexfile)
32 32 self.bundlefile = bundlefile
33 33 self.basemap = {}
34 34 def chunkpositer():
35 35 for chunk in changegroup.chunkiter(bundlefile):
36 36 pos = bundlefile.tell()
37 37 yield chunk, pos - len(chunk)
38 38 n = self.count()
39 39 prev = None
40 40 for chunk, start in chunkpositer():
41 41 size = len(chunk)
42 42 if size < 80:
43 43 raise util.Abort("invalid changegroup")
44 44 start += 80
45 45 size -= 80
46 46 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
47 47 if node in self.nodemap:
48 48 prev = node
49 49 continue
50 50 for p in (p1, p2):
51 51 if not p in self.nodemap:
52 52 raise revlog.LookupError(_("unknown parent %s") % short(p))
53 53 if linkmapper is None:
54 54 link = n
55 55 else:
56 56 link = linkmapper(cs)
57 57
58 58 if not prev:
59 59 prev = p1
60 60 # start, size, base is not used, link, p1, p2, delta ref
61 61 e = (revlog.offset_type(start, 0), size, -1, None, link,
62 62 self.rev(p1), self.rev(p2), node)
63 63 self.basemap[n] = prev
64 64 self.index.insert(-1, e)
65 65 self.nodemap[node] = n
66 66 prev = node
67 67 n += 1
68 68
69 69 def bundle(self, rev):
70 70 """is rev from the bundle"""
71 71 if rev < 0:
72 72 return False
73 73 return rev in self.basemap
74 74 def bundlebase(self, rev): return self.basemap[rev]
75 75 def chunk(self, rev, df=None, cachelen=4096):
76 76 # Warning: in case of bundle, the diff is against bundlebase,
77 77 # not against rev - 1
78 78 # XXX: could use some caching
79 79 if not self.bundle(rev):
80 return revlog.revlog.chunk(self, rev, df, cachelen)
80 return revlog.revlog.chunk(self, rev, df)
81 81 self.bundlefile.seek(self.start(rev))
82 82 return self.bundlefile.read(self.length(rev))
83 83
84 84 def revdiff(self, rev1, rev2):
85 85 """return or calculate a delta between two revisions"""
86 86 if self.bundle(rev1) and self.bundle(rev2):
87 87 # hot path for bundle
88 88 revb = self.rev(self.bundlebase(rev2))
89 89 if revb == rev1:
90 90 return self.chunk(rev2)
91 91 elif not self.bundle(rev1) and not self.bundle(rev2):
92 92 return revlog.revlog.revdiff(self, rev1, rev2)
93 93
94 94 return self.diff(self.revision(self.node(rev1)),
95 95 self.revision(self.node(rev2)))
96 96
97 97 def revision(self, node):
98 98 """return an uncompressed revision of a given"""
99 99 if node == nullid: return ""
100 100
101 101 text = None
102 102 chain = []
103 103 iter_node = node
104 104 rev = self.rev(iter_node)
105 105 # reconstruct the revision if it is from a changegroup
106 106 while self.bundle(rev):
107 107 if self._cache and self._cache[0] == iter_node:
108 108 text = self._cache[2]
109 109 break
110 110 chain.append(rev)
111 111 iter_node = self.bundlebase(rev)
112 112 rev = self.rev(iter_node)
113 113 if text is None:
114 114 text = revlog.revlog.revision(self, iter_node)
115 115
116 116 while chain:
117 117 delta = self.chunk(chain.pop())
118 118 text = self.patches(text, [delta])
119 119
120 120 p1, p2 = self.parents(node)
121 121 if node != revlog.hash(text, p1, p2):
122 122 raise revlog.RevlogError(_("integrity check failed on %s:%d")
123 123 % (self.datafile, self.rev(node)))
124 124
125 125 self._cache = (node, self.rev(node), text)
126 126 return text
127 127
128 128 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
129 129 raise NotImplementedError
130 130 def addgroup(self, revs, linkmapper, transaction, unique=0):
131 131 raise NotImplementedError
132 132 def strip(self, rev, minlink):
133 133 raise NotImplementedError
134 134 def checksize(self):
135 135 raise NotImplementedError
136 136
137 137 class bundlechangelog(bundlerevlog, changelog.changelog):
138 138 def __init__(self, opener, bundlefile):
139 139 changelog.changelog.__init__(self, opener)
140 140 bundlerevlog.__init__(self, opener, self.indexfile, bundlefile)
141 141
142 142 class bundlemanifest(bundlerevlog, manifest.manifest):
143 143 def __init__(self, opener, bundlefile, linkmapper):
144 144 manifest.manifest.__init__(self, opener)
145 145 bundlerevlog.__init__(self, opener, self.indexfile, bundlefile,
146 146 linkmapper)
147 147
148 148 class bundlefilelog(bundlerevlog, filelog.filelog):
149 149 def __init__(self, opener, path, bundlefile, linkmapper):
150 150 filelog.filelog.__init__(self, opener, path)
151 151 bundlerevlog.__init__(self, opener, self.indexfile, bundlefile,
152 152 linkmapper)
153 153
154 154 class bundlerepository(localrepo.localrepository):
155 155 def __init__(self, ui, path, bundlename):
156 156 localrepo.localrepository.__init__(self, ui, path)
157 157
158 158 self._url = 'bundle:' + bundlename
159 159 if path: self._url += '+' + path
160 160
161 161 self.tempfile = None
162 162 self.bundlefile = open(bundlename, "rb")
163 163 header = self.bundlefile.read(6)
164 164 if not header.startswith("HG"):
165 165 raise util.Abort(_("%s: not a Mercurial bundle file") % bundlename)
166 166 elif not header.startswith("HG10"):
167 167 raise util.Abort(_("%s: unknown bundle version") % bundlename)
168 168 elif header == "HG10BZ":
169 169 fdtemp, temp = tempfile.mkstemp(prefix="hg-bundle-",
170 170 suffix=".hg10un", dir=self.path)
171 171 self.tempfile = temp
172 172 fptemp = os.fdopen(fdtemp, 'wb')
173 173 def generator(f):
174 174 zd = bz2.BZ2Decompressor()
175 175 zd.decompress("BZ")
176 176 for chunk in f:
177 177 yield zd.decompress(chunk)
178 178 gen = generator(util.filechunkiter(self.bundlefile, 4096))
179 179
180 180 try:
181 181 fptemp.write("HG10UN")
182 182 for chunk in gen:
183 183 fptemp.write(chunk)
184 184 finally:
185 185 fptemp.close()
186 186 self.bundlefile.close()
187 187
188 188 self.bundlefile = open(self.tempfile, "rb")
189 189 # seek right after the header
190 190 self.bundlefile.seek(6)
191 191 elif header == "HG10UN":
192 192 # nothing to do
193 193 pass
194 194 else:
195 195 raise util.Abort(_("%s: unknown bundle compression type")
196 196 % bundlename)
197 197 self.changelog = bundlechangelog(self.sopener, self.bundlefile)
198 198 self.manifest = bundlemanifest(self.sopener, self.bundlefile,
199 199 self.changelog.rev)
200 200 # dict with the mapping 'filename' -> position in the bundle
201 201 self.bundlefilespos = {}
202 202 while 1:
203 203 f = changegroup.getchunk(self.bundlefile)
204 204 if not f:
205 205 break
206 206 self.bundlefilespos[f] = self.bundlefile.tell()
207 207 for c in changegroup.chunkiter(self.bundlefile):
208 208 pass
209 209
210 210 def url(self):
211 211 return self._url
212 212
213 213 def dev(self):
214 214 return -1
215 215
216 216 def file(self, f):
217 217 if f[0] == '/':
218 218 f = f[1:]
219 219 if f in self.bundlefilespos:
220 220 self.bundlefile.seek(self.bundlefilespos[f])
221 221 return bundlefilelog(self.sopener, f, self.bundlefile,
222 222 self.changelog.rev)
223 223 else:
224 224 return filelog.filelog(self.sopener, f)
225 225
226 226 def close(self):
227 227 """Close assigned bundle file immediately."""
228 228 self.bundlefile.close()
229 229
230 230 def __del__(self):
231 231 bundlefile = getattr(self, 'bundlefile', None)
232 232 if bundlefile and not bundlefile.closed:
233 233 bundlefile.close()
234 234 tempfile = getattr(self, 'tempfile', None)
235 235 if tempfile is not None:
236 236 os.unlink(tempfile)
237 237
238 238 def instance(ui, path, create):
239 239 if create:
240 240 raise util.Abort(_('cannot create new bundle repository'))
241 241 path = util.drop_scheme('file', path)
242 242 if path.startswith('bundle:'):
243 243 path = util.drop_scheme('bundle', path)
244 244 s = path.split("+", 1)
245 245 if len(s) == 1:
246 246 repopath, bundlename = "", s[0]
247 247 else:
248 248 repopath, bundlename = s
249 249 else:
250 250 repopath, bundlename = '', path
251 251 return bundlerepository(ui, repopath, bundlename)
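
The bundlerepo.py half of this changeset only drops the cachelen pass-through in bundlerevlog.chunk(); the offset bookkeeping described in the "How it works" comment above is unchanged. Below is a minimal sketch of that bookkeeping, with a hypothetical read_chunks iterator standing in for changegroup.chunkiter:

    import struct

    def index_bundle_chunks(bundlefile, read_chunks):
        """Record where each changegroup chunk's delta starts in the bundle.

        read_chunks is assumed to yield raw chunks while advancing bundlefile,
        so bundlefile.tell() - len(chunk) is where the chunk's payload began.
        """
        positions = {}
        for chunk in read_chunks(bundlefile):
            start = bundlefile.tell() - len(chunk)
            # the first 80 bytes are node, p1, p2 and the changeset link
            node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
            # the delta payload starts right after that header
            positions[node] = (start + 80, len(chunk) - 80)
        return positions

With the start and length recorded this way, bundlerevlog.chunk() serves a bundle revision by seeking to the stored offset and reading the stored length, exactly as the override above does.
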
@@ -1,1253 +1,1257 @@ revlog.py
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import _
15 15 import binascii, changegroup, errno, ancestor, mdiff, os
16 16 import sha, struct, util, zlib
17 17
18 18 # revlog flags
19 19 REVLOGV0 = 0
20 20 REVLOGNG = 1
21 21 REVLOGNGINLINEDATA = (1 << 16)
22 22 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
23 23 REVLOG_DEFAULT_FORMAT = REVLOGNG
24 24 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
25 25
26 26 class RevlogError(Exception):
27 27 pass
28 28 class LookupError(RevlogError):
29 29 pass
30 30
31 31 def getoffset(q):
32 32 if q & 0xFFFF:
33 33 raise RevlogError(_('incompatible revision flag %x') % q)
34 34 return int(q >> 16)
35 35
36 36 def gettype(q):
37 37 return int(q & 0xFFFF)
38 38
39 39 def offset_type(offset, type):
40 40 return long(long(offset) << 16 | type)
41 41
42 42 def hash(text, p1, p2):
43 43 """generate a hash from the given text and its parent hashes
44 44
45 45 This hash combines both the current file contents and its history
46 46 in a manner that makes it easy to distinguish nodes with the same
47 47 content in the revision graph.
48 48 """
49 49 l = [p1, p2]
50 50 l.sort()
51 51 s = sha.new(l[0])
52 52 s.update(l[1])
53 53 s.update(text)
54 54 return s.digest()
55 55
56 56 def compress(text):
57 57 """ generate a possibly-compressed representation of text """
58 58 if not text:
59 59 return ("", text)
60 60 if len(text) < 44:
61 61 if text[0] == '\0':
62 62 return ("", text)
63 63 return ('u', text)
64 64 bin = zlib.compress(text)
65 65 if len(bin) > len(text):
66 66 if text[0] == '\0':
67 67 return ("", text)
68 68 return ('u', text)
69 69 return ("", bin)
70 70
71 71 def decompress(bin):
72 72 """ decompress the given input """
73 73 if not bin:
74 74 return bin
75 75 t = bin[0]
76 76 if t == '\0':
77 77 return bin
78 78 if t == 'x':
79 79 return zlib.decompress(bin)
80 80 if t == 'u':
81 81 return bin[1:]
82 82 raise RevlogError(_("unknown compression type %r") % t)
83 83
84 84 class lazyparser(object):
85 85 """
86 86 this class avoids the need to parse the entirety of large indices
87 87 """
88 88
89 89 # lazyparser is not safe to use on windows if win32 extensions are not
90 90 # available. it keeps a file handle open, which makes it impossible
91 91 # to break hardlinks on local cloned repos.
92 92 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
93 93 hasattr(util, 'win32api'))
94 94
95 95 def __init__(self, dataf, size):
96 96 self.dataf = dataf
97 97 self.s = struct.calcsize(indexformatng)
98 98 self.datasize = size
99 99 self.l = size/self.s
100 100 self.index = [None] * self.l
101 101 self.map = {nullid: nullrev}
102 102 self.allmap = 0
103 103 self.all = 0
104 104 self.mapfind_count = 0
105 105
106 106 def loadmap(self):
107 107 """
108 108 during a commit, we need to make sure the rev being added is
109 109 not a duplicate. This requires loading the entire index,
110 110 which is fairly slow. loadmap can load up just the node map,
111 111 which takes much less time.
112 112 """
113 113 if self.allmap:
114 114 return
115 115 end = self.datasize
116 116 self.allmap = 1
117 117 cur = 0
118 118 count = 0
119 119 blocksize = self.s * 256
120 120 self.dataf.seek(0)
121 121 while cur < end:
122 122 data = self.dataf.read(blocksize)
123 123 off = 0
124 124 for x in xrange(256):
125 125 n = data[off + ngshaoffset:off + ngshaoffset + 20]
126 126 self.map[n] = count
127 127 count += 1
128 128 if count >= self.l:
129 129 break
130 130 off += self.s
131 131 cur += blocksize
132 132
133 133 def loadblock(self, blockstart, blocksize, data=None):
134 134 if self.all:
135 135 return
136 136 if data is None:
137 137 self.dataf.seek(blockstart)
138 138 if blockstart + blocksize > self.datasize:
139 139 # the revlog may have grown since we've started running,
140 140 # but we don't have space in self.index for more entries.
141 141 # limit blocksize so that we don't get too much data.
142 142 blocksize = max(self.datasize - blockstart, 0)
143 143 data = self.dataf.read(blocksize)
144 144 lend = len(data) / self.s
145 145 i = blockstart / self.s
146 146 off = 0
147 147 # lazyindex supports __delitem__
148 148 if lend > len(self.index) - i:
149 149 lend = len(self.index) - i
150 150 for x in xrange(lend):
151 151 if self.index[i + x] == None:
152 152 b = data[off : off + self.s]
153 153 self.index[i + x] = b
154 154 n = b[ngshaoffset:ngshaoffset + 20]
155 155 self.map[n] = i + x
156 156 off += self.s
157 157
158 158 def findnode(self, node):
159 159 """search backwards through the index file for a specific node"""
160 160 if self.allmap:
161 161 return None
162 162
163 163 # hg log will cause many many searches for the manifest
164 164 # nodes. After we get called a few times, just load the whole
165 165 # thing.
166 166 if self.mapfind_count > 8:
167 167 self.loadmap()
168 168 if node in self.map:
169 169 return node
170 170 return None
171 171 self.mapfind_count += 1
172 172 last = self.l - 1
173 173 while self.index[last] != None:
174 174 if last == 0:
175 175 self.all = 1
176 176 self.allmap = 1
177 177 return None
178 178 last -= 1
179 179 end = (last + 1) * self.s
180 180 blocksize = self.s * 256
181 181 while end >= 0:
182 182 start = max(end - blocksize, 0)
183 183 self.dataf.seek(start)
184 184 data = self.dataf.read(end - start)
185 185 findend = end - start
186 186 while True:
187 187 # we're searching backwards, so we have to make sure
188 188 # we don't find a changeset where this node is a parent
189 189 off = data.rfind(node, 0, findend)
190 190 findend = off
191 191 if off >= 0:
192 192 i = off / self.s
193 193 off = i * self.s
194 194 n = data[off + ngshaoffset:off + ngshaoffset + 20]
195 195 if n == node:
196 196 self.map[n] = i + start / self.s
197 197 return node
198 198 else:
199 199 break
200 200 end -= blocksize
201 201 return None
202 202
203 203 def loadindex(self, i=None, end=None):
204 204 if self.all:
205 205 return
206 206 all = False
207 207 if i == None:
208 208 blockstart = 0
209 209 blocksize = (512 / self.s) * self.s
210 210 end = self.datasize
211 211 all = True
212 212 else:
213 213 if end:
214 214 blockstart = i * self.s
215 215 end = end * self.s
216 216 blocksize = end - blockstart
217 217 else:
218 218 blockstart = (i & ~63) * self.s
219 219 blocksize = self.s * 64
220 220 end = blockstart + blocksize
221 221 while blockstart < end:
222 222 self.loadblock(blockstart, blocksize)
223 223 blockstart += blocksize
224 224 if all:
225 225 self.all = True
226 226
227 227 class lazyindex(object):
228 228 """a lazy version of the index array"""
229 229 def __init__(self, parser):
230 230 self.p = parser
231 231 def __len__(self):
232 232 return len(self.p.index)
233 233 def load(self, pos):
234 234 if pos < 0:
235 235 pos += len(self.p.index)
236 236 self.p.loadindex(pos)
237 237 return self.p.index[pos]
238 238 def __getitem__(self, pos):
239 239 ret = self.p.index[pos] or self.load(pos)
240 240 if isinstance(ret, str):
241 241 ret = struct.unpack(indexformatng, ret)
242 242 return ret
243 243 def __setitem__(self, pos, item):
244 244 self.p.index[pos] = item
245 245 def __delitem__(self, pos):
246 246 del self.p.index[pos]
247 247 def insert(self, pos, e):
248 248 self.p.index.insert(pos, e)
249 249 def append(self, e):
250 250 self.p.index.append(e)
251 251
252 252 class lazymap(object):
253 253 """a lazy version of the node map"""
254 254 def __init__(self, parser):
255 255 self.p = parser
256 256 def load(self, key):
257 257 n = self.p.findnode(key)
258 258 if n == None:
259 259 raise KeyError(key)
260 260 def __contains__(self, key):
261 261 if key in self.p.map:
262 262 return True
263 263 self.p.loadmap()
264 264 return key in self.p.map
265 265 def __iter__(self):
266 266 yield nullid
267 267 for i in xrange(self.p.l):
268 268 ret = self.p.index[i]
269 269 if not ret:
270 270 self.p.loadindex(i)
271 271 ret = self.p.index[i]
272 272 if isinstance(ret, str):
273 273 ret = struct.unpack(indexformatng, ret)
274 274 yield ret[7]
275 275 def __getitem__(self, key):
276 276 try:
277 277 return self.p.map[key]
278 278 except KeyError:
279 279 try:
280 280 self.load(key)
281 281 return self.p.map[key]
282 282 except KeyError:
283 283 raise KeyError("node " + hex(key))
284 284 def __setitem__(self, key, val):
285 285 self.p.map[key] = val
286 286 def __delitem__(self, key):
287 287 del self.p.map[key]
288 288
289 289 indexformatv0 = ">4l20s20s20s"
290 290 v0shaoffset = 56
291 291
292 292 class revlogoldio(object):
293 293 def __init__(self):
294 294 self.size = struct.calcsize(indexformatv0)
295 295
296 296 def parseindex(self, fp, inline):
297 297 s = self.size
298 298 index = []
299 299 nodemap = {nullid: nullrev}
300 300 n = off = 0
301 301 data = fp.read()
302 302 l = len(data)
303 303 while off + s <= l:
304 304 cur = data[off:off + s]
305 305 off += s
306 306 e = struct.unpack(indexformatv0, cur)
307 307 # transform to revlogv1 format
308 308 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
309 309 nodemap[e[4]], nodemap[e[5]], e[6])
310 310 index.append(e2)
311 311 nodemap[e[6]] = n
312 312 n += 1
313 313
314 314 return index, nodemap, None
315 315
316 316 def packentry(self, entry, node, version):
317 317 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
318 318 node(entry[5]), node(entry[6]), entry[7])
319 319 return struct.pack(indexformatv0, *e2)
320 320
321 321 # index ng:
322 322 # 6 bytes offset
323 323 # 2 bytes flags
324 324 # 4 bytes compressed length
325 325 # 4 bytes uncompressed length
326 326 # 4 bytes: base rev
327 327 # 4 bytes link rev
328 328 # 4 bytes parent 1 rev
329 329 # 4 bytes parent 2 rev
330 330 # 32 bytes: nodeid
331 331 indexformatng = ">Qiiiiii20s12x"
332 332 ngshaoffset = 32
333 333 versionformat = ">I"
334 334
335 335 class revlogio(object):
336 336 def __init__(self):
337 337 self.size = struct.calcsize(indexformatng)
338 338
339 339 def parseindex(self, fp, inline):
340 340 try:
341 341 size = util.fstat(fp).st_size
342 342 except AttributeError:
343 343 size = 0
344 344
345 345 if lazyparser.safe_to_use and not inline and size > 1000000:
346 346 # big index, let's parse it on demand
347 347 parser = lazyparser(fp, size)
348 348 index = lazyindex(parser)
349 349 nodemap = lazymap(parser)
350 350 e = list(index[0])
351 351 type = gettype(e[0])
352 352 e[0] = offset_type(0, type)
353 353 index[0] = e
354 354 return index, nodemap, None
355 355
356 356 s = self.size
357 357 cache = None
358 358 index = []
359 359 nodemap = {nullid: nullrev}
360 360 n = off = 0
361 361 # if we're not using lazymap, always read the whole index
362 362 data = fp.read()
363 363 l = len(data)
364 364 if inline:
365 365 cache = (0, data)
366 366 while off + s <= l:
367 367 e = struct.unpack(indexformatng, data[off:off + s])
368 368 index.append(e)
369 369 nodemap[e[7]] = n
370 370 n += 1
371 371 off += s
372 372 if inline:
373 373 if e[1] < 0:
374 374 break
375 375 off += e[1]
376 376
377 377 e = list(index[0])
378 378 type = gettype(e[0])
379 379 e[0] = offset_type(0, type)
380 380 index[0] = e
381 381
382 382 return index, nodemap, cache
383 383
384 384 def packentry(self, entry, node, version):
385 385 p = struct.pack(indexformatng, *entry)
386 386 if not entry[3] and not getoffset(entry[0]) and entry[5] == nullrev:
387 387 p = struct.pack(versionformat, version) + p[4:]
388 388 return p
389 389
390 390 class revlog(object):
391 391 """
392 392 the underlying revision storage object
393 393
394 394 A revlog consists of two parts, an index and the revision data.
395 395
396 396 The index is a file with a fixed record size containing
397 397 information on each revision, including its nodeid (hash), the
398 398 nodeids of its parents, the position and offset of its data within
399 399 the data file, and the revision it's based on. Finally, each entry
400 400 contains a linkrev entry that can serve as a pointer to external
401 401 data.
402 402
403 403 The revision data itself is a linear collection of data chunks.
404 404 Each chunk represents a revision and is usually represented as a
405 405 delta against the previous chunk. To bound lookup time, runs of
406 406 deltas are limited to about 2 times the length of the original
407 407 version data. This makes retrieval of a version proportional to
408 408 its size, or O(1) relative to the number of revisions.
409 409
410 410 Both pieces of the revlog are written to in an append-only
411 411 fashion, which means we never need to rewrite a file to insert or
412 412 remove data, and can use some simple techniques to avoid the need
413 413 for locking while reading.
414 414 """
415 415 def __init__(self, opener, indexfile):
416 416 """
417 417 create a revlog object
418 418
419 419 opener is a function that abstracts the file opening operation
420 420 and can be used to implement COW semantics or the like.
421 421 """
422 422 self.indexfile = indexfile
423 423 self.datafile = indexfile[:-2] + ".d"
424 424 self.opener = opener
425 425 self._cache = None
426 426 self._chunkcache = None
427 427 self.nodemap = {nullid: nullrev}
428 428 self.index = []
429 429
430 430 v = REVLOG_DEFAULT_VERSION
431 431 if hasattr(opener, "defversion"):
432 432 v = opener.defversion
433 433 if v & REVLOGNG:
434 434 v |= REVLOGNGINLINEDATA
435 435
436 436 i = ""
437 437 try:
438 438 f = self.opener(self.indexfile)
439 439 i = f.read(4)
440 440 f.seek(0)
441 441 if len(i) > 0:
442 442 v = struct.unpack(versionformat, i)[0]
443 443 except IOError, inst:
444 444 if inst.errno != errno.ENOENT:
445 445 raise
446 446
447 447 self.version = v
448 448 self._inline = v & REVLOGNGINLINEDATA
449 449 flags = v & ~0xFFFF
450 450 fmt = v & 0xFFFF
451 451 if fmt == REVLOGV0 and flags:
452 452 raise RevlogError(_("index %s unknown flags %#04x for format v0")
453 453 % (self.indexfile, flags >> 16))
454 454 elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
455 455 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
456 456 % (self.indexfile, flags >> 16))
457 457 elif fmt > REVLOGNG:
458 458 raise RevlogError(_("index %s unknown format %d")
459 459 % (self.indexfile, fmt))
460 460
461 461 self._io = revlogio()
462 462 if self.version == REVLOGV0:
463 463 self._io = revlogoldio()
464 464 if i:
465 465 d = self._io.parseindex(f, self._inline)
466 466 self.index, self.nodemap, self._chunkcache = d
467 467
468 468 # add the magic null revision at -1
469 469 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
470 470
471 471 def _loadindex(self, start, end):
472 472 """load a block of indexes all at once from the lazy parser"""
473 473 if isinstance(self.index, lazyindex):
474 474 self.index.p.loadindex(start, end)
475 475
476 476 def _loadindexmap(self):
477 477 """loads both the map and the index from the lazy parser"""
478 478 if isinstance(self.index, lazyindex):
479 479 p = self.index.p
480 480 p.loadindex()
481 481 self.nodemap = p.map
482 482
483 483 def _loadmap(self):
484 484 """loads the map from the lazy parser"""
485 485 if isinstance(self.nodemap, lazymap):
486 486 self.nodemap.p.loadmap()
487 487 self.nodemap = self.nodemap.p.map
488 488
489 489 def tip(self):
490 490 return self.node(len(self.index) - 2)
491 491 def count(self):
492 492 return len(self.index) - 1
493 493
494 494 def rev(self, node):
495 495 try:
496 496 return self.nodemap[node]
497 497 except KeyError:
498 498 raise LookupError(_('%s: no node %s') % (self.indexfile, hex(node)))
499 499 def node(self, rev):
500 500 return self.index[rev][7]
501 501 def linkrev(self, node):
502 502 return self.index[self.rev(node)][4]
503 503 def parents(self, node):
504 504 d = self.index[self.rev(node)][5:7]
505 505 return (self.node(d[0]), self.node(d[1]))
506 506 def parentrevs(self, rev):
507 507 return self.index[rev][5:7]
508 508 def start(self, rev):
509 509 return getoffset(self.index[rev][0])
510 510 def end(self, rev):
511 511 return self.start(rev) + self.length(rev)
512 512 def length(self, rev):
513 513 return self.index[rev][1]
514 514 def base(self, rev):
515 515 return self.index[rev][3]
516 516
517 517 def size(self, rev):
518 518 """return the length of the uncompressed text for a given revision"""
519 519 l = self.index[rev][2]
520 520 if l >= 0:
521 521 return l
522 522
523 523 t = self.revision(self.node(rev))
524 524 return len(t)
525 525
526 526 # alternate implementation. The advantage of this code is that it
527 527 # will be faster for a single revision. But, the results are not
528 528 # cached, so finding the size of every revision will be slower.
529 529 """
530 530 if self.cache and self.cache[1] == rev:
531 531 return len(self.cache[2])
532 532
533 533 base = self.base(rev)
534 534 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
535 535 base = self.cache[1]
536 536 text = self.cache[2]
537 537 else:
538 538 text = self.revision(self.node(base))
539 539
540 540 l = len(text)
541 541 for x in xrange(base + 1, rev + 1):
542 542 l = mdiff.patchedsize(l, self.chunk(x))
543 543 return l
544 544 """
545 545
546 546 def reachable(self, node, stop=None):
547 547 """return a hash of all nodes ancestral to a given node, including
548 548 the node itself, stopping when stop is matched"""
549 549 reachable = {}
550 550 visit = [node]
551 551 reachable[node] = 1
552 552 if stop:
553 553 stopn = self.rev(stop)
554 554 else:
555 555 stopn = 0
556 556 while visit:
557 557 n = visit.pop(0)
558 558 if n == stop:
559 559 continue
560 560 if n == nullid:
561 561 continue
562 562 for p in self.parents(n):
563 563 if self.rev(p) < stopn:
564 564 continue
565 565 if p not in reachable:
566 566 reachable[p] = 1
567 567 visit.append(p)
568 568 return reachable
569 569
570 570 def nodesbetween(self, roots=None, heads=None):
571 571 """Return a tuple containing three elements. Elements 1 and 2 contain
572 572 a final list of bases and heads after all the unreachable ones have been
573 573 pruned. Element 0 contains a topologically sorted list of all
574 574
575 575 nodes that satisfy these constraints:
576 576 1. All nodes must be descended from a node in roots (the nodes on
577 577 roots are considered descended from themselves).
578 578 2. All nodes must also be ancestors of a node in heads (the nodes in
579 579 heads are considered to be their own ancestors).
580 580
581 581 If roots is unspecified, nullid is assumed as the only root.
582 582 If heads is unspecified, it is taken to be the output of the
583 583 heads method (i.e. a list of all nodes in the repository that
584 584 have no children)."""
585 585 nonodes = ([], [], [])
586 586 if roots is not None:
587 587 roots = list(roots)
588 588 if not roots:
589 589 return nonodes
590 590 lowestrev = min([self.rev(n) for n in roots])
591 591 else:
592 592 roots = [nullid] # Everybody's a descendent of nullid
593 593 lowestrev = nullrev
594 594 if (lowestrev == nullrev) and (heads is None):
595 595 # We want _all_ the nodes!
596 596 return ([self.node(r) for r in xrange(0, self.count())],
597 597 [nullid], list(self.heads()))
598 598 if heads is None:
599 599 # All nodes are ancestors, so the latest ancestor is the last
600 600 # node.
601 601 highestrev = self.count() - 1
602 602 # Set ancestors to None to signal that every node is an ancestor.
603 603 ancestors = None
604 604 # Set heads to an empty dictionary for later discovery of heads
605 605 heads = {}
606 606 else:
607 607 heads = list(heads)
608 608 if not heads:
609 609 return nonodes
610 610 ancestors = {}
611 611 # Turn heads into a dictionary so we can remove 'fake' heads.
612 612 # Also, later we will be using it to filter out the heads we can't
613 613 # find from roots.
614 614 heads = dict.fromkeys(heads, 0)
615 615 # Start at the top and keep marking parents until we're done.
616 616 nodestotag = heads.keys()
617 617 # Remember where the top was so we can use it as a limit later.
618 618 highestrev = max([self.rev(n) for n in nodestotag])
619 619 while nodestotag:
620 620 # grab a node to tag
621 621 n = nodestotag.pop()
622 622 # Never tag nullid
623 623 if n == nullid:
624 624 continue
625 625 # A node's revision number represents its place in a
626 626 # topologically sorted list of nodes.
627 627 r = self.rev(n)
628 628 if r >= lowestrev:
629 629 if n not in ancestors:
630 630 # If we are possibly a descendent of one of the roots
631 631 # and we haven't already been marked as an ancestor
632 632 ancestors[n] = 1 # Mark as ancestor
633 633 # Add non-nullid parents to list of nodes to tag.
634 634 nodestotag.extend([p for p in self.parents(n) if
635 635 p != nullid])
636 636 elif n in heads: # We've seen it before, is it a fake head?
637 637 # So it is, real heads should not be the ancestors of
638 638 # any other heads.
639 639 heads.pop(n)
640 640 if not ancestors:
641 641 return nonodes
642 642 # Now that we have our set of ancestors, we want to remove any
643 643 # roots that are not ancestors.
644 644
645 645 # If one of the roots was nullid, everything is included anyway.
646 646 if lowestrev > nullrev:
647 647 # But, since we weren't, let's recompute the lowest rev to not
648 648 # include roots that aren't ancestors.
649 649
650 650 # Filter out roots that aren't ancestors of heads
651 651 roots = [n for n in roots if n in ancestors]
652 652 # Recompute the lowest revision
653 653 if roots:
654 654 lowestrev = min([self.rev(n) for n in roots])
655 655 else:
656 656 # No more roots? Return empty list
657 657 return nonodes
658 658 else:
659 659 # We are descending from nullid, and don't need to care about
660 660 # any other roots.
661 661 lowestrev = nullrev
662 662 roots = [nullid]
663 663 # Transform our roots list into a 'set' (i.e. a dictionary where the
664 664 # values don't matter).
665 665 descendents = dict.fromkeys(roots, 1)
666 666 # Also, keep the original roots so we can filter out roots that aren't
667 667 # 'real' roots (i.e. are descended from other roots).
668 668 roots = descendents.copy()
669 669 # Our topologically sorted list of output nodes.
670 670 orderedout = []
671 671 # Don't start at nullid since we don't want nullid in our output list,
672 672 # and if nullid shows up in descendents, empty parents will look like
673 673 # they're descendents.
674 674 for r in xrange(max(lowestrev, 0), highestrev + 1):
675 675 n = self.node(r)
676 676 isdescendent = False
677 677 if lowestrev == nullrev: # Everybody is a descendent of nullid
678 678 isdescendent = True
679 679 elif n in descendents:
680 680 # n is already a descendent
681 681 isdescendent = True
682 682 # This check only needs to be done here because all the roots
683 683 # will start being marked as descendents before the loop.
684 684 if n in roots:
685 685 # If n was a root, check if it's a 'real' root.
686 686 p = tuple(self.parents(n))
687 687 # If any of its parents are descendents, it's not a root.
688 688 if (p[0] in descendents) or (p[1] in descendents):
689 689 roots.pop(n)
690 690 else:
691 691 p = tuple(self.parents(n))
692 692 # A node is a descendent if either of its parents are
693 693 # descendents. (We seeded the descendents list with the roots
694 694 # up there, remember?)
695 695 if (p[0] in descendents) or (p[1] in descendents):
696 696 descendents[n] = 1
697 697 isdescendent = True
698 698 if isdescendent and ((ancestors is None) or (n in ancestors)):
699 699 # Only include nodes that are both descendents and ancestors.
700 700 orderedout.append(n)
701 701 if (ancestors is not None) and (n in heads):
702 702 # We're trying to figure out which heads are reachable
703 703 # from roots.
704 704 # Mark this head as having been reached
705 705 heads[n] = 1
706 706 elif ancestors is None:
707 707 # Otherwise, we're trying to discover the heads.
708 708 # Assume this is a head because if it isn't, the next step
709 709 # will eventually remove it.
710 710 heads[n] = 1
711 711 # But, obviously its parents aren't.
712 712 for p in self.parents(n):
713 713 heads.pop(p, None)
714 714 heads = [n for n in heads.iterkeys() if heads[n] != 0]
715 715 roots = roots.keys()
716 716 assert orderedout
717 717 assert roots
718 718 assert heads
719 719 return (orderedout, roots, heads)
720 720
721 721 def heads(self, start=None, stop=None):
722 722 """return the list of all nodes that have no children
723 723
724 724 if start is specified, only heads that are descendants of
725 725 start will be returned
726 726 if stop is specified, it will consider all the revs from stop
727 727 as if they had no children
728 728 """
729 729 if start is None:
730 730 start = nullid
731 731 if stop is None:
732 732 stop = []
733 733 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
734 734 startrev = self.rev(start)
735 735 reachable = {startrev: 1}
736 736 heads = {startrev: 1}
737 737
738 738 parentrevs = self.parentrevs
739 739 for r in xrange(startrev + 1, self.count()):
740 740 for p in parentrevs(r):
741 741 if p in reachable:
742 742 if r not in stoprevs:
743 743 reachable[r] = 1
744 744 heads[r] = 1
745 745 if p in heads and p not in stoprevs:
746 746 del heads[p]
747 747
748 748 return [self.node(r) for r in heads]
749 749
750 750 def children(self, node):
751 751 """find the children of a given node"""
752 752 c = []
753 753 p = self.rev(node)
754 754 for r in range(p + 1, self.count()):
755 755 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
756 756 if prevs:
757 757 for pr in prevs:
758 758 if pr == p:
759 759 c.append(self.node(r))
760 760 elif p == nullrev:
761 761 c.append(self.node(r))
762 762 return c
763 763
764 764 def _match(self, id):
765 765 if isinstance(id, (long, int)):
766 766 # rev
767 767 return self.node(id)
768 768 if len(id) == 20:
769 769 # possibly a binary node
770 770 # odds of a binary node being all hex in ASCII are 1 in 10**25
771 771 try:
772 772 node = id
773 773 r = self.rev(node) # quick search the index
774 774 return node
775 775 except LookupError:
776 776 pass # may be partial hex id
777 777 try:
778 778 # str(rev)
779 779 rev = int(id)
780 780 if str(rev) != id:
781 781 raise ValueError
782 782 if rev < 0:
783 783 rev = self.count() + rev
784 784 if rev < 0 or rev >= self.count():
785 785 raise ValueError
786 786 return self.node(rev)
787 787 except (ValueError, OverflowError):
788 788 pass
789 789 if len(id) == 40:
790 790 try:
791 791 # a full hex nodeid?
792 792 node = bin(id)
793 793 r = self.rev(node)
794 794 return node
795 795 except TypeError:
796 796 pass
797 797
798 798 def _partialmatch(self, id):
799 799 if len(id) < 40:
800 800 try:
801 801 # hex(node)[:...]
802 802 bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
803 803 node = None
804 804 for n in self.nodemap:
805 805 if n.startswith(bin_id) and hex(n).startswith(id):
806 806 if node is not None:
807 807 raise LookupError(_("Ambiguous identifier"))
808 808 node = n
809 809 if node is not None:
810 810 return node
811 811 except TypeError:
812 812 pass
813 813
814 814 def lookup(self, id):
815 815 """locate a node based on:
816 816 - revision number or str(revision number)
817 817 - nodeid or subset of hex nodeid
818 818 """
819 819 n = self._match(id)
820 820 if n is not None:
821 821 return n
822 822 n = self._partialmatch(id)
823 823 if n:
824 824 return n
825 825
826 826 raise LookupError(_("No match found"))
827 827
828 828 def cmp(self, node, text):
829 829 """compare text with a given file revision"""
830 830 p1, p2 = self.parents(node)
831 831 return hash(text, p1, p2) != node
832 832
833 833 def diff(self, a, b):
834 834 """return a delta between two revisions"""
835 835 return mdiff.textdiff(a, b)
836 836
837 837 def patches(self, t, pl):
838 838 """apply a list of patches to a string"""
839 839 return mdiff.patches(t, pl)
840 840
841 def chunk(self, rev, df=None, cachelen=4096):
841 def chunk(self, rev, df=None):
842 842 start, length = self.start(rev), self.length(rev)
843 inline = self._inline
844 if inline:
843 if self._inline:
845 844 start += (rev + 1) * self._io.size
846 845 end = start + length
847 846 def loadcache(df):
848 cache_length = max(cachelen, length) # 4k
847 cache_length = max(65536, length)
849 848 if not df:
850 if inline:
849 if self._inline:
851 850 df = self.opener(self.indexfile)
852 851 else:
853 852 df = self.opener(self.datafile)
854 853 df.seek(start)
855 854 self._chunkcache = (start, df.read(cache_length))
856 855
857 856 if not self._chunkcache:
858 857 loadcache(df)
859 858
860 859 cache_start = self._chunkcache[0]
861 860 cache_end = cache_start + len(self._chunkcache[1])
862 861 if start >= cache_start and end <= cache_end:
863 862 # it is cached
864 863 offset = start - cache_start
865 864 else:
866 865 loadcache(df)
867 866 offset = 0
868 867
869 return decompress(self._chunkcache[1][offset:offset + length])
868 # avoid copying large chunks
869 c = self._chunkcache[1]
870 if len(c) > length:
871 c = c[offset:offset + length]
872
873 return decompress(c)
870 874
871 875 def delta(self, node):
872 876 """return or calculate a delta between a node and its predecessor"""
873 877 r = self.rev(node)
874 878 return self.revdiff(r - 1, r)
875 879
876 880 def revdiff(self, rev1, rev2):
877 881 """return or calculate a delta between two revisions"""
878 882 b1 = self.base(rev1)
879 883 b2 = self.base(rev2)
880 884 if b1 == b2 and rev1 + 1 == rev2:
881 885 return self.chunk(rev2)
882 886 else:
883 887 return self.diff(self.revision(self.node(rev1)),
884 888 self.revision(self.node(rev2)))
885 889
886 890 def revision(self, node):
887 891 """return an uncompressed revision of a given"""
888 892 if node == nullid:
889 893 return ""
890 894 if self._cache and self._cache[0] == node:
891 895 return self._cache[2]
892 896
893 897 # look up what we need to read
894 898 text = None
895 899 rev = self.rev(node)
896 900 base = self.base(rev)
897 901
898 902 if self._inline:
899 903 # we probably have the whole chunk cached
900 904 df = None
901 905 else:
902 906 df = self.opener(self.datafile)
903 907
904 908 # do we have useful data cached?
905 909 if self._cache and self._cache[1] >= base and self._cache[1] < rev:
906 910 base = self._cache[1]
907 911 text = self._cache[2]
908 912 self._loadindex(base, rev + 1)
909 913 else:
910 914 self._loadindex(base, rev + 1)
911 915 text = self.chunk(base, df=df)
912 916
913 917 bins = []
914 918 for r in xrange(base + 1, rev + 1):
915 919 bins.append(self.chunk(r, df=df))
916 920
917 921 text = self.patches(text, bins)
918 922
919 923 p1, p2 = self.parents(node)
920 924 if node != hash(text, p1, p2):
921 925 raise RevlogError(_("integrity check failed on %s:%d")
922 926 % (self.datafile, rev))
923 927
924 928 self._cache = (node, rev, text)
925 929 return text
926 930
927 931 def checkinlinesize(self, tr, fp=None):
928 932 if not self._inline:
929 933 return
930 934 if not fp:
931 935 fp = self.opener(self.indexfile, 'r')
932 936 fp.seek(0, 2)
933 937 size = fp.tell()
934 938 if size < 131072:
935 939 return
936 940 trinfo = tr.find(self.indexfile)
937 941 if trinfo == None:
938 942 raise RevlogError(_("%s not found in the transaction")
939 943 % self.indexfile)
940 944
941 945 trindex = trinfo[2]
942 946 dataoff = self.start(trindex)
943 947
944 948 tr.add(self.datafile, dataoff)
945 949 df = self.opener(self.datafile, 'w')
946 950 calc = self._io.size
947 951 for r in xrange(self.count()):
948 952 start = self.start(r) + (r + 1) * calc
949 953 length = self.length(r)
950 954 fp.seek(start)
951 955 d = fp.read(length)
952 956 df.write(d)
953 957 fp.close()
954 958 df.close()
955 959 fp = self.opener(self.indexfile, 'w', atomictemp=True)
956 960 self.version &= ~(REVLOGNGINLINEDATA)
957 961 self._inline = False
958 962 for i in xrange(self.count()):
959 963 e = self._io.packentry(self.index[i], self.node, self.version)
960 964 fp.write(e)
961 965
962 966 # if we don't call rename, the temp file will never replace the
963 967 # real index
964 968 fp.rename()
965 969
966 970 tr.replace(self.indexfile, trindex * calc)
967 971 self._chunkcache = None
968 972
969 973 def addrevision(self, text, transaction, link, p1, p2, d=None):
970 974 """add a revision to the log
971 975
972 976 text - the revision data to add
973 977 transaction - the transaction object used for rollback
974 978 link - the linkrev data to add
975 979 p1, p2 - the parent nodeids of the revision
976 980 d - an optional precomputed delta
977 981 """
978 982 dfh = None
979 983 if not self._inline:
980 984 dfh = self.opener(self.datafile, "a")
981 985 ifh = self.opener(self.indexfile, "a+")
982 986 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
983 987
984 988 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
985 989 node = hash(text, p1, p2)
986 990 if node in self.nodemap:
987 991 return node
988 992
989 993 curr = self.count()
990 994 prev = curr - 1
991 995 base = self.base(prev)
992 996 offset = self.end(prev)
993 997
994 998 if curr:
995 999 if not d:
996 1000 ptext = self.revision(self.node(prev))
997 1001 d = self.diff(ptext, text)
998 1002 data = compress(d)
999 1003 l = len(data[1]) + len(data[0])
1000 1004 dist = l + offset - self.start(base)
1001 1005
1002 1006 # full versions are inserted when the needed deltas
1003 1007 # become comparable to the uncompressed text
1004 1008 if not curr or dist > len(text) * 2:
1005 1009 data = compress(text)
1006 1010 l = len(data[1]) + len(data[0])
1007 1011 base = curr
1008 1012
1009 1013 e = (offset_type(offset, 0), l, len(text),
1010 1014 base, link, self.rev(p1), self.rev(p2), node)
1011 1015 self.index.insert(-1, e)
1012 1016 self.nodemap[node] = curr
1013 1017
1014 1018 entry = self._io.packentry(e, self.node, self.version)
1015 1019 if not self._inline:
1016 1020 transaction.add(self.datafile, offset)
1017 1021 transaction.add(self.indexfile, curr * len(entry))
1018 1022 if data[0]:
1019 1023 dfh.write(data[0])
1020 1024 dfh.write(data[1])
1021 1025 dfh.flush()
1022 1026 ifh.write(entry)
1023 1027 else:
1024 1028 ifh.seek(0, 2)
1025 1029 transaction.add(self.indexfile, ifh.tell(), prev)
1026 1030 ifh.write(entry)
1027 1031 ifh.write(data[0])
1028 1032 ifh.write(data[1])
1029 1033 self.checkinlinesize(transaction, ifh)
1030 1034
1031 1035 self._cache = (node, curr, text)
1032 1036 return node
1033 1037
1034 1038 def ancestor(self, a, b):
1035 1039 """calculate the least common ancestor of nodes a and b"""
1036 1040
1037 1041 def parents(rev):
1038 1042 return [p for p in self.parentrevs(rev) if p != nullrev]
1039 1043
1040 1044 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1041 1045 if c is None:
1042 1046 return nullid
1043 1047
1044 1048 return self.node(c)
1045 1049
1046 1050 def group(self, nodelist, lookup, infocollect=None):
1047 1051 """calculate a delta group
1048 1052
1049 1053 Given a list of changeset revs, return a set of deltas and
1050 1054 metadata corresponding to nodes. The first delta is
1051 1055 parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
1052 1056 have this parent as it has all history before these
1053 1057 changesets. parent is parent[0].
1054 1058 """
1055 1059 revs = [self.rev(n) for n in nodelist]
1056 1060
1057 1061 # if we don't have any revisions touched by these changesets, bail
1058 1062 if not revs:
1059 1063 yield changegroup.closechunk()
1060 1064 return
1061 1065
1062 1066 # add the parent of the first rev
1063 1067 p = self.parents(self.node(revs[0]))[0]
1064 1068 revs.insert(0, self.rev(p))
1065 1069
1066 1070 # build deltas
1067 1071 for d in xrange(0, len(revs) - 1):
1068 1072 a, b = revs[d], revs[d + 1]
1069 1073 nb = self.node(b)
1070 1074
1071 1075 if infocollect is not None:
1072 1076 infocollect(nb)
1073 1077
1074 1078 d = self.revdiff(a, b)
1075 1079 p = self.parents(nb)
1076 1080 meta = nb + p[0] + p[1] + lookup(nb)
1077 1081 yield changegroup.genchunk("%s%s" % (meta, d))
1078 1082
1079 1083 yield changegroup.closechunk()
1080 1084
1081 1085 def addgroup(self, revs, linkmapper, transaction, unique=0):
1082 1086 """
1083 1087 add a delta group
1084 1088
1085 1089 given a set of deltas, add them to the revision log. The
1086 1090 first delta is against its parent, which should be in our
1087 1091 log; the rest are against the previous delta.
1088 1092 """
1089 1093
1090 1094 #track the base of the current delta log
1091 1095 r = self.count()
1092 1096 t = r - 1
1093 1097 node = None
1094 1098
1095 1099 base = prev = nullrev
1096 1100 start = end = textlen = 0
1097 1101 if r:
1098 1102 end = self.end(t)
1099 1103
1100 1104 ifh = self.opener(self.indexfile, "a+")
1101 1105 ifh.seek(0, 2)
1102 1106 transaction.add(self.indexfile, ifh.tell(), self.count())
1103 1107 if self._inline:
1104 1108 dfh = None
1105 1109 else:
1106 1110 transaction.add(self.datafile, end)
1107 1111 dfh = self.opener(self.datafile, "a")
1108 1112
1109 1113 # loop through our set of deltas
1110 1114 chain = None
1111 1115 for chunk in revs:
1112 1116 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1113 1117 link = linkmapper(cs)
1114 1118 if node in self.nodemap:
1115 1119 # this can happen if two branches make the same change
1116 1120 # if unique:
1117 1121 # raise RevlogError(_("already have %s") % hex(node[:4]))
1118 1122 chain = node
1119 1123 continue
1120 1124 delta = chunk[80:]
1121 1125
1122 1126 for p in (p1, p2):
1123 1127 if not p in self.nodemap:
1124 1128 raise LookupError(_("unknown parent %s") % short(p))
1125 1129
1126 1130 if not chain:
1127 1131 # retrieve the parent revision of the delta chain
1128 1132 chain = p1
1129 1133 if not chain in self.nodemap:
1130 1134 raise LookupError(_("unknown base %s") % short(chain[:4]))
1131 1135
1132 1136 # full versions are inserted when the needed deltas become
1133 1137 # comparable to the uncompressed text or when the previous
1134 1138 # version is not the one we have a delta against. We use
1135 1139 # the size of the previous full rev as a proxy for the
1136 1140 # current size.
1137 1141
1138 1142 if chain == prev:
1139 1143 tempd = compress(delta)
1140 1144 cdelta = tempd[0] + tempd[1]
1141 1145 textlen = mdiff.patchedsize(textlen, delta)
1142 1146
1143 1147 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1144 1148 # flush our writes here so we can read it in revision
1145 1149 if dfh:
1146 1150 dfh.flush()
1147 1151 ifh.flush()
1148 1152 text = self.revision(chain)
1149 1153 text = self.patches(text, [delta])
1150 1154 chk = self._addrevision(text, transaction, link, p1, p2, None,
1151 1155 ifh, dfh)
1152 1156 if not dfh and not self._inline:
1153 1157 # addrevision switched from inline to conventional
1154 1158 # reopen the index
1155 1159 dfh = self.opener(self.datafile, "a")
1156 1160 ifh = self.opener(self.indexfile, "a")
1157 1161 if chk != node:
1158 1162 raise RevlogError(_("consistency error adding group"))
1159 1163 textlen = len(text)
1160 1164 else:
1161 1165 e = (offset_type(end, 0), len(cdelta), textlen, base,
1162 1166 link, self.rev(p1), self.rev(p2), node)
1163 1167 self.index.insert(-1, e)
1164 1168 self.nodemap[node] = r
1165 1169 entry = self._io.packentry(e, self.node, self.version)
1166 1170 if self._inline:
1167 1171 ifh.write(entry)
1168 1172 ifh.write(cdelta)
1169 1173 self.checkinlinesize(transaction, ifh)
1170 1174 if not self._inline:
1171 1175 dfh = self.opener(self.datafile, "a")
1172 1176 ifh = self.opener(self.indexfile, "a")
1173 1177 else:
1174 1178 dfh.write(cdelta)
1175 1179 ifh.write(entry)
1176 1180
1177 1181 t, r, chain, prev = r, r + 1, node, node
1178 1182 base = self.base(t)
1179 1183 start = self.start(base)
1180 1184 end = self.end(t)
1181 1185
1182 1186 return node
1183 1187
1184 1188 def strip(self, rev, minlink):
1185 1189 if self.count() == 0 or rev >= self.count():
1186 1190 return
1187 1191
1188 1192 if isinstance(self.index, lazyindex):
1189 1193 self._loadindexmap()
1190 1194
1191 1195 # When stripping away a revision, we need to make sure it
1192 1196 # does not actually belong to an older changeset.
1193 1197 # The minlink parameter defines the oldest revision
1194 1198 # we're allowed to strip away.
1195 1199 while minlink > self.index[rev][4]:
1196 1200 rev += 1
1197 1201 if rev >= self.count():
1198 1202 return
1199 1203
1200 1204 # first truncate the files on disk
1201 1205 end = self.start(rev)
1202 1206 if not self._inline:
1203 1207 df = self.opener(self.datafile, "a")
1204 1208 df.truncate(end)
1205 1209 end = rev * self._io.size
1206 1210 else:
1207 1211 end += rev * self._io.size
1208 1212
1209 1213 indexf = self.opener(self.indexfile, "a")
1210 1214 indexf.truncate(end)
1211 1215
1212 1216 # then reset internal state in memory to forget those revisions
1213 1217 self._cache = None
1214 1218 self._chunkcache = None
1215 1219 for x in xrange(rev, self.count()):
1216 1220 del self.nodemap[self.node(x)]
1217 1221
1218 1222 del self.index[rev:-1]
1219 1223
1220 1224 def checksize(self):
1221 1225 expected = 0
1222 1226 if self.count():
1223 1227 expected = self.end(self.count() - 1)
1224 1228
1225 1229 try:
1226 1230 f = self.opener(self.datafile)
1227 1231 f.seek(0, 2)
1228 1232 actual = f.tell()
1229 1233 dd = actual - expected
1230 1234 except IOError, inst:
1231 1235 if inst.errno != errno.ENOENT:
1232 1236 raise
1233 1237 dd = 0
1234 1238
1235 1239 try:
1236 1240 f = self.opener(self.indexfile)
1237 1241 f.seek(0, 2)
1238 1242 actual = f.tell()
1239 1243 s = self._io.size
1240 1244 i = actual / s
1241 1245 di = actual - (i * s)
1242 1246 if self._inline:
1243 1247 databytes = 0
1244 1248 for r in xrange(self.count()):
1245 1249 databytes += self.length(r)
1246 1250 dd = 0
1247 1251 di = actual - self.count() * s - databytes
1248 1252 except IOError, inst:
1249 1253 if inst.errno != errno.ENOENT:
1250 1254 raise
1251 1255 di = 0
1252 1256
1253 1257 return (dd, di)
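
The substance of the changeset is in revlog.chunk() above: the caller-supplied cachelen parameter is gone, a cache miss now reads at least 64 KiB (max(65536, length)) rather than roughly 4 KiB, and the cached buffer is returned without slicing when it already is exactly the requested chunk. Below is a minimal standalone sketch of that caching strategy, assuming a plain seekable file object; it is an illustration, not the revlog API:

    class ChunkCache:
        """Simplified sketch of the chunk-cache behaviour introduced above."""

        def __init__(self, fileobj):
            self.f = fileobj
            self.cache = None  # (start offset, buffered bytes)

        def read(self, start, length):
            end = start + length
            if self.cache:
                cstart = self.cache[0]
                cend = cstart + len(self.cache[1])
                if start >= cstart and end <= cend:
                    # cache hit: serve the chunk from the buffered region
                    buf = self.cache[1]
                    if len(buf) == length:
                        return buf  # avoid copying large chunks
                    offset = start - cstart
                    return buf[offset:offset + length]
            # cache miss: buffer at least 64 KiB starting at the chunk
            self.f.seek(start)
            data = self.f.read(max(65536, length))
            self.cache = (start, data)
            return data[:length] if len(data) > length else data

Since revision chunks are usually far smaller than 64 KiB, walking a delta chain now refills the cache much less often, which appears to be the speedup the (truncated) commit message refers to.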