revlog: eliminate diff and patches functions...
Matt Mackall
r4989:1aaed3d6 default
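
This cleanup drops revlog's one-line diff() and patches() wrappers and has every caller use mdiff directly. A minimal round-trip sketch of the two mdiff helpers involved, assuming a Mercurial installation is importable (both operate on byte strings):

```python
from mercurial import mdiff

old = b"line one\nline two\nline three\n"
new = b"line one\nline 2\nline three\nline four\n"

delta = mdiff.textdiff(old, new)           # binary delta taking old to new
assert mdiff.patches(old, [delta]) == new  # replaying the delta restores new
```
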
bundlerepo.py
@@ -1,251 +1,250 @@
1 1 """
2 2 bundlerepo.py - repository class for viewing uncompressed bundles
3 3
4 4 This provides a read-only repository interface to bundles as if
5 5 they were part of the actual repository.
6 6
7 7 Copyright 2006, 2007 Benoit Boissinot <bboissin@gmail.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import _
15 import changegroup, util, os, struct, bz2, tempfile
16
15 import changegroup, util, os, struct, bz2, tempfile, mdiff
17 16 import localrepo, changelog, manifest, filelog, revlog
18 17
19 18 class bundlerevlog(revlog.revlog):
20 19 def __init__(self, opener, indexfile, bundlefile,
21 20 linkmapper=None):
22 21 # How it works:
23 22 # to retrieve a revision, we need to know the offset of
24 23 # the revision in the bundlefile (an opened file).
25 24 #
26 25 # We store this offset in the index (start). To differentiate a
27 26 # rev in the bundle from a rev in the revlog, we check
28 27 # len(index[r]). If the tuple is bigger than 7, it is a bundle
29 28 # (it is bigger since we store the node to which the delta is)
30 29 #
31 30 revlog.revlog.__init__(self, opener, indexfile)
32 31 self.bundlefile = bundlefile
33 32 self.basemap = {}
34 33 def chunkpositer():
35 34 for chunk in changegroup.chunkiter(bundlefile):
36 35 pos = bundlefile.tell()
37 36 yield chunk, pos - len(chunk)
38 37 n = self.count()
39 38 prev = None
40 39 for chunk, start in chunkpositer():
41 40 size = len(chunk)
42 41 if size < 80:
43 42 raise util.Abort("invalid changegroup")
44 43 start += 80
45 44 size -= 80
46 45 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
47 46 if node in self.nodemap:
48 47 prev = node
49 48 continue
50 49 for p in (p1, p2):
51 50 if not p in self.nodemap:
52 51 raise revlog.LookupError(_("unknown parent %s") % short(p))
53 52 if linkmapper is None:
54 53 link = n
55 54 else:
56 55 link = linkmapper(cs)
57 56
58 57 if not prev:
59 58 prev = p1
60 59 # start, size, base (not used), link, p1, p2, delta ref
61 60 e = (revlog.offset_type(start, 0), size, -1, None, link,
62 61 self.rev(p1), self.rev(p2), node)
63 62 self.basemap[n] = prev
64 63 self.index.insert(-1, e)
65 64 self.nodemap[node] = n
66 65 prev = node
67 66 n += 1
68 67
69 68 def bundle(self, rev):
70 69 """is rev from the bundle"""
71 70 if rev < 0:
72 71 return False
73 72 return rev in self.basemap
74 73 def bundlebase(self, rev): return self.basemap[rev]
75 74 def chunk(self, rev, df=None, cachelen=4096):
76 75 # Warning: for a bundle revision, the diff is against bundlebase,
77 76 # not against rev - 1
78 77 # XXX: could use some caching
79 78 if not self.bundle(rev):
80 79 return revlog.revlog.chunk(self, rev, df)
81 80 self.bundlefile.seek(self.start(rev))
82 81 return self.bundlefile.read(self.length(rev))
83 82
84 83 def revdiff(self, rev1, rev2):
85 84 """return or calculate a delta between two revisions"""
86 85 if self.bundle(rev1) and self.bundle(rev2):
87 86 # hot path for bundle
88 87 revb = self.rev(self.bundlebase(rev2))
89 88 if revb == rev1:
90 89 return self.chunk(rev2)
91 90 elif not self.bundle(rev1) and not self.bundle(rev2):
92 91 return revlog.revlog.revdiff(self, rev1, rev2)
93 92
94 return self.diff(self.revision(self.node(rev1)),
93 return mdiff.textdiff(self.revision(self.node(rev1)),
95 94 self.revision(self.node(rev2)))
96 95
97 96 def revision(self, node):
98 97 """return an uncompressed revision of a given"""
99 98 if node == nullid: return ""
100 99
101 100 text = None
102 101 chain = []
103 102 iter_node = node
104 103 rev = self.rev(iter_node)
105 104 # reconstruct the revision if it is from a changegroup
106 105 while self.bundle(rev):
107 106 if self._cache and self._cache[0] == iter_node:
108 107 text = self._cache[2]
109 108 break
110 109 chain.append(rev)
111 110 iter_node = self.bundlebase(rev)
112 111 rev = self.rev(iter_node)
113 112 if text is None:
114 113 text = revlog.revlog.revision(self, iter_node)
115 114
116 115 while chain:
117 116 delta = self.chunk(chain.pop())
118 text = self.patches(text, [delta])
117 text = mdiff.patches(text, [delta])
119 118
120 119 p1, p2 = self.parents(node)
121 120 if node != revlog.hash(text, p1, p2):
122 121 raise revlog.RevlogError(_("integrity check failed on %s:%d")
123 122 % (self.datafile, self.rev(node)))
124 123
125 124 self._cache = (node, self.rev(node), text)
126 125 return text
127 126
128 127 def addrevision(self, text, transaction, link, p1=None, p2=None, d=None):
129 128 raise NotImplementedError
130 129 def addgroup(self, revs, linkmapper, transaction, unique=0):
131 130 raise NotImplementedError
132 131 def strip(self, rev, minlink):
133 132 raise NotImplementedError
134 133 def checksize(self):
135 134 raise NotImplementedError
136 135
137 136 class bundlechangelog(bundlerevlog, changelog.changelog):
138 137 def __init__(self, opener, bundlefile):
139 138 changelog.changelog.__init__(self, opener)
140 139 bundlerevlog.__init__(self, opener, self.indexfile, bundlefile)
141 140
142 141 class bundlemanifest(bundlerevlog, manifest.manifest):
143 142 def __init__(self, opener, bundlefile, linkmapper):
144 143 manifest.manifest.__init__(self, opener)
145 144 bundlerevlog.__init__(self, opener, self.indexfile, bundlefile,
146 145 linkmapper)
147 146
148 147 class bundlefilelog(bundlerevlog, filelog.filelog):
149 148 def __init__(self, opener, path, bundlefile, linkmapper):
150 149 filelog.filelog.__init__(self, opener, path)
151 150 bundlerevlog.__init__(self, opener, self.indexfile, bundlefile,
152 151 linkmapper)
153 152
154 153 class bundlerepository(localrepo.localrepository):
155 154 def __init__(self, ui, path, bundlename):
156 155 localrepo.localrepository.__init__(self, ui, path)
157 156
158 157 self._url = 'bundle:' + bundlename
159 158 if path: self._url += '+' + path
160 159
161 160 self.tempfile = None
162 161 self.bundlefile = open(bundlename, "rb")
163 162 header = self.bundlefile.read(6)
164 163 if not header.startswith("HG"):
165 164 raise util.Abort(_("%s: not a Mercurial bundle file") % bundlename)
166 165 elif not header.startswith("HG10"):
167 166 raise util.Abort(_("%s: unknown bundle version") % bundlename)
168 167 elif header == "HG10BZ":
169 168 fdtemp, temp = tempfile.mkstemp(prefix="hg-bundle-",
170 169 suffix=".hg10un", dir=self.path)
171 170 self.tempfile = temp
172 171 fptemp = os.fdopen(fdtemp, 'wb')
173 172 def generator(f):
174 173 zd = bz2.BZ2Decompressor()
175 174 zd.decompress("BZ")
176 175 for chunk in f:
177 176 yield zd.decompress(chunk)
178 177 gen = generator(util.filechunkiter(self.bundlefile, 4096))
179 178
180 179 try:
181 180 fptemp.write("HG10UN")
182 181 for chunk in gen:
183 182 fptemp.write(chunk)
184 183 finally:
185 184 fptemp.close()
186 185 self.bundlefile.close()
187 186
188 187 self.bundlefile = open(self.tempfile, "rb")
189 188 # seek right after the header
190 189 self.bundlefile.seek(6)
191 190 elif header == "HG10UN":
192 191 # nothing to do
193 192 pass
194 193 else:
195 194 raise util.Abort(_("%s: unknown bundle compression type")
196 195 % bundlename)
197 196 self.changelog = bundlechangelog(self.sopener, self.bundlefile)
198 197 self.manifest = bundlemanifest(self.sopener, self.bundlefile,
199 198 self.changelog.rev)
200 199 # dict with the mapping 'filename' -> position in the bundle
201 200 self.bundlefilespos = {}
202 201 while 1:
203 202 f = changegroup.getchunk(self.bundlefile)
204 203 if not f:
205 204 break
206 205 self.bundlefilespos[f] = self.bundlefile.tell()
207 206 for c in changegroup.chunkiter(self.bundlefile):
208 207 pass
209 208
210 209 def url(self):
211 210 return self._url
212 211
213 212 def dev(self):
214 213 return -1
215 214
216 215 def file(self, f):
217 216 if f[0] == '/':
218 217 f = f[1:]
219 218 if f in self.bundlefilespos:
220 219 self.bundlefile.seek(self.bundlefilespos[f])
221 220 return bundlefilelog(self.sopener, f, self.bundlefile,
222 221 self.changelog.rev)
223 222 else:
224 223 return filelog.filelog(self.sopener, f)
225 224
226 225 def close(self):
227 226 """Close assigned bundle file immediately."""
228 227 self.bundlefile.close()
229 228
230 229 def __del__(self):
231 230 bundlefile = getattr(self, 'bundlefile', None)
232 231 if bundlefile and not bundlefile.closed:
233 232 bundlefile.close()
234 233 tempfile = getattr(self, 'tempfile', None)
235 234 if tempfile is not None:
236 235 os.unlink(tempfile)
237 236
238 237 def instance(ui, path, create):
239 238 if create:
240 239 raise util.Abort(_('cannot create new bundle repository'))
241 240 path = util.drop_scheme('file', path)
242 241 if path.startswith('bundle:'):
243 242 path = util.drop_scheme('bundle', path)
244 243 s = path.split("+", 1)
245 244 if len(s) == 1:
246 245 repopath, bundlename = "", s[0]
247 246 else:
248 247 repopath, bundlename = s
249 248 else:
250 249 repopath, bundlename = '', path
251 250 return bundlerepository(ui, repopath, bundlename)
revlog.py
@@ -1,1257 +1,1245 @@
1 1 """
2 2 revlog.py - storage back-end for mercurial
3 3
4 4 This provides efficient delta storage with O(1) retrieve and append
5 5 and O(changes) merge between branches
6 6
7 7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
8 8
9 9 This software may be used and distributed according to the terms
10 10 of the GNU General Public License, incorporated herein by reference.
11 11 """
12 12
13 13 from node import *
14 14 from i18n import _
15 15 import binascii, changegroup, errno, ancestor, mdiff, os
16 16 import sha, struct, util, zlib
17 17
18 18 # revlog flags
19 19 REVLOGV0 = 0
20 20 REVLOGNG = 1
21 21 REVLOGNGINLINEDATA = (1 << 16)
22 22 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
23 23 REVLOG_DEFAULT_FORMAT = REVLOGNG
24 24 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
25 25
26 26 class RevlogError(Exception):
27 27 pass
28 28 class LookupError(RevlogError):
29 29 pass
30 30
31 31 def getoffset(q):
32 32 if q & 0xFFFF:
33 33 raise RevlogError(_('incompatible revision flag %x') % q)
34 34 return int(q >> 16)
35 35
36 36 def gettype(q):
37 37 return int(q & 0xFFFF)
38 38
39 39 def offset_type(offset, type):
40 40 return long(long(offset) << 16 | type)
41 41
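
A self-contained sketch of the offset/type packing used by getoffset, gettype and offset_type above: the low 16 bits of an index entry's first field hold flags, and the bits above them hold the data-file offset.

```python
def offset_type(offset, type):
    # low 16 bits: type/flags, remaining high bits: data-file offset
    return (offset << 16) | type

q = offset_type(1234, 0)
assert q >> 16 == 1234   # what getoffset() returns
assert q & 0xFFFF == 0   # what gettype() returns
```
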
42 42 def hash(text, p1, p2):
43 43 """generate a hash from the given text and its parent hashes
44 44
45 45 This hash combines both the current file contents and its history
46 46 in a manner that makes it easy to distinguish nodes with the same
47 47 content in the revision graph.
48 48 """
49 49 l = [p1, p2]
50 50 l.sort()
51 51 s = sha.new(l[0])
52 52 s.update(l[1])
53 53 s.update(text)
54 54 return s.digest()
55 55
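
The node hash can be reproduced outside the revlog; a stand-alone sketch using hashlib in place of the legacy sha module, with nullid as 20 zero bytes per Mercurial's node module. Sorting the parents first makes the id independent of parent order:

```python
import hashlib

nullid = b"\0" * 20

def node_hash(text, p1, p2):
    a, b = sorted([p1, p2])   # parent order must not affect the node id
    return hashlib.sha1(a + b + text).digest()

assert node_hash(b"x", nullid, nullid) == node_hash(b"x", nullid, nullid)
assert len(node_hash(b"x", nullid, nullid)) == 20
```
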
56 56 def compress(text):
57 57 """ generate a possibly-compressed representation of text """
58 58 if not text:
59 59 return ("", text)
60 60 if len(text) < 44:
61 61 if text[0] == '\0':
62 62 return ("", text)
63 63 return ('u', text)
64 64 bin = zlib.compress(text)
65 65 if len(bin) > len(text):
66 66 if text[0] == '\0':
67 67 return ("", text)
68 68 return ('u', text)
69 69 return ("", bin)
70 70
71 71 def decompress(bin):
72 72 """ decompress the given input """
73 73 if not bin:
74 74 return bin
75 75 t = bin[0]
76 76 if t == '\0':
77 77 return bin
78 78 if t == 'x':
79 79 return zlib.decompress(bin)
80 80 if t == 'u':
81 81 return bin[1:]
82 82 raise RevlogError(_("unknown compression type %r") % t)
83 83
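
A simplified round-trip of the storage convention above, omitting the short-text and NUL-prefix special cases: zlib output begins with an 'x' byte under default settings, 'u' marks text stored verbatim, and decompress() dispatches on that first byte.

```python
import zlib

def compress(text):
    if not text:
        return b"", text
    bin = zlib.compress(text)
    if len(bin) > len(text):   # compression did not pay off
        return b"u", text
    return b"", bin            # zlib stream; first byte is 'x'

def decompress(bin):
    if not bin or bin[:1] == b"\0":
        return bin
    if bin[:1] == b"x":
        return zlib.decompress(bin)
    if bin[:1] == b"u":
        return bin[1:]
    raise ValueError("unknown compression type %r" % bin[:1])

for text in (b"", b"abc", b"a" * 4096):
    assert decompress(b"".join(compress(text))) == text
```
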
84 84 class lazyparser(object):
85 85 """
86 86 this class avoids the need to parse the entirety of large indices
87 87 """
88 88
89 89 # lazyparser is not safe to use on windows if win32 extensions not
90 90 # available. It keeps a file handle open, which makes it impossible
91 91 # to break hardlinks on local cloned repos.
92 92 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
93 93 hasattr(util, 'win32api'))
94 94
95 95 def __init__(self, dataf, size):
96 96 self.dataf = dataf
97 97 self.s = struct.calcsize(indexformatng)
98 98 self.datasize = size
99 99 self.l = size/self.s
100 100 self.index = [None] * self.l
101 101 self.map = {nullid: nullrev}
102 102 self.allmap = 0
103 103 self.all = 0
104 104 self.mapfind_count = 0
105 105
106 106 def loadmap(self):
107 107 """
108 108 during a commit, we need to make sure the rev being added is
109 109 not a duplicate. This requires loading the entire index,
110 110 which is fairly slow. loadmap can load up just the node map,
111 111 which takes much less time.
112 112 """
113 113 if self.allmap:
114 114 return
115 115 end = self.datasize
116 116 self.allmap = 1
117 117 cur = 0
118 118 count = 0
119 119 blocksize = self.s * 256
120 120 self.dataf.seek(0)
121 121 while cur < end:
122 122 data = self.dataf.read(blocksize)
123 123 off = 0
124 124 for x in xrange(256):
125 125 n = data[off + ngshaoffset:off + ngshaoffset + 20]
126 126 self.map[n] = count
127 127 count += 1
128 128 if count >= self.l:
129 129 break
130 130 off += self.s
131 131 cur += blocksize
132 132
133 133 def loadblock(self, blockstart, blocksize, data=None):
134 134 if self.all:
135 135 return
136 136 if data is None:
137 137 self.dataf.seek(blockstart)
138 138 if blockstart + blocksize > self.datasize:
139 139 # the revlog may have grown since we've started running,
140 140 # but we don't have space in self.index for more entries.
141 141 # limit blocksize so that we don't get too much data.
142 142 blocksize = max(self.datasize - blockstart, 0)
143 143 data = self.dataf.read(blocksize)
144 144 lend = len(data) / self.s
145 145 i = blockstart / self.s
146 146 off = 0
147 147 # lazyindex supports __delitem__
148 148 if lend > len(self.index) - i:
149 149 lend = len(self.index) - i
150 150 for x in xrange(lend):
151 151 if self.index[i + x] == None:
152 152 b = data[off : off + self.s]
153 153 self.index[i + x] = b
154 154 n = b[ngshaoffset:ngshaoffset + 20]
155 155 self.map[n] = i + x
156 156 off += self.s
157 157
158 158 def findnode(self, node):
159 159 """search backwards through the index file for a specific node"""
160 160 if self.allmap:
161 161 return None
162 162
163 163 # hg log will cause many many searches for the manifest
164 164 # nodes. After we get called a few times, just load the whole
165 165 # thing.
166 166 if self.mapfind_count > 8:
167 167 self.loadmap()
168 168 if node in self.map:
169 169 return node
170 170 return None
171 171 self.mapfind_count += 1
172 172 last = self.l - 1
173 173 while self.index[last] != None:
174 174 if last == 0:
175 175 self.all = 1
176 176 self.allmap = 1
177 177 return None
178 178 last -= 1
179 179 end = (last + 1) * self.s
180 180 blocksize = self.s * 256
181 181 while end >= 0:
182 182 start = max(end - blocksize, 0)
183 183 self.dataf.seek(start)
184 184 data = self.dataf.read(end - start)
185 185 findend = end - start
186 186 while True:
187 187 # we're searching backwards, so we have to make sure
188 188 # we don't find a changeset where this node is a parent
189 189 off = data.rfind(node, 0, findend)
190 190 findend = off
191 191 if off >= 0:
192 192 i = off / self.s
193 193 off = i * self.s
194 194 n = data[off + ngshaoffset:off + ngshaoffset + 20]
195 195 if n == node:
196 196 self.map[n] = i + start / self.s
197 197 return node
198 198 else:
199 199 break
200 200 end -= blocksize
201 201 return None
202 202
203 203 def loadindex(self, i=None, end=None):
204 204 if self.all:
205 205 return
206 206 all = False
207 207 if i == None:
208 208 blockstart = 0
209 209 blocksize = (512 / self.s) * self.s
210 210 end = self.datasize
211 211 all = True
212 212 else:
213 213 if end:
214 214 blockstart = i * self.s
215 215 end = end * self.s
216 216 blocksize = end - blockstart
217 217 else:
218 218 blockstart = (i & ~63) * self.s
219 219 blocksize = self.s * 64
220 220 end = blockstart + blocksize
221 221 while blockstart < end:
222 222 self.loadblock(blockstart, blocksize)
223 223 blockstart += blocksize
224 224 if all:
225 225 self.all = True
226 226
227 227 class lazyindex(object):
228 228 """a lazy version of the index array"""
229 229 def __init__(self, parser):
230 230 self.p = parser
231 231 def __len__(self):
232 232 return len(self.p.index)
233 233 def load(self, pos):
234 234 if pos < 0:
235 235 pos += len(self.p.index)
236 236 self.p.loadindex(pos)
237 237 return self.p.index[pos]
238 238 def __getitem__(self, pos):
239 239 ret = self.p.index[pos] or self.load(pos)
240 240 if isinstance(ret, str):
241 241 ret = struct.unpack(indexformatng, ret)
242 242 return ret
243 243 def __setitem__(self, pos, item):
244 244 self.p.index[pos] = item
245 245 def __delitem__(self, pos):
246 246 del self.p.index[pos]
247 247 def insert(self, pos, e):
248 248 self.p.index.insert(pos, e)
249 249 def append(self, e):
250 250 self.p.index.append(e)
251 251
252 252 class lazymap(object):
253 253 """a lazy version of the node map"""
254 254 def __init__(self, parser):
255 255 self.p = parser
256 256 def load(self, key):
257 257 n = self.p.findnode(key)
258 258 if n == None:
259 259 raise KeyError(key)
260 260 def __contains__(self, key):
261 261 if key in self.p.map:
262 262 return True
263 263 self.p.loadmap()
264 264 return key in self.p.map
265 265 def __iter__(self):
266 266 yield nullid
267 267 for i in xrange(self.p.l):
268 268 ret = self.p.index[i]
269 269 if not ret:
270 270 self.p.loadindex(i)
271 271 ret = self.p.index[i]
272 272 if isinstance(ret, str):
273 273 ret = struct.unpack(indexformatng, ret)
274 274 yield ret[7]
275 275 def __getitem__(self, key):
276 276 try:
277 277 return self.p.map[key]
278 278 except KeyError:
279 279 try:
280 280 self.load(key)
281 281 return self.p.map[key]
282 282 except KeyError:
283 283 raise KeyError("node " + hex(key))
284 284 def __setitem__(self, key, val):
285 285 self.p.map[key] = val
286 286 def __delitem__(self, key):
287 287 del self.p.map[key]
288 288
289 289 indexformatv0 = ">4l20s20s20s"
290 290 v0shaoffset = 56
291 291
292 292 class revlogoldio(object):
293 293 def __init__(self):
294 294 self.size = struct.calcsize(indexformatv0)
295 295
296 296 def parseindex(self, fp, inline):
297 297 s = self.size
298 298 index = []
299 299 nodemap = {nullid: nullrev}
300 300 n = off = 0
301 301 data = fp.read()
302 302 l = len(data)
303 303 while off + s <= l:
304 304 cur = data[off:off + s]
305 305 off += s
306 306 e = struct.unpack(indexformatv0, cur)
307 307 # transform to revlogv1 format
308 308 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
309 309 nodemap[e[4]], nodemap[e[5]], e[6])
310 310 index.append(e2)
311 311 nodemap[e[6]] = n
312 312 n += 1
313 313
314 314 return index, nodemap, None
315 315
316 316 def packentry(self, entry, node, version):
317 317 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
318 318 node(entry[5]), node(entry[6]), entry[7])
319 319 return struct.pack(indexformatv0, *e2)
320 320
321 321 # index ng:
322 322 # 6 bytes offset
323 323 # 2 bytes flags
324 324 # 4 bytes compressed length
325 325 # 4 bytes uncompressed length
326 326 # 4 bytes: base rev
327 327 # 4 bytes link rev
328 328 # 4 bytes parent 1 rev
329 329 # 4 bytes parent 2 rev
330 330 # 32 bytes: nodeid
331 331 indexformatng = ">Qiiiiii20s12x"
332 332 ngshaoffset = 32
333 333 versionformat = ">I"
334 334
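
Packing a single revlogng entry with this format, as a sketch: Q carries the offset/flags word from offset_type, the six i fields hold the lengths, base, link and parent revisions, and 12x pads the 20-byte sha-1 to 32 bytes, giving a 64-byte record.

```python
import struct

indexformatng = ">Qiiiiii20s12x"
node = b"\xaa" * 20                # placeholder 20-byte node id
entry = ((4096 << 16) | 0,         # offset 4096, flags 0
         120, 300,                 # compressed / uncompressed length
         0, 7,                     # base rev, link rev
         5, -1,                    # parent revs (-1 is nullrev)
         node)

packed = struct.pack(indexformatng, *entry)
assert len(packed) == struct.calcsize(indexformatng) == 64
assert struct.unpack(indexformatng, packed) == entry
```
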
335 335 class revlogio(object):
336 336 def __init__(self):
337 337 self.size = struct.calcsize(indexformatng)
338 338
339 339 def parseindex(self, fp, inline):
340 340 try:
341 341 size = util.fstat(fp).st_size
342 342 except AttributeError:
343 343 size = 0
344 344
345 345 if lazyparser.safe_to_use and not inline and size > 1000000:
346 346 # big index, let's parse it on demand
347 347 parser = lazyparser(fp, size)
348 348 index = lazyindex(parser)
349 349 nodemap = lazymap(parser)
350 350 e = list(index[0])
351 351 type = gettype(e[0])
352 352 e[0] = offset_type(0, type)
353 353 index[0] = e
354 354 return index, nodemap, None
355 355
356 356 s = self.size
357 357 cache = None
358 358 index = []
359 359 nodemap = {nullid: nullrev}
360 360 n = off = 0
361 361 # if we're not using lazymap, always read the whole index
362 362 data = fp.read()
363 363 l = len(data)
364 364 if inline:
365 365 cache = (0, data)
366 366 while off + s <= l:
367 367 e = struct.unpack(indexformatng, data[off:off + s])
368 368 index.append(e)
369 369 nodemap[e[7]] = n
370 370 n += 1
371 371 off += s
372 372 if inline:
373 373 if e[1] < 0:
374 374 break
375 375 off += e[1]
376 376
377 377 e = list(index[0])
378 378 type = gettype(e[0])
379 379 e[0] = offset_type(0, type)
380 380 index[0] = e
381 381
382 382 return index, nodemap, cache
383 383
384 384 def packentry(self, entry, node, version):
385 385 p = struct.pack(indexformatng, *entry)
386 386 if not entry[3] and not getoffset(entry[0]) and entry[5] == nullrev:
387 387 p = struct.pack(versionformat, version) + p[4:]
388 388 return p
389 389
390 390 class revlog(object):
391 391 """
392 392 the underlying revision storage object
393 393
394 394 A revlog consists of two parts, an index and the revision data.
395 395
396 396 The index is a file with a fixed record size containing
397 397 information on each revision, including its nodeid (hash), the
398 398 nodeids of its parents, the position and offset of its data within
399 399 the data file, and the revision it's based on. Finally, each entry
400 400 contains a linkrev entry that can serve as a pointer to external
401 401 data.
402 402
403 403 The revision data itself is a linear collection of data chunks.
404 404 Each chunk represents a revision and is usually represented as a
405 405 delta against the previous chunk. To bound lookup time, runs of
406 406 deltas are limited to about 2 times the length of the original
407 407 version data. This makes retrieval of a version proportional to
408 408 its size, or O(1) relative to the number of revisions.
409 409
410 410 Both pieces of the revlog are written to in an append-only
411 411 fashion, which means we never need to rewrite a file to insert or
412 412 remove data, and can use some simple techniques to avoid the need
413 413 for locking while reading.
414 414 """
415 415 def __init__(self, opener, indexfile):
416 416 """
417 417 create a revlog object
418 418
419 419 opener is a function that abstracts the file opening operation
420 420 and can be used to implement COW semantics or the like.
421 421 """
422 422 self.indexfile = indexfile
423 423 self.datafile = indexfile[:-2] + ".d"
424 424 self.opener = opener
425 425 self._cache = None
426 426 self._chunkcache = None
427 427 self.nodemap = {nullid: nullrev}
428 428 self.index = []
429 429
430 430 v = REVLOG_DEFAULT_VERSION
431 431 if hasattr(opener, "defversion"):
432 432 v = opener.defversion
433 433 if v & REVLOGNG:
434 434 v |= REVLOGNGINLINEDATA
435 435
436 436 i = ""
437 437 try:
438 438 f = self.opener(self.indexfile)
439 439 i = f.read(4)
440 440 f.seek(0)
441 441 if len(i) > 0:
442 442 v = struct.unpack(versionformat, i)[0]
443 443 except IOError, inst:
444 444 if inst.errno != errno.ENOENT:
445 445 raise
446 446
447 447 self.version = v
448 448 self._inline = v & REVLOGNGINLINEDATA
449 449 flags = v & ~0xFFFF
450 450 fmt = v & 0xFFFF
451 451 if fmt == REVLOGV0 and flags:
452 452 raise RevlogError(_("index %s unknown flags %#04x for format v0")
453 453 % (self.indexfile, flags >> 16))
454 454 elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
455 455 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
456 456 % (self.indexfile, flags >> 16))
457 457 elif fmt > REVLOGNG:
458 458 raise RevlogError(_("index %s unknown format %d")
459 459 % (self.indexfile, fmt))
460 460
461 461 self._io = revlogio()
462 462 if self.version == REVLOGV0:
463 463 self._io = revlogoldio()
464 464 if i:
465 465 d = self._io.parseindex(f, self._inline)
466 466 self.index, self.nodemap, self._chunkcache = d
467 467
468 468 # add the magic null revision at -1
469 469 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
470 470
471 471 def _loadindex(self, start, end):
472 472 """load a block of indexes all at once from the lazy parser"""
473 473 if isinstance(self.index, lazyindex):
474 474 self.index.p.loadindex(start, end)
475 475
476 476 def _loadindexmap(self):
477 477 """loads both the map and the index from the lazy parser"""
478 478 if isinstance(self.index, lazyindex):
479 479 p = self.index.p
480 480 p.loadindex()
481 481 self.nodemap = p.map
482 482
483 483 def _loadmap(self):
484 484 """loads the map from the lazy parser"""
485 485 if isinstance(self.nodemap, lazymap):
486 486 self.nodemap.p.loadmap()
487 487 self.nodemap = self.nodemap.p.map
488 488
489 489 def tip(self):
490 490 return self.node(len(self.index) - 2)
491 491 def count(self):
492 492 return len(self.index) - 1
493 493
494 494 def rev(self, node):
495 495 try:
496 496 return self.nodemap[node]
497 497 except KeyError:
498 498 raise LookupError(_('%s: no node %s') % (self.indexfile, hex(node)))
499 499 def node(self, rev):
500 500 return self.index[rev][7]
501 501 def linkrev(self, node):
502 502 return self.index[self.rev(node)][4]
503 503 def parents(self, node):
504 504 d = self.index[self.rev(node)][5:7]
505 505 return (self.node(d[0]), self.node(d[1]))
506 506 def parentrevs(self, rev):
507 507 return self.index[rev][5:7]
508 508 def start(self, rev):
509 509 return getoffset(self.index[rev][0])
510 510 def end(self, rev):
511 511 return self.start(rev) + self.length(rev)
512 512 def length(self, rev):
513 513 return self.index[rev][1]
514 514 def base(self, rev):
515 515 return self.index[rev][3]
516 516
517 517 def size(self, rev):
518 518 """return the length of the uncompressed text for a given revision"""
519 519 l = self.index[rev][2]
520 520 if l >= 0:
521 521 return l
522 522
523 523 t = self.revision(self.node(rev))
524 524 return len(t)
525 525
526 526 # alternate implementation. The advantage of this code is that it
527 527 # will be faster for a single revision. But, the results are not
528 528 # cached, so finding the size of every revision will be slower.
529 529 """
530 530 if self.cache and self.cache[1] == rev:
531 531 return len(self.cache[2])
532 532
533 533 base = self.base(rev)
534 534 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
535 535 base = self.cache[1]
536 536 text = self.cache[2]
537 537 else:
538 538 text = self.revision(self.node(base))
539 539
540 540 l = len(text)
541 541 for x in xrange(base + 1, rev + 1):
542 542 l = mdiff.patchedsize(l, self.chunk(x))
543 543 return l
544 544 """
545 545
546 546 def reachable(self, node, stop=None):
547 547 """return a hash of all nodes ancestral to a given node, including
548 548 the node itself, stopping when stop is matched"""
549 549 reachable = {}
550 550 visit = [node]
551 551 reachable[node] = 1
552 552 if stop:
553 553 stopn = self.rev(stop)
554 554 else:
555 555 stopn = 0
556 556 while visit:
557 557 n = visit.pop(0)
558 558 if n == stop:
559 559 continue
560 560 if n == nullid:
561 561 continue
562 562 for p in self.parents(n):
563 563 if self.rev(p) < stopn:
564 564 continue
565 565 if p not in reachable:
566 566 reachable[p] = 1
567 567 visit.append(p)
568 568 return reachable
569 569
570 570 def nodesbetween(self, roots=None, heads=None):
571 571 """Return a tuple containing three elements. Elements 1 and 2 contain
572 572 a final list of bases and heads after all the unreachable ones have been
573 573 pruned. Element 0 contains a topologically sorted list of all
574 574
575 575 nodes that satisfy these constraints:
576 576 1. All nodes must be descended from a node in roots (the nodes on
577 577 roots are considered descended from themselves).
578 578 2. All nodes must also be ancestors of a node in heads (the nodes in
579 579 heads are considered to be their own ancestors).
580 580
581 581 If roots is unspecified, nullid is assumed as the only root.
582 582 If heads is unspecified, it is taken to be the output of the
583 583 heads method (i.e. a list of all nodes in the repository that
584 584 have no children)."""
585 585 nonodes = ([], [], [])
586 586 if roots is not None:
587 587 roots = list(roots)
588 588 if not roots:
589 589 return nonodes
590 590 lowestrev = min([self.rev(n) for n in roots])
591 591 else:
592 592 roots = [nullid] # Everybody's a descendent of nullid
593 593 lowestrev = nullrev
594 594 if (lowestrev == nullrev) and (heads is None):
595 595 # We want _all_ the nodes!
596 596 return ([self.node(r) for r in xrange(0, self.count())],
597 597 [nullid], list(self.heads()))
598 598 if heads is None:
599 599 # All nodes are ancestors, so the latest ancestor is the last
600 600 # node.
601 601 highestrev = self.count() - 1
602 602 # Set ancestors to None to signal that every node is an ancestor.
603 603 ancestors = None
604 604 # Set heads to an empty dictionary for later discovery of heads
605 605 heads = {}
606 606 else:
607 607 heads = list(heads)
608 608 if not heads:
609 609 return nonodes
610 610 ancestors = {}
611 611 # Turn heads into a dictionary so we can remove 'fake' heads.
612 612 # Also, later we will be using it to filter out the heads we can't
613 613 # find from roots.
614 614 heads = dict.fromkeys(heads, 0)
615 615 # Start at the top and keep marking parents until we're done.
616 616 nodestotag = heads.keys()
617 617 # Remember where the top was so we can use it as a limit later.
618 618 highestrev = max([self.rev(n) for n in nodestotag])
619 619 while nodestotag:
620 620 # grab a node to tag
621 621 n = nodestotag.pop()
622 622 # Never tag nullid
623 623 if n == nullid:
624 624 continue
625 625 # A node's revision number represents its place in a
626 626 # topologically sorted list of nodes.
627 627 r = self.rev(n)
628 628 if r >= lowestrev:
629 629 if n not in ancestors:
630 630 # If we are possibly a descendent of one of the roots
631 631 # and we haven't already been marked as an ancestor
632 632 ancestors[n] = 1 # Mark as ancestor
633 633 # Add non-nullid parents to list of nodes to tag.
634 634 nodestotag.extend([p for p in self.parents(n) if
635 635 p != nullid])
636 636 elif n in heads: # We've seen it before, is it a fake head?
637 637 # So it is, real heads should not be the ancestors of
638 638 # any other heads.
639 639 heads.pop(n)
640 640 if not ancestors:
641 641 return nonodes
642 642 # Now that we have our set of ancestors, we want to remove any
643 643 # roots that are not ancestors.
644 644
645 645 # If one of the roots was nullid, everything is included anyway.
646 646 if lowestrev > nullrev:
647 647 # But, since we weren't, let's recompute the lowest rev to not
648 648 # include roots that aren't ancestors.
649 649
650 650 # Filter out roots that aren't ancestors of heads
651 651 roots = [n for n in roots if n in ancestors]
652 652 # Recompute the lowest revision
653 653 if roots:
654 654 lowestrev = min([self.rev(n) for n in roots])
655 655 else:
656 656 # No more roots? Return empty list
657 657 return nonodes
658 658 else:
659 659 # We are descending from nullid, and don't need to care about
660 660 # any other roots.
661 661 lowestrev = nullrev
662 662 roots = [nullid]
663 663 # Transform our roots list into a 'set' (i.e. a dictionary where the
664 664 # values don't matter).
665 665 descendents = dict.fromkeys(roots, 1)
666 666 # Also, keep the original roots so we can filter out roots that aren't
667 667 # 'real' roots (i.e. are descended from other roots).
668 668 roots = descendents.copy()
669 669 # Our topologically sorted list of output nodes.
670 670 orderedout = []
671 671 # Don't start at nullid since we don't want nullid in our output list,
672 672 # and if nullid shows up in descendents, empty parents will look like
673 673 # they're descendents.
674 674 for r in xrange(max(lowestrev, 0), highestrev + 1):
675 675 n = self.node(r)
676 676 isdescendent = False
677 677 if lowestrev == nullrev: # Everybody is a descendent of nullid
678 678 isdescendent = True
679 679 elif n in descendents:
680 680 # n is already a descendent
681 681 isdescendent = True
682 682 # This check only needs to be done here because all the roots
683 683 # will start being marked as descendents before the loop.
684 684 if n in roots:
685 685 # If n was a root, check if it's a 'real' root.
686 686 p = tuple(self.parents(n))
687 687 # If any of its parents are descendents, it's not a root.
688 688 if (p[0] in descendents) or (p[1] in descendents):
689 689 roots.pop(n)
690 690 else:
691 691 p = tuple(self.parents(n))
692 692 # A node is a descendent if either of its parents are
693 693 # descendents. (We seeded the descendents list with the roots
694 694 # up there, remember?)
695 695 if (p[0] in descendents) or (p[1] in descendents):
696 696 descendents[n] = 1
697 697 isdescendent = True
698 698 if isdescendent and ((ancestors is None) or (n in ancestors)):
699 699 # Only include nodes that are both descendents and ancestors.
700 700 orderedout.append(n)
701 701 if (ancestors is not None) and (n in heads):
702 702 # We're trying to figure out which heads are reachable
703 703 # from roots.
704 704 # Mark this head as having been reached
705 705 heads[n] = 1
706 706 elif ancestors is None:
707 707 # Otherwise, we're trying to discover the heads.
708 708 # Assume this is a head because if it isn't, the next step
709 709 # will eventually remove it.
710 710 heads[n] = 1
711 711 # But, obviously its parents aren't.
712 712 for p in self.parents(n):
713 713 heads.pop(p, None)
714 714 heads = [n for n in heads.iterkeys() if heads[n] != 0]
715 715 roots = roots.keys()
716 716 assert orderedout
717 717 assert roots
718 718 assert heads
719 719 return (orderedout, roots, heads)
720 720
721 721 def heads(self, start=None, stop=None):
722 722 """return the list of all nodes that have no children
723 723
724 724 if start is specified, only heads that are descendants of
725 725 start will be returned
726 726 if stop is specified, it will consider all the revs from stop
727 727 as if they had no children
728 728 """
729 729 if start is None:
730 730 start = nullid
731 731 if stop is None:
732 732 stop = []
733 733 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
734 734 startrev = self.rev(start)
735 735 reachable = {startrev: 1}
736 736 heads = {startrev: 1}
737 737
738 738 parentrevs = self.parentrevs
739 739 for r in xrange(startrev + 1, self.count()):
740 740 for p in parentrevs(r):
741 741 if p in reachable:
742 742 if r not in stoprevs:
743 743 reachable[r] = 1
744 744 heads[r] = 1
745 745 if p in heads and p not in stoprevs:
746 746 del heads[p]
747 747
748 748 return [self.node(r) for r in heads]
749 749
750 750 def children(self, node):
751 751 """find the children of a given node"""
752 752 c = []
753 753 p = self.rev(node)
754 754 for r in range(p + 1, self.count()):
755 755 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
756 756 if prevs:
757 757 for pr in prevs:
758 758 if pr == p:
759 759 c.append(self.node(r))
760 760 elif p == nullrev:
761 761 c.append(self.node(r))
762 762 return c
763 763
764 764 def _match(self, id):
765 765 if isinstance(id, (long, int)):
766 766 # rev
767 767 return self.node(id)
768 768 if len(id) == 20:
769 769 # possibly a binary node
770 770 # odds of a binary node being all hex in ASCII are 1 in 10**25
771 771 try:
772 772 node = id
773 773 r = self.rev(node) # quick search the index
774 774 return node
775 775 except LookupError:
776 776 pass # may be partial hex id
777 777 try:
778 778 # str(rev)
779 779 rev = int(id)
780 780 if str(rev) != id:
781 781 raise ValueError
782 782 if rev < 0:
783 783 rev = self.count() + rev
784 784 if rev < 0 or rev >= self.count():
785 785 raise ValueError
786 786 return self.node(rev)
787 787 except (ValueError, OverflowError):
788 788 pass
789 789 if len(id) == 40:
790 790 try:
791 791 # a full hex nodeid?
792 792 node = bin(id)
793 793 r = self.rev(node)
794 794 return node
795 795 except TypeError:
796 796 pass
797 797
798 798 def _partialmatch(self, id):
799 799 if len(id) < 40:
800 800 try:
801 801 # hex(node)[:...]
802 802 bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
803 803 node = None
804 804 for n in self.nodemap:
805 805 if n.startswith(bin_id) and hex(n).startswith(id):
806 806 if node is not None:
807 807 raise LookupError(_("Ambiguous identifier"))
808 808 node = n
809 809 if node is not None:
810 810 return node
811 811 except TypeError:
812 812 pass
813 813
814 814 def lookup(self, id):
815 815 """locate a node based on:
816 816 - revision number or str(revision number)
817 817 - nodeid or subset of hex nodeid
818 818 """
819 819 n = self._match(id)
820 820 if n is not None:
821 821 return n
822 822 n = self._partialmatch(id)
823 823 if n:
824 824 return n
825 825
826 826 raise LookupError(_("No match found"))
827 827
828 828 def cmp(self, node, text):
829 829 """compare text with a given file revision"""
830 830 p1, p2 = self.parents(node)
831 831 return hash(text, p1, p2) != node
832 832
833 def diff(self, a, b):
834 """return a delta between two revisions"""
835 return mdiff.textdiff(a, b)
836
837 def patches(self, t, pl):
838 """apply a list of patches to a string"""
839 return mdiff.patches(t, pl)
840
841 833 def chunk(self, rev, df=None):
842 834 start, length = self.start(rev), self.length(rev)
843 835 if self._inline:
844 836 start += (rev + 1) * self._io.size
845 837 end = start + length
846 838 def loadcache(df):
847 839 cache_length = max(65536, length)
848 840 if not df:
849 841 if self._inline:
850 842 df = self.opener(self.indexfile)
851 843 else:
852 844 df = self.opener(self.datafile)
853 845 df.seek(start)
854 846 self._chunkcache = (start, df.read(cache_length))
855 847
856 848 if not self._chunkcache:
857 849 loadcache(df)
858 850
859 851 cache_start = self._chunkcache[0]
860 852 cache_end = cache_start + len(self._chunkcache[1])
861 853 if start >= cache_start and end <= cache_end:
862 854 # it is cached
863 855 offset = start - cache_start
864 856 else:
865 857 loadcache(df)
866 858 offset = 0
867 859
868 860 # avoid copying large chunks
869 861 c = self._chunkcache[1]
870 862 if len(c) > length:
871 863 c = c[offset:offset + length]
872 864
873 865 return decompress(c)
874 866
875 867 def delta(self, node):
876 868 """return or calculate a delta between a node and its predecessor"""
877 869 r = self.rev(node)
878 870 return self.revdiff(r - 1, r)
879 871
880 872 def revdiff(self, rev1, rev2):
881 873 """return or calculate a delta between two revisions"""
882 874 b1 = self.base(rev1)
883 875 b2 = self.base(rev2)
884 876 if b1 == b2 and rev1 + 1 == rev2:
885 877 return self.chunk(rev2)
886 878 else:
887 return self.diff(self.revision(self.node(rev1)),
888 self.revision(self.node(rev2)))
879 return mdiff.textdiff(self.revision(self.node(rev1)),
880 self.revision(self.node(rev2)))
889 881
890 882 def revision(self, node):
891 883 """return an uncompressed revision of a given"""
892 884 if node == nullid:
893 885 return ""
894 886 if self._cache and self._cache[0] == node:
895 887 return self._cache[2]
896 888
897 889 # look up what we need to read
898 890 text = None
899 891 rev = self.rev(node)
900 892 base = self.base(rev)
901 893
902 894 if self._inline:
903 895 # we probably have the whole chunk cached
904 896 df = None
905 897 else:
906 898 df = self.opener(self.datafile)
907 899
908 900 # do we have useful data cached?
909 901 if self._cache and self._cache[1] >= base and self._cache[1] < rev:
910 902 base = self._cache[1]
911 903 text = self._cache[2]
912 904 self._loadindex(base, rev + 1)
913 905 else:
914 906 self._loadindex(base, rev + 1)
915 907 text = self.chunk(base, df=df)
916 908
917 bins = []
918 for r in xrange(base + 1, rev + 1):
919 bins.append(self.chunk(r, df=df))
920
921 text = self.patches(text, bins)
922
909 bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
910 text = mdiff.patches(text, bins)
923 911 p1, p2 = self.parents(node)
924 912 if node != hash(text, p1, p2):
925 913 raise RevlogError(_("integrity check failed on %s:%d")
926 914 % (self.datafile, rev))
927 915
928 916 self._cache = (node, rev, text)
929 917 return text
930 918
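
A toy illustration of the delta-chain replay revision() performs, with mdiff.textdiff standing in for the decompressed chunks: each delta in bins is relative to the result of the previous one, so one mdiff.patches() call walks the whole chain (again assuming a Mercurial installation is importable).

```python
from mercurial import mdiff

base = b"a\nb\nc\n"
v1 = b"a\nB\nc\n"
v2 = b"a\nB\nc\nd\n"

# each delta is computed against the previous text, as in a revlog chain
bins = [mdiff.textdiff(base, v1), mdiff.textdiff(v1, v2)]
assert mdiff.patches(base, bins) == v2
```
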
931 919 def checkinlinesize(self, tr, fp=None):
932 920 if not self._inline:
933 921 return
934 922 if not fp:
935 923 fp = self.opener(self.indexfile, 'r')
936 924 fp.seek(0, 2)
937 925 size = fp.tell()
938 926 if size < 131072:
939 927 return
940 928 trinfo = tr.find(self.indexfile)
941 929 if trinfo == None:
942 930 raise RevlogError(_("%s not found in the transaction")
943 931 % self.indexfile)
944 932
945 933 trindex = trinfo[2]
946 934 dataoff = self.start(trindex)
947 935
948 936 tr.add(self.datafile, dataoff)
949 937 df = self.opener(self.datafile, 'w')
950 938 calc = self._io.size
951 939 for r in xrange(self.count()):
952 940 start = self.start(r) + (r + 1) * calc
953 941 length = self.length(r)
954 942 fp.seek(start)
955 943 d = fp.read(length)
956 944 df.write(d)
957 945 fp.close()
958 946 df.close()
959 947 fp = self.opener(self.indexfile, 'w', atomictemp=True)
960 948 self.version &= ~(REVLOGNGINLINEDATA)
961 949 self._inline = False
962 950 for i in xrange(self.count()):
963 951 e = self._io.packentry(self.index[i], self.node, self.version)
964 952 fp.write(e)
965 953
966 954 # if we don't call rename, the temp file will never replace the
967 955 # real index
968 956 fp.rename()
969 957
970 958 tr.replace(self.indexfile, trindex * calc)
971 959 self._chunkcache = None
972 960
973 961 def addrevision(self, text, transaction, link, p1, p2, d=None):
974 962 """add a revision to the log
975 963
976 964 text - the revision data to add
977 965 transaction - the transaction object used for rollback
978 966 link - the linkrev data to add
979 967 p1, p2 - the parent nodeids of the revision
980 968 d - an optional precomputed delta
981 969 """
982 970 dfh = None
983 971 if not self._inline:
984 972 dfh = self.opener(self.datafile, "a")
985 973 ifh = self.opener(self.indexfile, "a+")
986 974 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
987 975
988 976 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
989 977 node = hash(text, p1, p2)
990 978 if node in self.nodemap:
991 979 return node
992 980
993 981 curr = self.count()
994 982 prev = curr - 1
995 983 base = self.base(prev)
996 984 offset = self.end(prev)
997 985
998 986 if curr:
999 987 if not d:
1000 988 ptext = self.revision(self.node(prev))
1001 d = self.diff(ptext, text)
989 d = mdiff.textdiff(ptext, text)
1002 990 data = compress(d)
1003 991 l = len(data[1]) + len(data[0])
1004 992 dist = l + offset - self.start(base)
1005 993
1006 994 # full versions are inserted when the needed deltas
1007 995 # become comparable to the uncompressed text
1008 996 if not curr or dist > len(text) * 2:
1009 997 data = compress(text)
1010 998 l = len(data[1]) + len(data[0])
1011 999 base = curr
1012 1000
1013 1001 e = (offset_type(offset, 0), l, len(text),
1014 1002 base, link, self.rev(p1), self.rev(p2), node)
1015 1003 self.index.insert(-1, e)
1016 1004 self.nodemap[node] = curr
1017 1005
1018 1006 entry = self._io.packentry(e, self.node, self.version)
1019 1007 if not self._inline:
1020 1008 transaction.add(self.datafile, offset)
1021 1009 transaction.add(self.indexfile, curr * len(entry))
1022 1010 if data[0]:
1023 1011 dfh.write(data[0])
1024 1012 dfh.write(data[1])
1025 1013 dfh.flush()
1026 1014 ifh.write(entry)
1027 1015 else:
1028 1016 ifh.seek(0, 2)
1029 1017 transaction.add(self.indexfile, ifh.tell(), prev)
1030 1018 ifh.write(entry)
1031 1019 ifh.write(data[0])
1032 1020 ifh.write(data[1])
1033 1021 self.checkinlinesize(transaction, ifh)
1034 1022
1035 1023 self._cache = (node, curr, text)
1036 1024 return node
1037 1025
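
The snapshot heuristic from _addrevision above, isolated as a sketch (names are illustrative): store a full version for the first revision, or whenever the delta data accumulated since the last full version exceeds twice the new text's length; this is what keeps delta chains short enough for fast retrieval.

```python
def store_full_version(curr, dist, textlen):
    # mirrors: if not curr or dist > len(text) * 2
    return not curr or dist > textlen * 2

assert store_full_version(0, 0, 100)        # first revision: full text
assert store_full_version(5, 1000, 400)     # chain too long: snapshot
assert not store_full_version(5, 500, 400)  # keep extending the chain
```
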
1038 1026 def ancestor(self, a, b):
1039 1027 """calculate the least common ancestor of nodes a and b"""
1040 1028
1041 1029 def parents(rev):
1042 1030 return [p for p in self.parentrevs(rev) if p != nullrev]
1043 1031
1044 1032 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1045 1033 if c is None:
1046 1034 return nullid
1047 1035
1048 1036 return self.node(c)
1049 1037
1050 1038 def group(self, nodelist, lookup, infocollect=None):
1051 1039 """calculate a delta group
1052 1040
1053 1041 Given a list of changeset revs, return a set of deltas and
1054 1042 metadata corresponding to nodes. The first delta is
1055 1043 parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
1056 1044 have this parent as it has all history before these
1057 1045 changesets. parent is parent[0]
1058 1046 """
1059 1047 revs = [self.rev(n) for n in nodelist]
1060 1048
1061 1049 # if we don't have any revisions touched by these changesets, bail
1062 1050 if not revs:
1063 1051 yield changegroup.closechunk()
1064 1052 return
1065 1053
1066 1054 # add the parent of the first rev
1067 1055 p = self.parents(self.node(revs[0]))[0]
1068 1056 revs.insert(0, self.rev(p))
1069 1057
1070 1058 # build deltas
1071 1059 for d in xrange(0, len(revs) - 1):
1072 1060 a, b = revs[d], revs[d + 1]
1073 1061 nb = self.node(b)
1074 1062
1075 1063 if infocollect is not None:
1076 1064 infocollect(nb)
1077 1065
1078 1066 d = self.revdiff(a, b)
1079 1067 p = self.parents(nb)
1080 1068 meta = nb + p[0] + p[1] + lookup(nb)
1081 1069 yield changegroup.genchunk("%s%s" % (meta, d))
1082 1070
1083 1071 yield changegroup.closechunk()
1084 1072
1085 1073 def addgroup(self, revs, linkmapper, transaction, unique=0):
1086 1074 """
1087 1075 add a delta group
1088 1076
1089 1077 given a set of deltas, add them to the revision log. the
1090 1078 first delta is against its parent, which should be in our
1091 1079 log, the rest are against the previous delta.
1092 1080 """
1093 1081
1094 1082 #track the base of the current delta log
1095 1083 r = self.count()
1096 1084 t = r - 1
1097 1085 node = None
1098 1086
1099 1087 base = prev = nullrev
1100 1088 start = end = textlen = 0
1101 1089 if r:
1102 1090 end = self.end(t)
1103 1091
1104 1092 ifh = self.opener(self.indexfile, "a+")
1105 1093 ifh.seek(0, 2)
1106 1094 transaction.add(self.indexfile, ifh.tell(), self.count())
1107 1095 if self._inline:
1108 1096 dfh = None
1109 1097 else:
1110 1098 transaction.add(self.datafile, end)
1111 1099 dfh = self.opener(self.datafile, "a")
1112 1100
1113 1101 # loop through our set of deltas
1114 1102 chain = None
1115 1103 for chunk in revs:
1116 1104 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1117 1105 link = linkmapper(cs)
1118 1106 if node in self.nodemap:
1119 1107 # this can happen if two branches make the same change
1120 1108 # if unique:
1121 1109 # raise RevlogError(_("already have %s") % hex(node[:4]))
1122 1110 chain = node
1123 1111 continue
1124 1112 delta = chunk[80:]
1125 1113
1126 1114 for p in (p1, p2):
1127 1115 if not p in self.nodemap:
1128 1116 raise LookupError(_("unknown parent %s") % short(p))
1129 1117
1130 1118 if not chain:
1131 1119 # retrieve the parent revision of the delta chain
1132 1120 chain = p1
1133 1121 if not chain in self.nodemap:
1134 1122 raise LookupError(_("unknown base %s") % short(chain[:4]))
1135 1123
1136 1124 # full versions are inserted when the needed deltas become
1137 1125 # comparable to the uncompressed text or when the previous
1138 1126 # version is not the one we have a delta against. We use
1139 1127 # the size of the previous full rev as a proxy for the
1140 1128 # current size.
1141 1129
1142 1130 if chain == prev:
1143 1131 tempd = compress(delta)
1144 1132 cdelta = tempd[0] + tempd[1]
1145 1133 textlen = mdiff.patchedsize(textlen, delta)
1146 1134
1147 1135 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1148 1136 # flush our writes here so we can read it in revision
1149 1137 if dfh:
1150 1138 dfh.flush()
1151 1139 ifh.flush()
1152 1140 text = self.revision(chain)
1153 text = self.patches(text, [delta])
1141 text = mdiff.patches(text, [delta])
1154 1142 chk = self._addrevision(text, transaction, link, p1, p2, None,
1155 1143 ifh, dfh)
1156 1144 if not dfh and not self._inline:
1157 1145 # addrevision switched from inline to conventional
1158 1146 # reopen the index
1159 1147 dfh = self.opener(self.datafile, "a")
1160 1148 ifh = self.opener(self.indexfile, "a")
1161 1149 if chk != node:
1162 1150 raise RevlogError(_("consistency error adding group"))
1163 1151 textlen = len(text)
1164 1152 else:
1165 1153 e = (offset_type(end, 0), len(cdelta), textlen, base,
1166 1154 link, self.rev(p1), self.rev(p2), node)
1167 1155 self.index.insert(-1, e)
1168 1156 self.nodemap[node] = r
1169 1157 entry = self._io.packentry(e, self.node, self.version)
1170 1158 if self._inline:
1171 1159 ifh.write(entry)
1172 1160 ifh.write(cdelta)
1173 1161 self.checkinlinesize(transaction, ifh)
1174 1162 if not self._inline:
1175 1163 dfh = self.opener(self.datafile, "a")
1176 1164 ifh = self.opener(self.indexfile, "a")
1177 1165 else:
1178 1166 dfh.write(cdelta)
1179 1167 ifh.write(entry)
1180 1168
1181 1169 t, r, chain, prev = r, r + 1, node, node
1182 1170 base = self.base(t)
1183 1171 start = self.start(base)
1184 1172 end = self.end(t)
1185 1173
1186 1174 return node
1187 1175
1188 1176 def strip(self, rev, minlink):
1189 1177 if self.count() == 0 or rev >= self.count():
1190 1178 return
1191 1179
1192 1180 if isinstance(self.index, lazyindex):
1193 1181 self._loadindexmap()
1194 1182
1195 1183 # When stripping away a revision, we need to make sure it
1196 1184 # does not actually belong to an older changeset.
1197 1185 # The minlink parameter defines the oldest revision
1198 1186 # we're allowed to strip away.
1199 1187 while minlink > self.index[rev][4]:
1200 1188 rev += 1
1201 1189 if rev >= self.count():
1202 1190 return
1203 1191
1204 1192 # first truncate the files on disk
1205 1193 end = self.start(rev)
1206 1194 if not self._inline:
1207 1195 df = self.opener(self.datafile, "a")
1208 1196 df.truncate(end)
1209 1197 end = rev * self._io.size
1210 1198 else:
1211 1199 end += rev * self._io.size
1212 1200
1213 1201 indexf = self.opener(self.indexfile, "a")
1214 1202 indexf.truncate(end)
1215 1203
1216 1204 # then reset internal state in memory to forget those revisions
1217 1205 self._cache = None
1218 1206 self._chunkcache = None
1219 1207 for x in xrange(rev, self.count()):
1220 1208 del self.nodemap[self.node(x)]
1221 1209
1222 1210 del self.index[rev:-1]
1223 1211
1224 1212 def checksize(self):
1225 1213 expected = 0
1226 1214 if self.count():
1227 1215 expected = self.end(self.count() - 1)
1228 1216
1229 1217 try:
1230 1218 f = self.opener(self.datafile)
1231 1219 f.seek(0, 2)
1232 1220 actual = f.tell()
1233 1221 dd = actual - expected
1234 1222 except IOError, inst:
1235 1223 if inst.errno != errno.ENOENT:
1236 1224 raise
1237 1225 dd = 0
1238 1226
1239 1227 try:
1240 1228 f = self.opener(self.indexfile)
1241 1229 f.seek(0, 2)
1242 1230 actual = f.tell()
1243 1231 s = self._io.size
1244 1232 i = actual / s
1245 1233 di = actual - (i * s)
1246 1234 if self._inline:
1247 1235 databytes = 0
1248 1236 for r in xrange(self.count()):
1249 1237 databytes += self.length(r)
1250 1238 dd = 0
1251 1239 di = actual - self.count() * s - databytes
1252 1240 except IOError, inst:
1253 1241 if inst.errno != errno.ENOENT:
1254 1242 raise
1255 1243 di = 0
1256 1244
1257 1245 return (dd, di)