revlog: refactor chunk cache interface again...
Matt Mackall
r8650:ef393d6e default
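In outline, this commit retires the public `chunk(rev, df=None)` method and the `_prime(startrev, endrev, df)` readahead helper in favor of a private interface that no longer threads a file handle through callers: `_chunkraw(startrev, endrev)` returns (and caches) the raw bytes covering a span of revisions, `_chunk(rev)` decompresses a single revision's chunk, and `_chunkclear()` resets the cache. The sketch below models only the underlying (offset, data) readahead cache on a stand-in file object; the `chunkcache` class name and the `io.BytesIO` fixture are illustrative, not part of the patch.

import io

_prereadsize = 1048576  # same growth cap as the module constant below

class chunkcache(object):
    """Illustrative model of the revlog chunk cache: one contiguous
    (offset, data) window over the data file, grown by readahead."""

    def __init__(self, dataf):
        self.dataf = dataf
        self._chunkcache = (0, b'')

    def _addchunk(self, offset, data):
        o, d = self._chunkcache
        # extend the cached window when the new read is contiguous with it
        if o + len(d) == offset and len(d) + len(data) < _prereadsize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _loadchunk(self, offset, length):
        readahead = max(65536, length)  # always read at least 64k
        self.dataf.seek(offset)
        d = self.dataf.read(readahead)
        self._addchunk(offset, d)
        if readahead > length:
            return d[:length]
        return d

    def _getchunk(self, offset, length):
        o, d = self._chunkcache
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= len(d):
            return d[cachestart:cacheend]       # cache hit
        return self._loadchunk(offset, length)  # miss: go to disk

    def _chunkclear(self):
        self._chunkcache = (0, b'')

# One span-sized read warms the cache (what revision() does via
# _chunkraw(base, rev)); later per-rev reads are then served from it.
c = chunkcache(io.BytesIO(b'0123456789' * 100))
c._getchunk(0, 1000)
assert c._getchunk(10, 20) == b'0123456789' * 2

The real methods additionally route reads through the revlog opener and, for inline revlogs, offset past the interleaved index entries; see `_chunkraw` in the hunk below.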
@@ -1,1389 +1,1376 b''
 # revlog.py - storage back-end for mercurial
 #
 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2, incorporated herein by reference.

 """Storage back-end for Mercurial.

 This provides efficient delta storage with O(1) retrieve and append
 and O(changes) merge between branches.
 """

 # import stuff from node for others to import from revlog
 from node import bin, hex, nullid, nullrev, short #@UnusedImport
 from i18n import _
 import changegroup, ancestor, mdiff, parsers, error, util
 import struct, zlib, errno

 _pack = struct.pack
 _unpack = struct.unpack
 _compress = zlib.compress
 _decompress = zlib.decompress
 _sha = util.sha1

 # revlog flags
 REVLOGV0 = 0
 REVLOGNG = 1
 REVLOGNGINLINEDATA = (1 << 16)
 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
 REVLOG_DEFAULT_FORMAT = REVLOGNG
 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS

 _prereadsize = 1048576

 RevlogError = error.RevlogError
 LookupError = error.LookupError

 def getoffset(q):
     return int(q >> 16)

 def gettype(q):
     return int(q & 0xFFFF)

 def offset_type(offset, type):
     return long(long(offset) << 16 | type)

 nullhash = _sha(nullid)

 def hash(text, p1, p2):
     """generate a hash from the given text and its parent hashes

     This hash combines both the current file contents and its history
     in a manner that makes it easy to distinguish nodes with the same
     content in the revision graph.
     """
     # As of now, if one of the parent node is null, p2 is null
     if p2 == nullid:
         # deep copy of a hash is faster than creating one
         s = nullhash.copy()
         s.update(p1)
     else:
         # none of the parent nodes are nullid
         l = [p1, p2]
         l.sort()
         s = _sha(l[0])
         s.update(l[1])
     s.update(text)
     return s.digest()

 def compress(text):
     """ generate a possibly-compressed representation of text """
     if not text:
         return ("", text)
     l = len(text)
     bin = None
     if l < 44:
         pass
     elif l > 1000000:
         # zlib makes an internal copy, thus doubling memory usage for
         # large files, so lets do this in pieces
         z = zlib.compressobj()
         p = []
         pos = 0
         while pos < l:
             pos2 = pos + 2**20
             p.append(z.compress(text[pos:pos2]))
             pos = pos2
         p.append(z.flush())
         if sum(map(len, p)) < l:
             bin = "".join(p)
     else:
         bin = _compress(text)
     if bin is None or len(bin) > l:
         if text[0] == '\0':
             return ("", text)
         return ('u', text)
     return ("", bin)

 def decompress(bin):
     """ decompress the given input """
     if not bin:
         return bin
     t = bin[0]
     if t == '\0':
         return bin
     if t == 'x':
         return _decompress(bin)
     if t == 'u':
         return bin[1:]
     raise RevlogError(_("unknown compression type %r") % t)

 class lazyparser(object):
     """
     this class avoids the need to parse the entirety of large indices
     """

     # lazyparser is not safe to use on windows if win32 extensions not
     # available. it keeps file handle open, which make it not possible
     # to break hardlinks on local cloned repos.

     def __init__(self, dataf):
         try:
             size = util.fstat(dataf).st_size
         except AttributeError:
             size = 0
         self.dataf = dataf
         self.s = struct.calcsize(indexformatng)
         self.datasize = size
         self.l = size/self.s
         self.index = [None] * self.l
         self.map = {nullid: nullrev}
         self.allmap = 0
         self.all = 0
         self.mapfind_count = 0

     def loadmap(self):
         """
         during a commit, we need to make sure the rev being added is
         not a duplicate. This requires loading the entire index,
         which is fairly slow. loadmap can load up just the node map,
         which takes much less time.
         """
         if self.allmap:
             return
         end = self.datasize
         self.allmap = 1
         cur = 0
         count = 0
         blocksize = self.s * 256
         self.dataf.seek(0)
         while cur < end:
             data = self.dataf.read(blocksize)
             off = 0
             for x in xrange(256):
                 n = data[off + ngshaoffset:off + ngshaoffset + 20]
                 self.map[n] = count
                 count += 1
                 if count >= self.l:
                     break
                 off += self.s
             cur += blocksize

     def loadblock(self, blockstart, blocksize, data=None):
         if self.all:
             return
         if data is None:
             self.dataf.seek(blockstart)
             if blockstart + blocksize > self.datasize:
                 # the revlog may have grown since we've started running,
                 # but we don't have space in self.index for more entries.
                 # limit blocksize so that we don't get too much data.
                 blocksize = max(self.datasize - blockstart, 0)
             data = self.dataf.read(blocksize)
         lend = len(data) / self.s
         i = blockstart / self.s
         off = 0
         # lazyindex supports __delitem__
         if lend > len(self.index) - i:
             lend = len(self.index) - i
         for x in xrange(lend):
             if self.index[i + x] is None:
                 b = data[off : off + self.s]
                 self.index[i + x] = b
                 n = b[ngshaoffset:ngshaoffset + 20]
                 self.map[n] = i + x
             off += self.s

     def findnode(self, node):
         """search backwards through the index file for a specific node"""
         if self.allmap:
             return None

         # hg log will cause many many searches for the manifest
         # nodes. After we get called a few times, just load the whole
         # thing.
         if self.mapfind_count > 8:
             self.loadmap()
             if node in self.map:
                 return node
             return None
         self.mapfind_count += 1
         last = self.l - 1
         while self.index[last] != None:
             if last == 0:
                 self.all = 1
                 self.allmap = 1
                 return None
             last -= 1
         end = (last + 1) * self.s
         blocksize = self.s * 256
         while end >= 0:
             start = max(end - blocksize, 0)
             self.dataf.seek(start)
             data = self.dataf.read(end - start)
             findend = end - start
             while True:
                 # we're searching backwards, so we have to make sure
                 # we don't find a changeset where this node is a parent
                 off = data.find(node, 0, findend)
                 findend = off
                 if off >= 0:
                     i = off / self.s
                     off = i * self.s
                     n = data[off + ngshaoffset:off + ngshaoffset + 20]
                     if n == node:
                         self.map[n] = i + start / self.s
                         return node
                 else:
                     break
             end -= blocksize
         return None

     def loadindex(self, i=None, end=None):
         if self.all:
             return
         all = False
         if i is None:
             blockstart = 0
             blocksize = (65536 / self.s) * self.s
             end = self.datasize
             all = True
         else:
             if end:
                 blockstart = i * self.s
                 end = end * self.s
                 blocksize = end - blockstart
             else:
                 blockstart = (i & ~1023) * self.s
                 blocksize = self.s * 1024
                 end = blockstart + blocksize
         while blockstart < end:
             self.loadblock(blockstart, blocksize)
             blockstart += blocksize
         if all:
             self.all = True

 class lazyindex(object):
     """a lazy version of the index array"""
     def __init__(self, parser):
         self.p = parser
     def __len__(self):
         return len(self.p.index)
     def load(self, pos):
         if pos < 0:
             pos += len(self.p.index)
         self.p.loadindex(pos)
         return self.p.index[pos]
     def __getitem__(self, pos):
         return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
     def __setitem__(self, pos, item):
         self.p.index[pos] = _pack(indexformatng, *item)
     def __delitem__(self, pos):
         del self.p.index[pos]
     def insert(self, pos, e):
         self.p.index.insert(pos, _pack(indexformatng, *e))
     def append(self, e):
         self.p.index.append(_pack(indexformatng, *e))

 class lazymap(object):
     """a lazy version of the node map"""
     def __init__(self, parser):
         self.p = parser
     def load(self, key):
         n = self.p.findnode(key)
         if n is None:
             raise KeyError(key)
     def __contains__(self, key):
         if key in self.p.map:
             return True
         self.p.loadmap()
         return key in self.p.map
     def __iter__(self):
         yield nullid
         for i in xrange(self.p.l):
             ret = self.p.index[i]
             if not ret:
                 self.p.loadindex(i)
                 ret = self.p.index[i]
             if isinstance(ret, str):
                 ret = _unpack(indexformatng, ret)
             yield ret[7]
     def __getitem__(self, key):
         try:
             return self.p.map[key]
         except KeyError:
             try:
                 self.load(key)
                 return self.p.map[key]
             except KeyError:
                 raise KeyError("node " + hex(key))
     def __setitem__(self, key, val):
         self.p.map[key] = val
     def __delitem__(self, key):
         del self.p.map[key]

 indexformatv0 = ">4l20s20s20s"
 v0shaoffset = 56

 class revlogoldio(object):
     def __init__(self):
         self.size = struct.calcsize(indexformatv0)

     def parseindex(self, fp, data, inline):
         s = self.size
         index = []
         nodemap = {nullid: nullrev}
         n = off = 0
         if len(data) == _prereadsize:
             data += fp.read() # read the rest
         l = len(data)
         while off + s <= l:
             cur = data[off:off + s]
             off += s
             e = _unpack(indexformatv0, cur)
             # transform to revlogv1 format
             e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                   nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
             index.append(e2)
             nodemap[e[6]] = n
             n += 1

         return index, nodemap, None

     def packentry(self, entry, node, version, rev):
         e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
               node(entry[5]), node(entry[6]), entry[7])
         return _pack(indexformatv0, *e2)

 # index ng:
 # 6 bytes offset
 # 2 bytes flags
 # 4 bytes compressed length
 # 4 bytes uncompressed length
 # 4 bytes: base rev
 # 4 bytes link rev
 # 4 bytes parent 1 rev
 # 4 bytes parent 2 rev
 # 32 bytes: nodeid
 indexformatng = ">Qiiiiii20s12x"
 ngshaoffset = 32
 versionformat = ">I"

 class revlogio(object):
     def __init__(self):
         self.size = struct.calcsize(indexformatng)

     def parseindex(self, fp, data, inline):
         if len(data) == _prereadsize:
             if util.openhardlinks() and not inline:
                 # big index, let's parse it on demand
                 parser = lazyparser(fp)
                 index = lazyindex(parser)
                 nodemap = lazymap(parser)
                 e = list(index[0])
                 type = gettype(e[0])
                 e[0] = offset_type(0, type)
                 index[0] = e
                 return index, nodemap, None
             else:
                 data += fp.read()

         # call the C implementation to parse the index data
         index, nodemap, cache = parsers.parse_index(data, inline)
         return index, nodemap, cache

     def packentry(self, entry, node, version, rev):
         p = _pack(indexformatng, *entry)
         if rev == 0:
             p = _pack(versionformat, version) + p[4:]
         return p

 class revlog(object):
     """
     the underlying revision storage object

     A revlog consists of two parts, an index and the revision data.

     The index is a file with a fixed record size containing
     information on each revision, including its nodeid (hash), the
     nodeids of its parents, the position and offset of its data within
     the data file, and the revision it's based on. Finally, each entry
     contains a linkrev entry that can serve as a pointer to external
     data.

     The revision data itself is a linear collection of data chunks.
     Each chunk represents a revision and is usually represented as a
     delta against the previous chunk. To bound lookup time, runs of
     deltas are limited to about 2 times the length of the original
     version data. This makes retrieval of a version proportional to
     its size, or O(1) relative to the number of revisions.

     Both pieces of the revlog are written to in an append-only
     fashion, which means we never need to rewrite a file to insert or
     remove data, and can use some simple techniques to avoid the need
     for locking while reading.
     """
     def __init__(self, opener, indexfile):
         """
         create a revlog object

         opener is a function that abstracts the file opening operation
         and can be used to implement COW semantics or the like.
         """
         self.indexfile = indexfile
         self.datafile = indexfile[:-2] + ".d"
         self.opener = opener
         self._cache = None
         self._chunkcache = (0, '')
         self.nodemap = {nullid: nullrev}
         self.index = []

         v = REVLOG_DEFAULT_VERSION
         if hasattr(opener, "defversion"):
             v = opener.defversion
             if v & REVLOGNG:
                 v |= REVLOGNGINLINEDATA

         i = ''
         try:
             f = self.opener(self.indexfile)
             i = f.read(_prereadsize)
             if len(i) > 0:
                 v = struct.unpack(versionformat, i[:4])[0]
         except IOError, inst:
             if inst.errno != errno.ENOENT:
                 raise

         self.version = v
         self._inline = v & REVLOGNGINLINEDATA
         flags = v & ~0xFFFF
         fmt = v & 0xFFFF
         if fmt == REVLOGV0 and flags:
             raise RevlogError(_("index %s unknown flags %#04x for format v0")
                               % (self.indexfile, flags >> 16))
         elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
             raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                               % (self.indexfile, flags >> 16))
         elif fmt > REVLOGNG:
             raise RevlogError(_("index %s unknown format %d")
                               % (self.indexfile, fmt))

         self._io = revlogio()
         if self.version == REVLOGV0:
             self._io = revlogoldio()
         if i:
             try:
                 d = self._io.parseindex(f, i, self._inline)
             except (ValueError, IndexError), e:
                 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
             self.index, self.nodemap, self._chunkcache = d
             if not self._chunkcache:
-                self._chunkcache = (0, '')
+                self._chunkclear()

         # add the magic null revision at -1 (if it hasn't been done already)
         if (self.index == [] or isinstance(self.index, lazyindex) or
             self.index[-1][7] != nullid) :
             self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))

     def _loadindex(self, start, end):
         """load a block of indexes all at once from the lazy parser"""
         if isinstance(self.index, lazyindex):
             self.index.p.loadindex(start, end)

     def _loadindexmap(self):
         """loads both the map and the index from the lazy parser"""
         if isinstance(self.index, lazyindex):
             p = self.index.p
             p.loadindex()
             self.nodemap = p.map

     def _loadmap(self):
         """loads the map from the lazy parser"""
         if isinstance(self.nodemap, lazymap):
             self.nodemap.p.loadmap()
             self.nodemap = self.nodemap.p.map

     def tip(self):
         return self.node(len(self.index) - 2)
     def __len__(self):
         return len(self.index) - 1
     def __iter__(self):
         for i in xrange(len(self)):
             yield i
     def rev(self, node):
         try:
             return self.nodemap[node]
         except KeyError:
             raise LookupError(node, self.indexfile, _('no node'))
     def node(self, rev):
         return self.index[rev][7]
     def linkrev(self, rev):
         return self.index[rev][4]
     def parents(self, node):
         i = self.index
         d = i[self.rev(node)]
         return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
     def parentrevs(self, rev):
         return self.index[rev][5:7]
     def start(self, rev):
         return int(self.index[rev][0] >> 16)
     def end(self, rev):
         return self.start(rev) + self.length(rev)
     def length(self, rev):
         return self.index[rev][1]
     def base(self, rev):
         return self.index[rev][3]

     def size(self, rev):
         """return the length of the uncompressed text for a given revision"""
         l = self.index[rev][2]
         if l >= 0:
             return l

         t = self.revision(self.node(rev))
         return len(t)

         # alternate implementation, The advantage to this code is it
         # will be faster for a single revision. But, the results are not
         # cached, so finding the size of every revision will be slower.
         """
         if self.cache and self.cache[1] == rev:
             return len(self.cache[2])

         base = self.base(rev)
         if self.cache and self.cache[1] >= base and self.cache[1] < rev:
             base = self.cache[1]
             text = self.cache[2]
         else:
             text = self.revision(self.node(base))

         l = len(text)
         for x in xrange(base + 1, rev + 1):
-            l = mdiff.patchedsize(l, self.chunk(x))
+            l = mdiff.patchedsize(l, self._chunk(x))
         return l
         """

     def reachable(self, node, stop=None):
         """return the set of all nodes ancestral to a given node, including
         the node itself, stopping when stop is matched"""
         reachable = set((node,))
         visit = [node]
         if stop:
             stopn = self.rev(stop)
         else:
             stopn = 0
         while visit:
             n = visit.pop(0)
             if n == stop:
                 continue
             if n == nullid:
                 continue
             for p in self.parents(n):
                 if self.rev(p) < stopn:
                     continue
                 if p not in reachable:
                     reachable.add(p)
                     visit.append(p)
         return reachable

     def ancestors(self, *revs):
         'Generate the ancestors of revs using a breadth-first visit'
         visit = list(revs)
         seen = set([nullrev])
         while visit:
             for parent in self.parentrevs(visit.pop(0)):
                 if parent not in seen:
                     visit.append(parent)
                     seen.add(parent)
                     yield parent

     def descendants(self, *revs):
         'Generate the descendants of revs in topological order'
         seen = set(revs)
         for i in xrange(min(revs) + 1, len(self)):
             for x in self.parentrevs(i):
                 if x != nullrev and x in seen:
                     seen.add(i)
                     yield i
                     break

     def findmissing(self, common=None, heads=None):
         '''
         returns the topologically sorted list of nodes from the set:
         missing = (ancestors(heads) \ ancestors(common))

         where ancestors() is the set of ancestors from heads, heads included

         if heads is None, the heads of the revlog are used
         if common is None, nullid is assumed to be a common node
         '''
         if common is None:
             common = [nullid]
         if heads is None:
             heads = self.heads()

         common = [self.rev(n) for n in common]
         heads = [self.rev(n) for n in heads]

         # we want the ancestors, but inclusive
         has = set(self.ancestors(*common))
         has.add(nullrev)
         has.update(common)

         # take all ancestors from heads that aren't in has
         missing = set()
         visit = [r for r in heads if r not in has]
         while visit:
             r = visit.pop(0)
             if r in missing:
                 continue
             else:
                 missing.add(r)
                 for p in self.parentrevs(r):
                     if p not in has:
                         visit.append(p)
         missing = list(missing)
         missing.sort()
         return [self.node(r) for r in missing]

     def nodesbetween(self, roots=None, heads=None):
         """Return a tuple containing three elements. Elements 1 and 2 contain
         a final list bases and heads after all the unreachable ones have been
         pruned. Element 0 contains a topologically sorted list of all

         nodes that satisfy these constraints:
         1. All nodes must be descended from a node in roots (the nodes on
         roots are considered descended from themselves).
         2. All nodes must also be ancestors of a node in heads (the nodes in
         heads are considered to be their own ancestors).

         If roots is unspecified, nullid is assumed as the only root.
         If heads is unspecified, it is taken to be the output of the
         heads method (i.e. a list of all nodes in the repository that
         have no children)."""
         nonodes = ([], [], [])
         if roots is not None:
             roots = list(roots)
             if not roots:
                 return nonodes
             lowestrev = min([self.rev(n) for n in roots])
         else:
             roots = [nullid] # Everybody's a descendent of nullid
             lowestrev = nullrev
         if (lowestrev == nullrev) and (heads is None):
             # We want _all_ the nodes!
             return ([self.node(r) for r in self], [nullid], list(self.heads()))
         if heads is None:
             # All nodes are ancestors, so the latest ancestor is the last
             # node.
             highestrev = len(self) - 1
             # Set ancestors to None to signal that every node is an ancestor.
             ancestors = None
             # Set heads to an empty dictionary for later discovery of heads
             heads = {}
         else:
             heads = list(heads)
             if not heads:
                 return nonodes
             ancestors = set()
             # Turn heads into a dictionary so we can remove 'fake' heads.
             # Also, later we will be using it to filter out the heads we can't
             # find from roots.
             heads = dict.fromkeys(heads, 0)
             # Start at the top and keep marking parents until we're done.
             nodestotag = set(heads)
             # Remember where the top was so we can use it as a limit later.
             highestrev = max([self.rev(n) for n in nodestotag])
             while nodestotag:
                 # grab a node to tag
                 n = nodestotag.pop()
                 # Never tag nullid
                 if n == nullid:
                     continue
                 # A node's revision number represents its place in a
                 # topologically sorted list of nodes.
                 r = self.rev(n)
                 if r >= lowestrev:
                     if n not in ancestors:
                         # If we are possibly a descendent of one of the roots
                         # and we haven't already been marked as an ancestor
                         ancestors.add(n) # Mark as ancestor
                         # Add non-nullid parents to list of nodes to tag.
                         nodestotag.update([p for p in self.parents(n) if
                                            p != nullid])
                     elif n in heads: # We've seen it before, is it a fake head?
                         # So it is, real heads should not be the ancestors of
                         # any other heads.
                         heads.pop(n)
             if not ancestors:
                 return nonodes
             # Now that we have our set of ancestors, we want to remove any
             # roots that are not ancestors.

             # If one of the roots was nullid, everything is included anyway.
             if lowestrev > nullrev:
                 # But, since we weren't, let's recompute the lowest rev to not
                 # include roots that aren't ancestors.

                 # Filter out roots that aren't ancestors of heads
                 roots = [n for n in roots if n in ancestors]
                 # Recompute the lowest revision
                 if roots:
                     lowestrev = min([self.rev(n) for n in roots])
                 else:
                     # No more roots? Return empty list
                     return nonodes
             else:
                 # We are descending from nullid, and don't need to care about
                 # any other roots.
                 lowestrev = nullrev
                 roots = [nullid]
         # Transform our roots list into a set.
         descendents = set(roots)
         # Also, keep the original roots so we can filter out roots that aren't
         # 'real' roots (i.e. are descended from other roots).
         roots = descendents.copy()
         # Our topologically sorted list of output nodes.
         orderedout = []
         # Don't start at nullid since we don't want nullid in our output list,
         # and if nullid shows up in descedents, empty parents will look like
         # they're descendents.
         for r in xrange(max(lowestrev, 0), highestrev + 1):
             n = self.node(r)
             isdescendent = False
             if lowestrev == nullrev: # Everybody is a descendent of nullid
                 isdescendent = True
             elif n in descendents:
                 # n is already a descendent
                 isdescendent = True
                 # This check only needs to be done here because all the roots
                 # will start being marked is descendents before the loop.
                 if n in roots:
                     # If n was a root, check if it's a 'real' root.
                     p = tuple(self.parents(n))
                     # If any of its parents are descendents, it's not a root.
                     if (p[0] in descendents) or (p[1] in descendents):
                         roots.remove(n)
             else:
                 p = tuple(self.parents(n))
                 # A node is a descendent if either of its parents are
                 # descendents. (We seeded the dependents list with the roots
                 # up there, remember?)
                 if (p[0] in descendents) or (p[1] in descendents):
                     descendents.add(n)
                     isdescendent = True
             if isdescendent and ((ancestors is None) or (n in ancestors)):
                 # Only include nodes that are both descendents and ancestors.
                 orderedout.append(n)
                 if (ancestors is not None) and (n in heads):
                     # We're trying to figure out which heads are reachable
                     # from roots.
                     # Mark this head as having been reached
                     heads[n] = 1
                 elif ancestors is None:
                     # Otherwise, we're trying to discover the heads.
                     # Assume this is a head because if it isn't, the next step
                     # will eventually remove it.
                     heads[n] = 1
                     # But, obviously its parents aren't.
                     for p in self.parents(n):
                         heads.pop(p, None)
         heads = [n for n in heads.iterkeys() if heads[n] != 0]
         roots = list(roots)
         assert orderedout
         assert roots
         assert heads
         return (orderedout, roots, heads)

     def heads(self, start=None, stop=None):
         """return the list of all nodes that have no children

         if start is specified, only heads that are descendants of
         start will be returned
         if stop is specified, it will consider all the revs from stop
         as if they had no children
         """
         if start is None and stop is None:
             count = len(self)
             if not count:
                 return [nullid]
             ishead = [1] * (count + 1)
             index = self.index
             for r in xrange(count):
                 e = index[r]
                 ishead[e[5]] = ishead[e[6]] = 0
             return [self.node(r) for r in xrange(count) if ishead[r]]

         if start is None:
             start = nullid
         if stop is None:
             stop = []
         stoprevs = set([self.rev(n) for n in stop])
         startrev = self.rev(start)
         reachable = set((startrev,))
         heads = set((startrev,))

         parentrevs = self.parentrevs
         for r in xrange(startrev + 1, len(self)):
             for p in parentrevs(r):
                 if p in reachable:
                     if r not in stoprevs:
                         reachable.add(r)
                         heads.add(r)
                     if p in heads and p not in stoprevs:
                         heads.remove(p)

         return [self.node(r) for r in heads]

     def children(self, node):
         """find the children of a given node"""
         c = []
         p = self.rev(node)
         for r in range(p + 1, len(self)):
             prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
             if prevs:
                 for pr in prevs:
                     if pr == p:
                         c.append(self.node(r))
             elif p == nullrev:
                 c.append(self.node(r))
         return c

     def _match(self, id):
         if isinstance(id, (long, int)):
             # rev
             return self.node(id)
         if len(id) == 20:
             # possibly a binary node
             # odds of a binary node being all hex in ASCII are 1 in 10**25
             try:
                 node = id
                 self.rev(node) # quick search the index
                 return node
             except LookupError:
                 pass # may be partial hex id
         try:
             # str(rev)
             rev = int(id)
             if str(rev) != id:
                 raise ValueError
             if rev < 0:
                 rev = len(self) + rev
             if rev < 0 or rev >= len(self):
                 raise ValueError
             return self.node(rev)
         except (ValueError, OverflowError):
             pass
         if len(id) == 40:
             try:
                 # a full hex nodeid?
                 node = bin(id)
                 self.rev(node)
                 return node
             except (TypeError, LookupError):
                 pass

     def _partialmatch(self, id):
         if len(id) < 40:
             try:
                 # hex(node)[:...]
                 l = len(id) / 2 # grab an even number of digits
                 bin_id = bin(id[:l*2])
                 nl = [n for n in self.nodemap if n[:l] == bin_id]
                 nl = [n for n in nl if hex(n).startswith(id)]
                 if len(nl) > 0:
                     if len(nl) == 1:
                         return nl[0]
                     raise LookupError(id, self.indexfile,
                                       _('ambiguous identifier'))
                 return None
             except TypeError:
                 pass

     def lookup(self, id):
         """locate a node based on:
             - revision number or str(revision number)
             - nodeid or subset of hex nodeid
         """
         n = self._match(id)
         if n is not None:
             return n
         n = self._partialmatch(id)
         if n:
             return n

         raise LookupError(id, self.indexfile, _('no match found'))

     def cmp(self, node, text):
         """compare text with a given file revision"""
         p1, p2 = self.parents(node)
         return hash(text, p1, p2) != node

     def _addchunk(self, offset, data):
         o, d = self._chunkcache
         # try to add to existing cache
         if o + len(d) == offset and len(d) + len(data) < _prereadsize:
             self._chunkcache = o, d + data
         else:
             self._chunkcache = offset, data


-    def _loadchunk(self, offset, length, df=None):
-        if not df:
-            if self._inline:
-                df = self.opener(self.indexfile)
-            else:
-                df = self.opener(self.datafile)
+    def _loadchunk(self, offset, length):
+        if self._inline:
+            df = self.opener(self.indexfile)
+        else:
+            df = self.opener(self.datafile)

        readahead = max(65536, length)
        df.seek(offset)
        d = df.read(readahead)
        self._addchunk(offset, d)
        if readahead > length:
            return d[:length]
        return d

-    def _getchunk(self, offset, length, df=None):
+    def _getchunk(self, offset, length):
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return d[cachestart:cacheend]

-        return self._loadchunk(offset, length, df)
+        return self._loadchunk(offset, length)

-    def _prime(self, startrev, endrev, df):
-        start = self.start(startrev)
-        end = self.end(endrev)
-        if self._inline:
-            start += (startrev + 1) * self._io.size
-            end += (startrev + 1) * self._io.size
-        self._loadchunk(start, end - start, df)
-
-    def chunk(self, rev, df=None):
-        start, length = self.start(rev), self.length(rev)
-        if self._inline:
-            start += (rev + 1) * self._io.size
-        return decompress(self._getchunk(start, length, df))
+    def _chunkraw(self, startrev, endrev):
+        start = self.start(startrev)
+        length = self.end(endrev) - start
+        if self._inline:
+            start += (startrev + 1) * self._io.size
+        return self._getchunk(start, length)
+
+    def _chunk(self, rev):
+        return decompress(self._chunkraw(rev, rev))
+
+    def _chunkclear(self):
+        self._chunkcache = (0, '')
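
The refactor leaves three private entry points in place of the old df-threading: _chunkraw(startrev, endrev) fetches (and caches) the raw bytes spanning a range of revisions, _chunk(rev) decompresses one revision's chunk, and _chunkclear() drops the window. One useful property, exploited by revision() below, is that a single _chunkraw call over a delta chain primes the cache, so the per-revision _chunk calls become cache slicing. A sketch of the pattern, where rl is any revlog instance:

def chunks_for_chain(rl, base, rev):
    # one readahead covers base..rev; later _chunk calls hit the cache
    rl._chunkraw(base, rev)
    return [rl._chunk(r) for r in xrange(base, rev + 1)]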

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
-            return self.chunk(rev2)
+            return self._chunk(rev2)

        return mdiff.textdiff(self.revision(self.node(rev1)),
                              self.revision(self.node(rev2)))

    def revision(self, node):
        """return an uncompressed revision of a given node"""
        if node == nullid:
            return ""
        if self._cache and self._cache[0] == node:
            return str(self._cache[2])

        # look up what we need to read
        text = None
        rev = self.rev(node)
        base = self.base(rev)

        # check rev flags
        if self.index[rev][0] & 0xFFFF:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.index[rev][0] & 0xFFFF))

-        df = None
-
        # do we have useful data cached?
        if self._cache and self._cache[1] >= base and self._cache[1] < rev:
            base = self._cache[1]
            text = str(self._cache[2])
-            self._loadindex(base, rev + 1)
-            if not self._inline and rev > base + 1:
-                df = self.opener(self.datafile)
-                self._prime(base, rev, df)
-        else:
-            self._loadindex(base, rev + 1)
-            if not self._inline and rev > base:
-                df = self.opener(self.datafile)
-                self._prime(base, rev, df)
-            text = self.chunk(base, df=df)
-
-        bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
+
+        self._loadindex(base, rev + 1)
+        self._chunkraw(base, rev)
+        if text is None:
+            text = self._chunk(base)
+
+        bins = [self._chunk(r) for r in xrange(base + 1, rev + 1)]
        text = mdiff.patches(text, bins)
        p1, p2 = self.parents(node)
        if node != hash(text, p1, p2):
            raise RevlogError(_("integrity check failed on %s:%d")
                              % (self.indexfile, rev))

        self._cache = (node, rev, text)
        return text
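
revision() hands the whole list of deltas to mdiff.patches, which composes them in one pass; logically the reconstruction is just repeated patching outward from the chain base. A naive equivalent, assuming a hypothetical apply_patch(text, delta) helper in place of mdiff's batched version:

def reconstruct(rl, rev):
    base = rl.base(rev)
    text = rl._chunk(base)              # full text stored at the base
    for r in xrange(base + 1, rev + 1):
        text = apply_patch(text, rl._chunk(r))   # apply_patch: hypothetical
    return text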

    def checkinlinesize(self, tr, fp=None):
        if not self._inline or (self.start(-2) + self.length(-2)) < 131072:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
-            calc = self._io.size
            for r in self:
-                start = self.start(r) + (r + 1) * calc
-                length = self.length(r)
-                d = self._getchunk(start, length)
-                df.write(d)
+                df.write(self._chunkraw(r, r))
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call rename, the temp file will never replace the
        # real index
        fp.rename()

-        tr.replace(self.indexfile, trindex * calc)
-        self._chunkcache = (0, '')
+        tr.replace(self.indexfile, trindex * self._io.size)
+        self._chunkclear()
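
The rewrite loop shrinks because _chunkraw already knows the inline layout: in an inline revlog the .i file interleaves one index entry per revision with that revision's data, which is where the (rev + 1) * self._io.size adjustments above come from. A restatement of the offset math:

def inline_data_offset(rl, r):
    # rev r's data sits after r + 1 index entries (revs 0..r) plus all
    # data bytes of the earlier revisions
    return rl.start(r) + (r + 1) * rl._io.size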

    def addrevision(self, text, transaction, link, p1, p2, d=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        d - an optional precomputed delta
        """
        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        try:
            return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
        node = hash(text, p1, p2)
        if node in self.nodemap:
            return node

        curr = len(self)
        prev = curr - 1
        base = self.base(prev)
        offset = self.end(prev)

        if curr:
            if not d:
                ptext = self.revision(self.node(prev))
                d = mdiff.textdiff(ptext, text)
            data = compress(d)
            l = len(data[1]) + len(data[0])
            dist = l + offset - self.start(base)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if not curr or dist > len(text) * 2:
            data = compress(text)
            l = len(data[1]) + len(data[0])
            base = curr

        e = (offset_type(offset, 0), l, len(text),
             base, link, self.rev(p1), self.rev(p2), node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

        self._cache = (node, curr, text)
        return node
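
The dist > len(text) * 2 cutoff bounds reconstruction cost in bytes rather than in chain length: dist is the span from the chain's full version to the end of the new delta, i.e. everything a reader would have to fetch. A worked example with assumed numbers:

start_base = 0      # file offset of the chain's full version
offset = 90000      # end of the previous revision's data
l = 11000           # compressed size of the new delta
textlen = 50000     # uncompressed size of the new revision

dist = l + offset - start_base   # 101000 bytes to read in the worst case
assert dist > textlen * 2        # 101000 > 100000: store a new full version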

    def ancestor(self, a, b):
        """calculate the least common ancestor of nodes a and b"""

        def parents(rev):
            return [p for p in self.parentrevs(rev) if p != nullrev]

        c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
        if c is None:
            return nullid

        return self.node(c)

    def group(self, nodelist, lookup, infocollect=None):
        """calculate a delta group

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. the first delta is
        parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
        have this parent as it has all history before these
        changesets. parent is parent[0]
        """

        revs = [self.rev(n) for n in nodelist]

        # if we don't have any revisions touched by these changesets, bail
        if not revs:
            yield changegroup.closechunk()
            return

        # add the parent of the first rev
        p = self.parentrevs(revs[0])[0]
        revs.insert(0, p)

        # build deltas
        for d in xrange(len(revs) - 1):
            a, b = revs[d], revs[d + 1]
            nb = self.node(b)

            if infocollect is not None:
                infocollect(nb)

            p = self.parents(nb)
            meta = nb + p[0] + p[1] + lookup(nb)
            if a == -1:
                d = self.revision(nb)
                meta += mdiff.trivialdiffheader(len(d))
            else:
                d = self.revdiff(a, b)
            yield changegroup.chunkheader(len(meta) + len(d))
            yield meta
            if len(d) > 2**20:
                pos = 0
                while pos < len(d):
                    pos2 = pos + 2 ** 18
                    yield d[pos:pos2]
                    pos = pos2
            else:
                yield d

        yield changegroup.closechunk()
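
Each chunk on the wire is length-prefixed. Assuming the changegroup.py of this era, chunkheader(n) packs a 4-byte big-endian length that includes the four header bytes themselves, and closechunk() emits a zero length as the terminator; the 80-byte meta block is the node, both parents, and the linked changeset node that addgroup() below unpacks with "20s20s20s20s". A sketch of the framing under those assumptions:

import struct

def frame_chunk(meta, delta):
    # length prefix counts itself (4 bytes) plus the payload
    yield struct.pack(">l", 4 + len(meta) + len(delta))
    yield meta     # node + p1 + p2 + cs, 4 x 20 bytes
    yield delta

def close_group():
    return struct.pack(">l", 0)   # zero length ends the group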

    def addgroup(self, revs, linkmapper, transaction):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.
        """

        #track the base of the current delta log
        r = len(self)
        t = r - 1
        node = None

        base = prev = nullrev
        start = end = textlen = 0
        if r:
            end = self.end(t)

        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        try:
            # loop through our set of deltas
            chain = None
            for chunk in revs:
                node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue
                delta = buffer(chunk, 80)
                del chunk

                for p in (p1, p2):
                    if not p in self.nodemap:
                        raise LookupError(p, self.indexfile, _('unknown parent'))

                if not chain:
                    # retrieve the parent revision of the delta chain
                    chain = p1
                    if not chain in self.nodemap:
                        raise LookupError(chain, self.indexfile, _('unknown base'))

                # full versions are inserted when the needed deltas become
                # comparable to the uncompressed text or when the previous
                # version is not the one we have a delta against. We use
                # the size of the previous full rev as a proxy for the
                # current size.

                if chain == prev:
                    cdelta = compress(delta)
                    cdeltalen = len(cdelta[0]) + len(cdelta[1])
                    textlen = mdiff.patchedsize(textlen, delta)

                if chain != prev or (end - start + cdeltalen) > textlen * 2:
                    # flush our writes here so we can read it in revision
                    if dfh:
                        dfh.flush()
                    ifh.flush()
                    text = self.revision(chain)
                    if len(text) == 0:
                        # skip over trivial delta header
                        text = buffer(delta, 12)
                    else:
                        text = mdiff.patches(text, [delta])
                    del delta
                    chk = self._addrevision(text, transaction, link, p1, p2, None,
                                            ifh, dfh)
                    if not dfh and not self._inline:
                        # addrevision switched from inline to conventional
                        # reopen the index
                        dfh = self.opener(self.datafile, "a")
                        ifh = self.opener(self.indexfile, "a")
                    if chk != node:
                        raise RevlogError(_("consistency error adding group"))
                    textlen = len(text)
                else:
                    e = (offset_type(end, 0), cdeltalen, textlen, base,
                         link, self.rev(p1), self.rev(p2), node)
                    self.index.insert(-1, e)
                    self.nodemap[node] = r
                    entry = self._io.packentry(e, self.node, self.version, r)
                    if self._inline:
                        ifh.write(entry)
                        ifh.write(cdelta[0])
                        ifh.write(cdelta[1])
                        self.checkinlinesize(transaction, ifh)
                        if not self._inline:
                            dfh = self.opener(self.datafile, "a")
                            ifh = self.opener(self.indexfile, "a")
                    else:
                        dfh.write(cdelta[0])
                        dfh.write(cdelta[1])
                        ifh.write(entry)

                t, r, chain, prev = r, r + 1, node, node
                base = self.base(t)
                start = self.start(base)
                end = self.end(t)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return node

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll readd them after this truncation.
        """
        if len(self) == 0:
            return

        if isinstance(self.index, lazyindex):
            self._loadindexmap()

        for rev in self:
            if self.index[rev][4] >= minlink:
                break
        else:
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
-        self._chunkcache = (0, '')
+        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
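
The two truncation offsets fall out of the file layouts: a separate-data revlog cuts the .d file at rev's data start and the .i file after rev index entries, while an inline revlog cuts the single .i file after rev entries plus all earlier data. A restatement of the arithmetic, with rl standing in for a revlog instance:

def strip_offsets(rl, rev):
    s = rl._io.size                    # bytes per index entry
    if not rl._inline:
        return rl.start(rev), rev * s  # (.d offset, .i offset)
    # inline: rev index entries interleaved with start(rev) data bytes
    # precede rev's own entry
    return rl.start(rev) + rev * s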

    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            s = self._io.size
            i = max(0, actual / s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)
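
checksize() reports how far each file's length is from what the index predicts: dd for the data file, di for the index (either can be negative if a file is truncated). A hypothetical verify-style caller, with rl and the messages assumed rather than taken from this changeset:

dd, di = rl.checksize()
if dd:
    print "data length off by %d bytes" % dd
if di:
    print "index length off by %d bytes" % di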

    def files(self):
        res = [ self.indexfile ]
        if not self._inline:
            res.append(self.datafile)
        return res