##// END OF EJS Templates
revlog.missing(): use sets instead of a dict
Benoit Boissinot -
r8453:d1ca637b default
parent child Browse files
Show More
@@ -1,1390 +1,1390
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2, incorporated herein by reference.
6 # GNU General Public License version 2, incorporated herein by reference.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from node import bin, hex, nullid, nullrev, short #@UnusedImport
15 from node import bin, hex, nullid, nullrev, short #@UnusedImport
16 from i18n import _
16 from i18n import _
17 import changegroup, ancestor, mdiff, parsers, error, util
17 import changegroup, ancestor, mdiff, parsers, error, util
18 import struct, zlib, errno
18 import struct, zlib, errno
19
19
20 _pack = struct.pack
20 _pack = struct.pack
21 _unpack = struct.unpack
21 _unpack = struct.unpack
22 _compress = zlib.compress
22 _compress = zlib.compress
23 _decompress = zlib.decompress
23 _decompress = zlib.decompress
24 _sha = util.sha1
24 _sha = util.sha1
25
25
26 # revlog flags
26 # revlog flags
27 REVLOGV0 = 0
27 REVLOGV0 = 0
28 REVLOGNG = 1
28 REVLOGNG = 1
29 REVLOGNGINLINEDATA = (1 << 16)
29 REVLOGNGINLINEDATA = (1 << 16)
30 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
30 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
31 REVLOG_DEFAULT_FORMAT = REVLOGNG
31 REVLOG_DEFAULT_FORMAT = REVLOGNG
32 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
32 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
33
33
34 _prereadsize = 1048576
34 _prereadsize = 1048576
35
35
36 RevlogError = error.RevlogError
36 RevlogError = error.RevlogError
37 LookupError = error.LookupError
37 LookupError = error.LookupError
38
38
39 def getoffset(q):
39 def getoffset(q):
40 return int(q >> 16)
40 return int(q >> 16)
41
41
42 def gettype(q):
42 def gettype(q):
43 return int(q & 0xFFFF)
43 return int(q & 0xFFFF)
44
44
45 def offset_type(offset, type):
45 def offset_type(offset, type):
46 return long(long(offset) << 16 | type)
46 return long(long(offset) << 16 | type)
47
47
48 nullhash = _sha(nullid)
48 nullhash = _sha(nullid)
49
49
50 def hash(text, p1, p2):
50 def hash(text, p1, p2):
51 """generate a hash from the given text and its parent hashes
51 """generate a hash from the given text and its parent hashes
52
52
53 This hash combines both the current file contents and its history
53 This hash combines both the current file contents and its history
54 in a manner that makes it easy to distinguish nodes with the same
54 in a manner that makes it easy to distinguish nodes with the same
55 content in the revision graph.
55 content in the revision graph.
56 """
56 """
57 # As of now, if one of the parent node is null, p2 is null
57 # As of now, if one of the parent node is null, p2 is null
58 if p2 == nullid:
58 if p2 == nullid:
59 # deep copy of a hash is faster than creating one
59 # deep copy of a hash is faster than creating one
60 s = nullhash.copy()
60 s = nullhash.copy()
61 s.update(p1)
61 s.update(p1)
62 else:
62 else:
63 # none of the parent nodes are nullid
63 # none of the parent nodes are nullid
64 l = [p1, p2]
64 l = [p1, p2]
65 l.sort()
65 l.sort()
66 s = _sha(l[0])
66 s = _sha(l[0])
67 s.update(l[1])
67 s.update(l[1])
68 s.update(text)
68 s.update(text)
69 return s.digest()
69 return s.digest()
70
70
71 def compress(text):
71 def compress(text):
72 """ generate a possibly-compressed representation of text """
72 """ generate a possibly-compressed representation of text """
73 if not text:
73 if not text:
74 return ("", text)
74 return ("", text)
75 l = len(text)
75 l = len(text)
76 bin = None
76 bin = None
77 if l < 44:
77 if l < 44:
78 pass
78 pass
79 elif l > 1000000:
79 elif l > 1000000:
80 # zlib makes an internal copy, thus doubling memory usage for
80 # zlib makes an internal copy, thus doubling memory usage for
81 # large files, so lets do this in pieces
81 # large files, so lets do this in pieces
82 z = zlib.compressobj()
82 z = zlib.compressobj()
83 p = []
83 p = []
84 pos = 0
84 pos = 0
85 while pos < l:
85 while pos < l:
86 pos2 = pos + 2**20
86 pos2 = pos + 2**20
87 p.append(z.compress(text[pos:pos2]))
87 p.append(z.compress(text[pos:pos2]))
88 pos = pos2
88 pos = pos2
89 p.append(z.flush())
89 p.append(z.flush())
90 if sum(map(len, p)) < l:
90 if sum(map(len, p)) < l:
91 bin = "".join(p)
91 bin = "".join(p)
92 else:
92 else:
93 bin = _compress(text)
93 bin = _compress(text)
94 if bin is None or len(bin) > l:
94 if bin is None or len(bin) > l:
95 if text[0] == '\0':
95 if text[0] == '\0':
96 return ("", text)
96 return ("", text)
97 return ('u', text)
97 return ('u', text)
98 return ("", bin)
98 return ("", bin)
99
99
100 def decompress(bin):
100 def decompress(bin):
101 """ decompress the given input """
101 """ decompress the given input """
102 if not bin:
102 if not bin:
103 return bin
103 return bin
104 t = bin[0]
104 t = bin[0]
105 if t == '\0':
105 if t == '\0':
106 return bin
106 return bin
107 if t == 'x':
107 if t == 'x':
108 return _decompress(bin)
108 return _decompress(bin)
109 if t == 'u':
109 if t == 'u':
110 return bin[1:]
110 return bin[1:]
111 raise RevlogError(_("unknown compression type %r") % t)
111 raise RevlogError(_("unknown compression type %r") % t)
112
112
113 class lazyparser(object):
113 class lazyparser(object):
114 """
114 """
115 this class avoids the need to parse the entirety of large indices
115 this class avoids the need to parse the entirety of large indices
116 """
116 """
117
117
118 # lazyparser is not safe to use on windows if win32 extensions not
118 # lazyparser is not safe to use on windows if win32 extensions not
119 # available. it keeps file handle open, which make it not possible
119 # available. it keeps file handle open, which make it not possible
120 # to break hardlinks on local cloned repos.
120 # to break hardlinks on local cloned repos.
121
121
122 def __init__(self, dataf, size):
122 def __init__(self, dataf, size):
123 self.dataf = dataf
123 self.dataf = dataf
124 self.s = struct.calcsize(indexformatng)
124 self.s = struct.calcsize(indexformatng)
125 self.datasize = size
125 self.datasize = size
126 self.l = size/self.s
126 self.l = size/self.s
127 self.index = [None] * self.l
127 self.index = [None] * self.l
128 self.map = {nullid: nullrev}
128 self.map = {nullid: nullrev}
129 self.allmap = 0
129 self.allmap = 0
130 self.all = 0
130 self.all = 0
131 self.mapfind_count = 0
131 self.mapfind_count = 0
132
132
133 def loadmap(self):
133 def loadmap(self):
134 """
134 """
135 during a commit, we need to make sure the rev being added is
135 during a commit, we need to make sure the rev being added is
136 not a duplicate. This requires loading the entire index,
136 not a duplicate. This requires loading the entire index,
137 which is fairly slow. loadmap can load up just the node map,
137 which is fairly slow. loadmap can load up just the node map,
138 which takes much less time.
138 which takes much less time.
139 """
139 """
140 if self.allmap:
140 if self.allmap:
141 return
141 return
142 end = self.datasize
142 end = self.datasize
143 self.allmap = 1
143 self.allmap = 1
144 cur = 0
144 cur = 0
145 count = 0
145 count = 0
146 blocksize = self.s * 256
146 blocksize = self.s * 256
147 self.dataf.seek(0)
147 self.dataf.seek(0)
148 while cur < end:
148 while cur < end:
149 data = self.dataf.read(blocksize)
149 data = self.dataf.read(blocksize)
150 off = 0
150 off = 0
151 for x in xrange(256):
151 for x in xrange(256):
152 n = data[off + ngshaoffset:off + ngshaoffset + 20]
152 n = data[off + ngshaoffset:off + ngshaoffset + 20]
153 self.map[n] = count
153 self.map[n] = count
154 count += 1
154 count += 1
155 if count >= self.l:
155 if count >= self.l:
156 break
156 break
157 off += self.s
157 off += self.s
158 cur += blocksize
158 cur += blocksize
159
159
160 def loadblock(self, blockstart, blocksize, data=None):
160 def loadblock(self, blockstart, blocksize, data=None):
161 if self.all:
161 if self.all:
162 return
162 return
163 if data is None:
163 if data is None:
164 self.dataf.seek(blockstart)
164 self.dataf.seek(blockstart)
165 if blockstart + blocksize > self.datasize:
165 if blockstart + blocksize > self.datasize:
166 # the revlog may have grown since we've started running,
166 # the revlog may have grown since we've started running,
167 # but we don't have space in self.index for more entries.
167 # but we don't have space in self.index for more entries.
168 # limit blocksize so that we don't get too much data.
168 # limit blocksize so that we don't get too much data.
169 blocksize = max(self.datasize - blockstart, 0)
169 blocksize = max(self.datasize - blockstart, 0)
170 data = self.dataf.read(blocksize)
170 data = self.dataf.read(blocksize)
171 lend = len(data) / self.s
171 lend = len(data) / self.s
172 i = blockstart / self.s
172 i = blockstart / self.s
173 off = 0
173 off = 0
174 # lazyindex supports __delitem__
174 # lazyindex supports __delitem__
175 if lend > len(self.index) - i:
175 if lend > len(self.index) - i:
176 lend = len(self.index) - i
176 lend = len(self.index) - i
177 for x in xrange(lend):
177 for x in xrange(lend):
178 if self.index[i + x] == None:
178 if self.index[i + x] == None:
179 b = data[off : off + self.s]
179 b = data[off : off + self.s]
180 self.index[i + x] = b
180 self.index[i + x] = b
181 n = b[ngshaoffset:ngshaoffset + 20]
181 n = b[ngshaoffset:ngshaoffset + 20]
182 self.map[n] = i + x
182 self.map[n] = i + x
183 off += self.s
183 off += self.s
184
184
185 def findnode(self, node):
185 def findnode(self, node):
186 """search backwards through the index file for a specific node"""
186 """search backwards through the index file for a specific node"""
187 if self.allmap:
187 if self.allmap:
188 return None
188 return None
189
189
190 # hg log will cause many many searches for the manifest
190 # hg log will cause many many searches for the manifest
191 # nodes. After we get called a few times, just load the whole
191 # nodes. After we get called a few times, just load the whole
192 # thing.
192 # thing.
193 if self.mapfind_count > 8:
193 if self.mapfind_count > 8:
194 self.loadmap()
194 self.loadmap()
195 if node in self.map:
195 if node in self.map:
196 return node
196 return node
197 return None
197 return None
198 self.mapfind_count += 1
198 self.mapfind_count += 1
199 last = self.l - 1
199 last = self.l - 1
200 while self.index[last] != None:
200 while self.index[last] != None:
201 if last == 0:
201 if last == 0:
202 self.all = 1
202 self.all = 1
203 self.allmap = 1
203 self.allmap = 1
204 return None
204 return None
205 last -= 1
205 last -= 1
206 end = (last + 1) * self.s
206 end = (last + 1) * self.s
207 blocksize = self.s * 256
207 blocksize = self.s * 256
208 while end >= 0:
208 while end >= 0:
209 start = max(end - blocksize, 0)
209 start = max(end - blocksize, 0)
210 self.dataf.seek(start)
210 self.dataf.seek(start)
211 data = self.dataf.read(end - start)
211 data = self.dataf.read(end - start)
212 findend = end - start
212 findend = end - start
213 while True:
213 while True:
214 # we're searching backwards, so we have to make sure
214 # we're searching backwards, so we have to make sure
215 # we don't find a changeset where this node is a parent
215 # we don't find a changeset where this node is a parent
216 off = data.find(node, 0, findend)
216 off = data.find(node, 0, findend)
217 findend = off
217 findend = off
218 if off >= 0:
218 if off >= 0:
219 i = off / self.s
219 i = off / self.s
220 off = i * self.s
220 off = i * self.s
221 n = data[off + ngshaoffset:off + ngshaoffset + 20]
221 n = data[off + ngshaoffset:off + ngshaoffset + 20]
222 if n == node:
222 if n == node:
223 self.map[n] = i + start / self.s
223 self.map[n] = i + start / self.s
224 return node
224 return node
225 else:
225 else:
226 break
226 break
227 end -= blocksize
227 end -= blocksize
228 return None
228 return None
229
229
230 def loadindex(self, i=None, end=None):
230 def loadindex(self, i=None, end=None):
231 if self.all:
231 if self.all:
232 return
232 return
233 all = False
233 all = False
234 if i == None:
234 if i == None:
235 blockstart = 0
235 blockstart = 0
236 blocksize = (65536 / self.s) * self.s
236 blocksize = (65536 / self.s) * self.s
237 end = self.datasize
237 end = self.datasize
238 all = True
238 all = True
239 else:
239 else:
240 if end:
240 if end:
241 blockstart = i * self.s
241 blockstart = i * self.s
242 end = end * self.s
242 end = end * self.s
243 blocksize = end - blockstart
243 blocksize = end - blockstart
244 else:
244 else:
245 blockstart = (i & ~1023) * self.s
245 blockstart = (i & ~1023) * self.s
246 blocksize = self.s * 1024
246 blocksize = self.s * 1024
247 end = blockstart + blocksize
247 end = blockstart + blocksize
248 while blockstart < end:
248 while blockstart < end:
249 self.loadblock(blockstart, blocksize)
249 self.loadblock(blockstart, blocksize)
250 blockstart += blocksize
250 blockstart += blocksize
251 if all:
251 if all:
252 self.all = True
252 self.all = True
253
253
254 class lazyindex(object):
254 class lazyindex(object):
255 """a lazy version of the index array"""
255 """a lazy version of the index array"""
256 def __init__(self, parser):
256 def __init__(self, parser):
257 self.p = parser
257 self.p = parser
258 def __len__(self):
258 def __len__(self):
259 return len(self.p.index)
259 return len(self.p.index)
260 def load(self, pos):
260 def load(self, pos):
261 if pos < 0:
261 if pos < 0:
262 pos += len(self.p.index)
262 pos += len(self.p.index)
263 self.p.loadindex(pos)
263 self.p.loadindex(pos)
264 return self.p.index[pos]
264 return self.p.index[pos]
265 def __getitem__(self, pos):
265 def __getitem__(self, pos):
266 return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
266 return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
267 def __setitem__(self, pos, item):
267 def __setitem__(self, pos, item):
268 self.p.index[pos] = _pack(indexformatng, *item)
268 self.p.index[pos] = _pack(indexformatng, *item)
269 def __delitem__(self, pos):
269 def __delitem__(self, pos):
270 del self.p.index[pos]
270 del self.p.index[pos]
271 def insert(self, pos, e):
271 def insert(self, pos, e):
272 self.p.index.insert(pos, _pack(indexformatng, *e))
272 self.p.index.insert(pos, _pack(indexformatng, *e))
273 def append(self, e):
273 def append(self, e):
274 self.p.index.append(_pack(indexformatng, *e))
274 self.p.index.append(_pack(indexformatng, *e))
275
275
276 class lazymap(object):
276 class lazymap(object):
277 """a lazy version of the node map"""
277 """a lazy version of the node map"""
278 def __init__(self, parser):
278 def __init__(self, parser):
279 self.p = parser
279 self.p = parser
280 def load(self, key):
280 def load(self, key):
281 n = self.p.findnode(key)
281 n = self.p.findnode(key)
282 if n == None:
282 if n == None:
283 raise KeyError(key)
283 raise KeyError(key)
284 def __contains__(self, key):
284 def __contains__(self, key):
285 if key in self.p.map:
285 if key in self.p.map:
286 return True
286 return True
287 self.p.loadmap()
287 self.p.loadmap()
288 return key in self.p.map
288 return key in self.p.map
289 def __iter__(self):
289 def __iter__(self):
290 yield nullid
290 yield nullid
291 for i in xrange(self.p.l):
291 for i in xrange(self.p.l):
292 ret = self.p.index[i]
292 ret = self.p.index[i]
293 if not ret:
293 if not ret:
294 self.p.loadindex(i)
294 self.p.loadindex(i)
295 ret = self.p.index[i]
295 ret = self.p.index[i]
296 if isinstance(ret, str):
296 if isinstance(ret, str):
297 ret = _unpack(indexformatng, ret)
297 ret = _unpack(indexformatng, ret)
298 yield ret[7]
298 yield ret[7]
299 def __getitem__(self, key):
299 def __getitem__(self, key):
300 try:
300 try:
301 return self.p.map[key]
301 return self.p.map[key]
302 except KeyError:
302 except KeyError:
303 try:
303 try:
304 self.load(key)
304 self.load(key)
305 return self.p.map[key]
305 return self.p.map[key]
306 except KeyError:
306 except KeyError:
307 raise KeyError("node " + hex(key))
307 raise KeyError("node " + hex(key))
308 def __setitem__(self, key, val):
308 def __setitem__(self, key, val):
309 self.p.map[key] = val
309 self.p.map[key] = val
310 def __delitem__(self, key):
310 def __delitem__(self, key):
311 del self.p.map[key]
311 del self.p.map[key]
312
312
313 indexformatv0 = ">4l20s20s20s"
313 indexformatv0 = ">4l20s20s20s"
314 v0shaoffset = 56
314 v0shaoffset = 56
315
315
316 class revlogoldio(object):
316 class revlogoldio(object):
317 def __init__(self):
317 def __init__(self):
318 self.size = struct.calcsize(indexformatv0)
318 self.size = struct.calcsize(indexformatv0)
319
319
320 def parseindex(self, fp, data, inline):
320 def parseindex(self, fp, data, inline):
321 s = self.size
321 s = self.size
322 index = []
322 index = []
323 nodemap = {nullid: nullrev}
323 nodemap = {nullid: nullrev}
324 n = off = 0
324 n = off = 0
325 if len(data) < _prereadsize:
325 if len(data) < _prereadsize:
326 data += fp.read() # read the rest
326 data += fp.read() # read the rest
327 l = len(data)
327 l = len(data)
328 while off + s <= l:
328 while off + s <= l:
329 cur = data[off:off + s]
329 cur = data[off:off + s]
330 off += s
330 off += s
331 e = _unpack(indexformatv0, cur)
331 e = _unpack(indexformatv0, cur)
332 # transform to revlogv1 format
332 # transform to revlogv1 format
333 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
333 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
334 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
334 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
335 index.append(e2)
335 index.append(e2)
336 nodemap[e[6]] = n
336 nodemap[e[6]] = n
337 n += 1
337 n += 1
338
338
339 return index, nodemap, None
339 return index, nodemap, None
340
340
341 def packentry(self, entry, node, version, rev):
341 def packentry(self, entry, node, version, rev):
342 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
342 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
343 node(entry[5]), node(entry[6]), entry[7])
343 node(entry[5]), node(entry[6]), entry[7])
344 return _pack(indexformatv0, *e2)
344 return _pack(indexformatv0, *e2)
345
345
346 # index ng:
346 # index ng:
347 # 6 bytes offset
347 # 6 bytes offset
348 # 2 bytes flags
348 # 2 bytes flags
349 # 4 bytes compressed length
349 # 4 bytes compressed length
350 # 4 bytes uncompressed length
350 # 4 bytes uncompressed length
351 # 4 bytes: base rev
351 # 4 bytes: base rev
352 # 4 bytes link rev
352 # 4 bytes link rev
353 # 4 bytes parent 1 rev
353 # 4 bytes parent 1 rev
354 # 4 bytes parent 2 rev
354 # 4 bytes parent 2 rev
355 # 32 bytes: nodeid
355 # 32 bytes: nodeid
356 indexformatng = ">Qiiiiii20s12x"
356 indexformatng = ">Qiiiiii20s12x"
357 ngshaoffset = 32
357 ngshaoffset = 32
358 versionformat = ">I"
358 versionformat = ">I"
359
359
360 class revlogio(object):
360 class revlogio(object):
361 def __init__(self):
361 def __init__(self):
362 self.size = struct.calcsize(indexformatng)
362 self.size = struct.calcsize(indexformatng)
363
363
364 def parseindex(self, fp, data, inline):
364 def parseindex(self, fp, data, inline):
365 try:
365 try:
366 size = len(data)
366 size = len(data)
367 if size == _prereadsize:
367 if size == _prereadsize:
368 size = util.fstat(fp).st_size
368 size = util.fstat(fp).st_size
369 except AttributeError:
369 except AttributeError:
370 size = 0
370 size = 0
371
371
372 if util.openhardlinks() and not inline and size > _prereadsize:
372 if util.openhardlinks() and not inline and size > _prereadsize:
373 # big index, let's parse it on demand
373 # big index, let's parse it on demand
374 parser = lazyparser(fp, size)
374 parser = lazyparser(fp, size)
375 index = lazyindex(parser)
375 index = lazyindex(parser)
376 nodemap = lazymap(parser)
376 nodemap = lazymap(parser)
377 e = list(index[0])
377 e = list(index[0])
378 type = gettype(e[0])
378 type = gettype(e[0])
379 e[0] = offset_type(0, type)
379 e[0] = offset_type(0, type)
380 index[0] = e
380 index[0] = e
381 return index, nodemap, None
381 return index, nodemap, None
382
382
383 # call the C implementation to parse the index data
383 # call the C implementation to parse the index data
384 index, nodemap, cache = parsers.parse_index(data, inline)
384 index, nodemap, cache = parsers.parse_index(data, inline)
385 return index, nodemap, cache
385 return index, nodemap, cache
386
386
387 def packentry(self, entry, node, version, rev):
387 def packentry(self, entry, node, version, rev):
388 p = _pack(indexformatng, *entry)
388 p = _pack(indexformatng, *entry)
389 if rev == 0:
389 if rev == 0:
390 p = _pack(versionformat, version) + p[4:]
390 p = _pack(versionformat, version) + p[4:]
391 return p
391 return p
392
392
393 class revlog(object):
393 class revlog(object):
394 """
394 """
395 the underlying revision storage object
395 the underlying revision storage object
396
396
397 A revlog consists of two parts, an index and the revision data.
397 A revlog consists of two parts, an index and the revision data.
398
398
399 The index is a file with a fixed record size containing
399 The index is a file with a fixed record size containing
400 information on each revision, including its nodeid (hash), the
400 information on each revision, including its nodeid (hash), the
401 nodeids of its parents, the position and offset of its data within
401 nodeids of its parents, the position and offset of its data within
402 the data file, and the revision it's based on. Finally, each entry
402 the data file, and the revision it's based on. Finally, each entry
403 contains a linkrev entry that can serve as a pointer to external
403 contains a linkrev entry that can serve as a pointer to external
404 data.
404 data.
405
405
406 The revision data itself is a linear collection of data chunks.
406 The revision data itself is a linear collection of data chunks.
407 Each chunk represents a revision and is usually represented as a
407 Each chunk represents a revision and is usually represented as a
408 delta against the previous chunk. To bound lookup time, runs of
408 delta against the previous chunk. To bound lookup time, runs of
409 deltas are limited to about 2 times the length of the original
409 deltas are limited to about 2 times the length of the original
410 version data. This makes retrieval of a version proportional to
410 version data. This makes retrieval of a version proportional to
411 its size, or O(1) relative to the number of revisions.
411 its size, or O(1) relative to the number of revisions.
412
412
413 Both pieces of the revlog are written to in an append-only
413 Both pieces of the revlog are written to in an append-only
414 fashion, which means we never need to rewrite a file to insert or
414 fashion, which means we never need to rewrite a file to insert or
415 remove data, and can use some simple techniques to avoid the need
415 remove data, and can use some simple techniques to avoid the need
416 for locking while reading.
416 for locking while reading.
417 """
417 """
418 def __init__(self, opener, indexfile):
418 def __init__(self, opener, indexfile):
419 """
419 """
420 create a revlog object
420 create a revlog object
421
421
422 opener is a function that abstracts the file opening operation
422 opener is a function that abstracts the file opening operation
423 and can be used to implement COW semantics or the like.
423 and can be used to implement COW semantics or the like.
424 """
424 """
425 self.indexfile = indexfile
425 self.indexfile = indexfile
426 self.datafile = indexfile[:-2] + ".d"
426 self.datafile = indexfile[:-2] + ".d"
427 self.opener = opener
427 self.opener = opener
428 self._cache = None
428 self._cache = None
429 self._chunkcache = (0, '')
429 self._chunkcache = (0, '')
430 self.nodemap = {nullid: nullrev}
430 self.nodemap = {nullid: nullrev}
431 self.index = []
431 self.index = []
432
432
433 v = REVLOG_DEFAULT_VERSION
433 v = REVLOG_DEFAULT_VERSION
434 if hasattr(opener, "defversion"):
434 if hasattr(opener, "defversion"):
435 v = opener.defversion
435 v = opener.defversion
436 if v & REVLOGNG:
436 if v & REVLOGNG:
437 v |= REVLOGNGINLINEDATA
437 v |= REVLOGNGINLINEDATA
438
438
439 i = ''
439 i = ''
440 try:
440 try:
441 f = self.opener(self.indexfile)
441 f = self.opener(self.indexfile)
442 i = f.read(_prereadsize)
442 i = f.read(_prereadsize)
443 if len(i) > 0:
443 if len(i) > 0:
444 v = struct.unpack(versionformat, i[:4])[0]
444 v = struct.unpack(versionformat, i[:4])[0]
445 except IOError, inst:
445 except IOError, inst:
446 if inst.errno != errno.ENOENT:
446 if inst.errno != errno.ENOENT:
447 raise
447 raise
448
448
449 self.version = v
449 self.version = v
450 self._inline = v & REVLOGNGINLINEDATA
450 self._inline = v & REVLOGNGINLINEDATA
451 flags = v & ~0xFFFF
451 flags = v & ~0xFFFF
452 fmt = v & 0xFFFF
452 fmt = v & 0xFFFF
453 if fmt == REVLOGV0 and flags:
453 if fmt == REVLOGV0 and flags:
454 raise RevlogError(_("index %s unknown flags %#04x for format v0")
454 raise RevlogError(_("index %s unknown flags %#04x for format v0")
455 % (self.indexfile, flags >> 16))
455 % (self.indexfile, flags >> 16))
456 elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
456 elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
457 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
457 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
458 % (self.indexfile, flags >> 16))
458 % (self.indexfile, flags >> 16))
459 elif fmt > REVLOGNG:
459 elif fmt > REVLOGNG:
460 raise RevlogError(_("index %s unknown format %d")
460 raise RevlogError(_("index %s unknown format %d")
461 % (self.indexfile, fmt))
461 % (self.indexfile, fmt))
462
462
463 self._io = revlogio()
463 self._io = revlogio()
464 if self.version == REVLOGV0:
464 if self.version == REVLOGV0:
465 self._io = revlogoldio()
465 self._io = revlogoldio()
466 if i:
466 if i:
467 try:
467 try:
468 d = self._io.parseindex(f, i, self._inline)
468 d = self._io.parseindex(f, i, self._inline)
469 except (ValueError, IndexError), e:
469 except (ValueError, IndexError), e:
470 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
470 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
471 self.index, self.nodemap, self._chunkcache = d
471 self.index, self.nodemap, self._chunkcache = d
472 if not self._chunkcache:
472 if not self._chunkcache:
473 self._chunkcache = (0, '')
473 self._chunkcache = (0, '')
474
474
475 # add the magic null revision at -1 (if it hasn't been done already)
475 # add the magic null revision at -1 (if it hasn't been done already)
476 if (self.index == [] or isinstance(self.index, lazyindex) or
476 if (self.index == [] or isinstance(self.index, lazyindex) or
477 self.index[-1][7] != nullid) :
477 self.index[-1][7] != nullid) :
478 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
478 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
479
479
480 def _loadindex(self, start, end):
480 def _loadindex(self, start, end):
481 """load a block of indexes all at once from the lazy parser"""
481 """load a block of indexes all at once from the lazy parser"""
482 if isinstance(self.index, lazyindex):
482 if isinstance(self.index, lazyindex):
483 self.index.p.loadindex(start, end)
483 self.index.p.loadindex(start, end)
484
484
485 def _loadindexmap(self):
485 def _loadindexmap(self):
486 """loads both the map and the index from the lazy parser"""
486 """loads both the map and the index from the lazy parser"""
487 if isinstance(self.index, lazyindex):
487 if isinstance(self.index, lazyindex):
488 p = self.index.p
488 p = self.index.p
489 p.loadindex()
489 p.loadindex()
490 self.nodemap = p.map
490 self.nodemap = p.map
491
491
492 def _loadmap(self):
492 def _loadmap(self):
493 """loads the map from the lazy parser"""
493 """loads the map from the lazy parser"""
494 if isinstance(self.nodemap, lazymap):
494 if isinstance(self.nodemap, lazymap):
495 self.nodemap.p.loadmap()
495 self.nodemap.p.loadmap()
496 self.nodemap = self.nodemap.p.map
496 self.nodemap = self.nodemap.p.map
497
497
498 def tip(self):
498 def tip(self):
499 return self.node(len(self.index) - 2)
499 return self.node(len(self.index) - 2)
500 def __len__(self):
500 def __len__(self):
501 return len(self.index) - 1
501 return len(self.index) - 1
502 def __iter__(self):
502 def __iter__(self):
503 for i in xrange(len(self)):
503 for i in xrange(len(self)):
504 yield i
504 yield i
505 def rev(self, node):
505 def rev(self, node):
506 try:
506 try:
507 return self.nodemap[node]
507 return self.nodemap[node]
508 except KeyError:
508 except KeyError:
509 raise LookupError(node, self.indexfile, _('no node'))
509 raise LookupError(node, self.indexfile, _('no node'))
510 def node(self, rev):
510 def node(self, rev):
511 return self.index[rev][7]
511 return self.index[rev][7]
512 def linkrev(self, rev):
512 def linkrev(self, rev):
513 return self.index[rev][4]
513 return self.index[rev][4]
514 def parents(self, node):
514 def parents(self, node):
515 i = self.index
515 i = self.index
516 d = i[self.rev(node)]
516 d = i[self.rev(node)]
517 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
517 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
518 def parentrevs(self, rev):
518 def parentrevs(self, rev):
519 return self.index[rev][5:7]
519 return self.index[rev][5:7]
520 def start(self, rev):
520 def start(self, rev):
521 return int(self.index[rev][0] >> 16)
521 return int(self.index[rev][0] >> 16)
522 def end(self, rev):
522 def end(self, rev):
523 return self.start(rev) + self.length(rev)
523 return self.start(rev) + self.length(rev)
524 def length(self, rev):
524 def length(self, rev):
525 return self.index[rev][1]
525 return self.index[rev][1]
526 def base(self, rev):
526 def base(self, rev):
527 return self.index[rev][3]
527 return self.index[rev][3]
528
528
529 def size(self, rev):
529 def size(self, rev):
530 """return the length of the uncompressed text for a given revision"""
530 """return the length of the uncompressed text for a given revision"""
531 l = self.index[rev][2]
531 l = self.index[rev][2]
532 if l >= 0:
532 if l >= 0:
533 return l
533 return l
534
534
535 t = self.revision(self.node(rev))
535 t = self.revision(self.node(rev))
536 return len(t)
536 return len(t)
537
537
538 # alternate implementation, The advantage to this code is it
538 # alternate implementation, The advantage to this code is it
539 # will be faster for a single revision. But, the results are not
539 # will be faster for a single revision. But, the results are not
540 # cached, so finding the size of every revision will be slower.
540 # cached, so finding the size of every revision will be slower.
541 """
541 """
542 if self.cache and self.cache[1] == rev:
542 if self.cache and self.cache[1] == rev:
543 return len(self.cache[2])
543 return len(self.cache[2])
544
544
545 base = self.base(rev)
545 base = self.base(rev)
546 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
546 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
547 base = self.cache[1]
547 base = self.cache[1]
548 text = self.cache[2]
548 text = self.cache[2]
549 else:
549 else:
550 text = self.revision(self.node(base))
550 text = self.revision(self.node(base))
551
551
552 l = len(text)
552 l = len(text)
553 for x in xrange(base + 1, rev + 1):
553 for x in xrange(base + 1, rev + 1):
554 l = mdiff.patchedsize(l, self.chunk(x))
554 l = mdiff.patchedsize(l, self.chunk(x))
555 return l
555 return l
556 """
556 """
557
557
558 def reachable(self, node, stop=None):
558 def reachable(self, node, stop=None):
559 """return a hash of all nodes ancestral to a given node, including
559 """return a hash of all nodes ancestral to a given node, including
560 the node itself, stopping when stop is matched"""
560 the node itself, stopping when stop is matched"""
561 reachable = {}
561 reachable = {}
562 visit = [node]
562 visit = [node]
563 reachable[node] = 1
563 reachable[node] = 1
564 if stop:
564 if stop:
565 stopn = self.rev(stop)
565 stopn = self.rev(stop)
566 else:
566 else:
567 stopn = 0
567 stopn = 0
568 while visit:
568 while visit:
569 n = visit.pop(0)
569 n = visit.pop(0)
570 if n == stop:
570 if n == stop:
571 continue
571 continue
572 if n == nullid:
572 if n == nullid:
573 continue
573 continue
574 for p in self.parents(n):
574 for p in self.parents(n):
575 if self.rev(p) < stopn:
575 if self.rev(p) < stopn:
576 continue
576 continue
577 if p not in reachable:
577 if p not in reachable:
578 reachable[p] = 1
578 reachable[p] = 1
579 visit.append(p)
579 visit.append(p)
580 return reachable
580 return reachable
581
581
582 def ancestors(self, *revs):
582 def ancestors(self, *revs):
583 'Generate the ancestors of revs using a breadth-first visit'
583 'Generate the ancestors of revs using a breadth-first visit'
584 visit = list(revs)
584 visit = list(revs)
585 seen = set([nullrev])
585 seen = set([nullrev])
586 while visit:
586 while visit:
587 for parent in self.parentrevs(visit.pop(0)):
587 for parent in self.parentrevs(visit.pop(0)):
588 if parent not in seen:
588 if parent not in seen:
589 visit.append(parent)
589 visit.append(parent)
590 seen.add(parent)
590 seen.add(parent)
591 yield parent
591 yield parent
592
592
593 def descendants(self, *revs):
593 def descendants(self, *revs):
594 'Generate the descendants of revs in topological order'
594 'Generate the descendants of revs in topological order'
595 seen = set(revs)
595 seen = set(revs)
596 for i in xrange(min(revs) + 1, len(self)):
596 for i in xrange(min(revs) + 1, len(self)):
597 for x in self.parentrevs(i):
597 for x in self.parentrevs(i):
598 if x != nullrev and x in seen:
598 if x != nullrev and x in seen:
599 seen.add(i)
599 seen.add(i)
600 yield i
600 yield i
601 break
601 break
602
602
603 def findmissing(self, common=None, heads=None):
603 def findmissing(self, common=None, heads=None):
604 '''
604 '''
605 returns the topologically sorted list of nodes from the set:
605 returns the topologically sorted list of nodes from the set:
606 missing = (ancestors(heads) \ ancestors(common))
606 missing = (ancestors(heads) \ ancestors(common))
607
607
608 where ancestors() is the set of ancestors from heads, heads included
608 where ancestors() is the set of ancestors from heads, heads included
609
609
610 if heads is None, the heads of the revlog are used
610 if heads is None, the heads of the revlog are used
611 if common is None, nullid is assumed to be a common node
611 if common is None, nullid is assumed to be a common node
612 '''
612 '''
613 if common is None:
613 if common is None:
614 common = [nullid]
614 common = [nullid]
615 if heads is None:
615 if heads is None:
616 heads = self.heads()
616 heads = self.heads()
617
617
618 common = [self.rev(n) for n in common]
618 common = [self.rev(n) for n in common]
619 heads = [self.rev(n) for n in heads]
619 heads = [self.rev(n) for n in heads]
620
620
621 # we want the ancestors, but inclusive
621 # we want the ancestors, but inclusive
622 has = set(self.ancestors(*common))
622 has = set(self.ancestors(*common))
623 has.add(nullrev)
623 has.add(nullrev)
624 has.update(common)
624 has.update(common)
625
625
626 # take all ancestors from heads that aren't in has
626 # take all ancestors from heads that aren't in has
627 missing = {}
627 missing = set()
628 visit = [r for r in heads if r not in has]
628 visit = [r for r in heads if r not in has]
629 while visit:
629 while visit:
630 r = visit.pop(0)
630 r = visit.pop(0)
631 if r in missing:
631 if r in missing:
632 continue
632 continue
633 else:
633 else:
634 missing[r] = None
634 missing.add(r)
635 for p in self.parentrevs(r):
635 for p in self.parentrevs(r):
636 if p not in has:
636 if p not in has:
637 visit.append(p)
637 visit.append(p)
638 missing = missing.keys()
638 missing = list(missing)
639 missing.sort()
639 missing.sort()
640 return [self.node(r) for r in missing]
640 return [self.node(r) for r in missing]
641
641
642 def nodesbetween(self, roots=None, heads=None):
642 def nodesbetween(self, roots=None, heads=None):
643 """Return a tuple containing three elements. Elements 1 and 2 contain
643 """Return a tuple containing three elements. Elements 1 and 2 contain
644 a final list bases and heads after all the unreachable ones have been
644 a final list bases and heads after all the unreachable ones have been
645 pruned. Element 0 contains a topologically sorted list of all
645 pruned. Element 0 contains a topologically sorted list of all
646
646
647 nodes that satisfy these constraints:
647 nodes that satisfy these constraints:
648 1. All nodes must be descended from a node in roots (the nodes on
648 1. All nodes must be descended from a node in roots (the nodes on
649 roots are considered descended from themselves).
649 roots are considered descended from themselves).
650 2. All nodes must also be ancestors of a node in heads (the nodes in
650 2. All nodes must also be ancestors of a node in heads (the nodes in
651 heads are considered to be their own ancestors).
651 heads are considered to be their own ancestors).
652
652
653 If roots is unspecified, nullid is assumed as the only root.
653 If roots is unspecified, nullid is assumed as the only root.
654 If heads is unspecified, it is taken to be the output of the
654 If heads is unspecified, it is taken to be the output of the
655 heads method (i.e. a list of all nodes in the repository that
655 heads method (i.e. a list of all nodes in the repository that
656 have no children)."""
656 have no children)."""
657 nonodes = ([], [], [])
657 nonodes = ([], [], [])
658 if roots is not None:
658 if roots is not None:
659 roots = list(roots)
659 roots = list(roots)
660 if not roots:
660 if not roots:
661 return nonodes
661 return nonodes
662 lowestrev = min([self.rev(n) for n in roots])
662 lowestrev = min([self.rev(n) for n in roots])
663 else:
663 else:
664 roots = [nullid] # Everybody's a descendent of nullid
664 roots = [nullid] # Everybody's a descendent of nullid
665 lowestrev = nullrev
665 lowestrev = nullrev
666 if (lowestrev == nullrev) and (heads is None):
666 if (lowestrev == nullrev) and (heads is None):
667 # We want _all_ the nodes!
667 # We want _all_ the nodes!
668 return ([self.node(r) for r in self], [nullid], list(self.heads()))
668 return ([self.node(r) for r in self], [nullid], list(self.heads()))
669 if heads is None:
669 if heads is None:
670 # All nodes are ancestors, so the latest ancestor is the last
670 # All nodes are ancestors, so the latest ancestor is the last
671 # node.
671 # node.
672 highestrev = len(self) - 1
672 highestrev = len(self) - 1
673 # Set ancestors to None to signal that every node is an ancestor.
673 # Set ancestors to None to signal that every node is an ancestor.
674 ancestors = None
674 ancestors = None
675 # Set heads to an empty dictionary for later discovery of heads
675 # Set heads to an empty dictionary for later discovery of heads
676 heads = {}
676 heads = {}
677 else:
677 else:
678 heads = list(heads)
678 heads = list(heads)
679 if not heads:
679 if not heads:
680 return nonodes
680 return nonodes
681 ancestors = {}
681 ancestors = {}
682 # Turn heads into a dictionary so we can remove 'fake' heads.
682 # Turn heads into a dictionary so we can remove 'fake' heads.
683 # Also, later we will be using it to filter out the heads we can't
683 # Also, later we will be using it to filter out the heads we can't
684 # find from roots.
684 # find from roots.
685 heads = dict.fromkeys(heads, 0)
685 heads = dict.fromkeys(heads, 0)
686 # Start at the top and keep marking parents until we're done.
686 # Start at the top and keep marking parents until we're done.
687 nodestotag = set(heads)
687 nodestotag = set(heads)
688 # Remember where the top was so we can use it as a limit later.
688 # Remember where the top was so we can use it as a limit later.
689 highestrev = max([self.rev(n) for n in nodestotag])
689 highestrev = max([self.rev(n) for n in nodestotag])
690 while nodestotag:
690 while nodestotag:
691 # grab a node to tag
691 # grab a node to tag
692 n = nodestotag.pop()
692 n = nodestotag.pop()
693 # Never tag nullid
693 # Never tag nullid
694 if n == nullid:
694 if n == nullid:
695 continue
695 continue
696 # A node's revision number represents its place in a
696 # A node's revision number represents its place in a
697 # topologically sorted list of nodes.
697 # topologically sorted list of nodes.
698 r = self.rev(n)
698 r = self.rev(n)
699 if r >= lowestrev:
699 if r >= lowestrev:
700 if n not in ancestors:
700 if n not in ancestors:
701 # If we are possibly a descendent of one of the roots
701 # If we are possibly a descendent of one of the roots
702 # and we haven't already been marked as an ancestor
702 # and we haven't already been marked as an ancestor
703 ancestors[n] = 1 # Mark as ancestor
703 ancestors[n] = 1 # Mark as ancestor
704 # Add non-nullid parents to list of nodes to tag.
704 # Add non-nullid parents to list of nodes to tag.
705 nodestotag.update([p for p in self.parents(n) if
705 nodestotag.update([p for p in self.parents(n) if
706 p != nullid])
706 p != nullid])
707 elif n in heads: # We've seen it before, is it a fake head?
707 elif n in heads: # We've seen it before, is it a fake head?
708 # So it is, real heads should not be the ancestors of
708 # So it is, real heads should not be the ancestors of
709 # any other heads.
709 # any other heads.
710 heads.pop(n)
710 heads.pop(n)
711 if not ancestors:
711 if not ancestors:
712 return nonodes
712 return nonodes
713 # Now that we have our set of ancestors, we want to remove any
713 # Now that we have our set of ancestors, we want to remove any
714 # roots that are not ancestors.
714 # roots that are not ancestors.
715
715
716 # If one of the roots was nullid, everything is included anyway.
716 # If one of the roots was nullid, everything is included anyway.
717 if lowestrev > nullrev:
717 if lowestrev > nullrev:
718 # But, since we weren't, let's recompute the lowest rev to not
718 # But, since we weren't, let's recompute the lowest rev to not
719 # include roots that aren't ancestors.
719 # include roots that aren't ancestors.
720
720
721 # Filter out roots that aren't ancestors of heads
721 # Filter out roots that aren't ancestors of heads
722 roots = [n for n in roots if n in ancestors]
722 roots = [n for n in roots if n in ancestors]
723 # Recompute the lowest revision
723 # Recompute the lowest revision
724 if roots:
724 if roots:
725 lowestrev = min([self.rev(n) for n in roots])
725 lowestrev = min([self.rev(n) for n in roots])
726 else:
726 else:
727 # No more roots? Return empty list
727 # No more roots? Return empty list
728 return nonodes
728 return nonodes
729 else:
729 else:
730 # We are descending from nullid, and don't need to care about
730 # We are descending from nullid, and don't need to care about
731 # any other roots.
731 # any other roots.
732 lowestrev = nullrev
732 lowestrev = nullrev
733 roots = [nullid]
733 roots = [nullid]
734 # Transform our roots list into a set.
734 # Transform our roots list into a set.
735 descendents = set(roots)
735 descendents = set(roots)
736 # Also, keep the original roots so we can filter out roots that aren't
736 # Also, keep the original roots so we can filter out roots that aren't
737 # 'real' roots (i.e. are descended from other roots).
737 # 'real' roots (i.e. are descended from other roots).
738 roots = descendents.copy()
738 roots = descendents.copy()
739 # Our topologically sorted list of output nodes.
739 # Our topologically sorted list of output nodes.
740 orderedout = []
740 orderedout = []
741 # Don't start at nullid since we don't want nullid in our output list,
741 # Don't start at nullid since we don't want nullid in our output list,
742 # and if nullid shows up in descedents, empty parents will look like
742 # and if nullid shows up in descedents, empty parents will look like
743 # they're descendents.
743 # they're descendents.
744 for r in xrange(max(lowestrev, 0), highestrev + 1):
744 for r in xrange(max(lowestrev, 0), highestrev + 1):
745 n = self.node(r)
745 n = self.node(r)
746 isdescendent = False
746 isdescendent = False
747 if lowestrev == nullrev: # Everybody is a descendent of nullid
747 if lowestrev == nullrev: # Everybody is a descendent of nullid
748 isdescendent = True
748 isdescendent = True
749 elif n in descendents:
749 elif n in descendents:
750 # n is already a descendent
750 # n is already a descendent
751 isdescendent = True
751 isdescendent = True
752 # This check only needs to be done here because all the roots
752 # This check only needs to be done here because all the roots
753 # will start being marked is descendents before the loop.
753 # will start being marked is descendents before the loop.
754 if n in roots:
754 if n in roots:
755 # If n was a root, check if it's a 'real' root.
755 # If n was a root, check if it's a 'real' root.
756 p = tuple(self.parents(n))
756 p = tuple(self.parents(n))
757 # If any of its parents are descendents, it's not a root.
757 # If any of its parents are descendents, it's not a root.
758 if (p[0] in descendents) or (p[1] in descendents):
758 if (p[0] in descendents) or (p[1] in descendents):
759 roots.remove(n)
759 roots.remove(n)
760 else:
760 else:
761 p = tuple(self.parents(n))
761 p = tuple(self.parents(n))
762 # A node is a descendent if either of its parents are
762 # A node is a descendent if either of its parents are
763 # descendents. (We seeded the dependents list with the roots
763 # descendents. (We seeded the dependents list with the roots
764 # up there, remember?)
764 # up there, remember?)
765 if (p[0] in descendents) or (p[1] in descendents):
765 if (p[0] in descendents) or (p[1] in descendents):
766 descendents.add(n)
766 descendents.add(n)
767 isdescendent = True
767 isdescendent = True
768 if isdescendent and ((ancestors is None) or (n in ancestors)):
768 if isdescendent and ((ancestors is None) or (n in ancestors)):
769 # Only include nodes that are both descendents and ancestors.
769 # Only include nodes that are both descendents and ancestors.
770 orderedout.append(n)
770 orderedout.append(n)
771 if (ancestors is not None) and (n in heads):
771 if (ancestors is not None) and (n in heads):
772 # We're trying to figure out which heads are reachable
772 # We're trying to figure out which heads are reachable
773 # from roots.
773 # from roots.
774 # Mark this head as having been reached
774 # Mark this head as having been reached
775 heads[n] = 1
775 heads[n] = 1
776 elif ancestors is None:
776 elif ancestors is None:
777 # Otherwise, we're trying to discover the heads.
777 # Otherwise, we're trying to discover the heads.
778 # Assume this is a head because if it isn't, the next step
778 # Assume this is a head because if it isn't, the next step
779 # will eventually remove it.
779 # will eventually remove it.
780 heads[n] = 1
780 heads[n] = 1
781 # But, obviously its parents aren't.
781 # But, obviously its parents aren't.
782 for p in self.parents(n):
782 for p in self.parents(n):
783 heads.pop(p, None)
783 heads.pop(p, None)
784 heads = [n for n in heads.iterkeys() if heads[n] != 0]
784 heads = [n for n in heads.iterkeys() if heads[n] != 0]
785 roots = list(roots)
785 roots = list(roots)
786 assert orderedout
786 assert orderedout
787 assert roots
787 assert roots
788 assert heads
788 assert heads
789 return (orderedout, roots, heads)
789 return (orderedout, roots, heads)
790
790
791 def heads(self, start=None, stop=None):
791 def heads(self, start=None, stop=None):
792 """return the list of all nodes that have no children
792 """return the list of all nodes that have no children
793
793
794 if start is specified, only heads that are descendants of
794 if start is specified, only heads that are descendants of
795 start will be returned
795 start will be returned
796 if stop is specified, it will consider all the revs from stop
796 if stop is specified, it will consider all the revs from stop
797 as if they had no children
797 as if they had no children
798 """
798 """
799 if start is None and stop is None:
799 if start is None and stop is None:
800 count = len(self)
800 count = len(self)
801 if not count:
801 if not count:
802 return [nullid]
802 return [nullid]
803 ishead = [1] * (count + 1)
803 ishead = [1] * (count + 1)
804 index = self.index
804 index = self.index
805 for r in xrange(count):
805 for r in xrange(count):
806 e = index[r]
806 e = index[r]
807 ishead[e[5]] = ishead[e[6]] = 0
807 ishead[e[5]] = ishead[e[6]] = 0
808 return [self.node(r) for r in xrange(count) if ishead[r]]
808 return [self.node(r) for r in xrange(count) if ishead[r]]
809
809
810 if start is None:
810 if start is None:
811 start = nullid
811 start = nullid
812 if stop is None:
812 if stop is None:
813 stop = []
813 stop = []
814 stoprevs = set([self.rev(n) for n in stop])
814 stoprevs = set([self.rev(n) for n in stop])
815 startrev = self.rev(start)
815 startrev = self.rev(start)
816 reachable = {startrev: 1}
816 reachable = {startrev: 1}
817 heads = {startrev: 1}
817 heads = {startrev: 1}
818
818
819 parentrevs = self.parentrevs
819 parentrevs = self.parentrevs
820 for r in xrange(startrev + 1, len(self)):
820 for r in xrange(startrev + 1, len(self)):
821 for p in parentrevs(r):
821 for p in parentrevs(r):
822 if p in reachable:
822 if p in reachable:
823 if r not in stoprevs:
823 if r not in stoprevs:
824 reachable[r] = 1
824 reachable[r] = 1
825 heads[r] = 1
825 heads[r] = 1
826 if p in heads and p not in stoprevs:
826 if p in heads and p not in stoprevs:
827 del heads[p]
827 del heads[p]
828
828
829 return [self.node(r) for r in heads]
829 return [self.node(r) for r in heads]
830
830
831 def children(self, node):
831 def children(self, node):
832 """find the children of a given node"""
832 """find the children of a given node"""
833 c = []
833 c = []
834 p = self.rev(node)
834 p = self.rev(node)
835 for r in range(p + 1, len(self)):
835 for r in range(p + 1, len(self)):
836 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
836 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
837 if prevs:
837 if prevs:
838 for pr in prevs:
838 for pr in prevs:
839 if pr == p:
839 if pr == p:
840 c.append(self.node(r))
840 c.append(self.node(r))
841 elif p == nullrev:
841 elif p == nullrev:
842 c.append(self.node(r))
842 c.append(self.node(r))
843 return c
843 return c
844
844
845 def _match(self, id):
845 def _match(self, id):
846 if isinstance(id, (long, int)):
846 if isinstance(id, (long, int)):
847 # rev
847 # rev
848 return self.node(id)
848 return self.node(id)
849 if len(id) == 20:
849 if len(id) == 20:
850 # possibly a binary node
850 # possibly a binary node
851 # odds of a binary node being all hex in ASCII are 1 in 10**25
851 # odds of a binary node being all hex in ASCII are 1 in 10**25
852 try:
852 try:
853 node = id
853 node = id
854 self.rev(node) # quick search the index
854 self.rev(node) # quick search the index
855 return node
855 return node
856 except LookupError:
856 except LookupError:
857 pass # may be partial hex id
857 pass # may be partial hex id
858 try:
858 try:
859 # str(rev)
859 # str(rev)
860 rev = int(id)
860 rev = int(id)
861 if str(rev) != id:
861 if str(rev) != id:
862 raise ValueError
862 raise ValueError
863 if rev < 0:
863 if rev < 0:
864 rev = len(self) + rev
864 rev = len(self) + rev
865 if rev < 0 or rev >= len(self):
865 if rev < 0 or rev >= len(self):
866 raise ValueError
866 raise ValueError
867 return self.node(rev)
867 return self.node(rev)
868 except (ValueError, OverflowError):
868 except (ValueError, OverflowError):
869 pass
869 pass
870 if len(id) == 40:
870 if len(id) == 40:
871 try:
871 try:
872 # a full hex nodeid?
872 # a full hex nodeid?
873 node = bin(id)
873 node = bin(id)
874 self.rev(node)
874 self.rev(node)
875 return node
875 return node
876 except (TypeError, LookupError):
876 except (TypeError, LookupError):
877 pass
877 pass
878
878
879 def _partialmatch(self, id):
879 def _partialmatch(self, id):
880 if len(id) < 40:
880 if len(id) < 40:
881 try:
881 try:
882 # hex(node)[:...]
882 # hex(node)[:...]
883 l = len(id) / 2 # grab an even number of digits
883 l = len(id) / 2 # grab an even number of digits
884 bin_id = bin(id[:l*2])
884 bin_id = bin(id[:l*2])
885 nl = [n for n in self.nodemap if n[:l] == bin_id]
885 nl = [n for n in self.nodemap if n[:l] == bin_id]
886 nl = [n for n in nl if hex(n).startswith(id)]
886 nl = [n for n in nl if hex(n).startswith(id)]
887 if len(nl) > 0:
887 if len(nl) > 0:
888 if len(nl) == 1:
888 if len(nl) == 1:
889 return nl[0]
889 return nl[0]
890 raise LookupError(id, self.indexfile,
890 raise LookupError(id, self.indexfile,
891 _('ambiguous identifier'))
891 _('ambiguous identifier'))
892 return None
892 return None
893 except TypeError:
893 except TypeError:
894 pass
894 pass
895
895
896 def lookup(self, id):
896 def lookup(self, id):
897 """locate a node based on:
897 """locate a node based on:
898 - revision number or str(revision number)
898 - revision number or str(revision number)
899 - nodeid or subset of hex nodeid
899 - nodeid or subset of hex nodeid
900 """
900 """
901 n = self._match(id)
901 n = self._match(id)
902 if n is not None:
902 if n is not None:
903 return n
903 return n
904 n = self._partialmatch(id)
904 n = self._partialmatch(id)
905 if n:
905 if n:
906 return n
906 return n
907
907
908 raise LookupError(id, self.indexfile, _('no match found'))
908 raise LookupError(id, self.indexfile, _('no match found'))
909
909
910 def cmp(self, node, text):
910 def cmp(self, node, text):
911 """compare text with a given file revision"""
911 """compare text with a given file revision"""
912 p1, p2 = self.parents(node)
912 p1, p2 = self.parents(node)
913 return hash(text, p1, p2) != node
913 return hash(text, p1, p2) != node
914
914
915 def _addchunk(self, offset, data):
915 def _addchunk(self, offset, data):
916 o, d = self._chunkcache
916 o, d = self._chunkcache
917 # try to add to existing cache
917 # try to add to existing cache
918 if o + len(d) == offset and len(d) + len(data) < _prereadsize:
918 if o + len(d) == offset and len(d) + len(data) < _prereadsize:
919 self._chunkcache = o, d + data
919 self._chunkcache = o, d + data
920 else:
920 else:
921 self._chunkcache = offset, data
921 self._chunkcache = offset, data
922
922
923 def _loadchunk(self, offset, length, df=None):
923 def _loadchunk(self, offset, length, df=None):
924 if not df:
924 if not df:
925 if self._inline:
925 if self._inline:
926 df = self.opener(self.indexfile)
926 df = self.opener(self.indexfile)
927 else:
927 else:
928 df = self.opener(self.datafile)
928 df = self.opener(self.datafile)
929
929
930 readahead = max(65536, length)
930 readahead = max(65536, length)
931 df.seek(offset)
931 df.seek(offset)
932 d = df.read(readahead)
932 d = df.read(readahead)
933 self._addchunk(offset, d)
933 self._addchunk(offset, d)
934 if readahead > length:
934 if readahead > length:
935 return d[:length]
935 return d[:length]
936 return d
936 return d
937
937
938 def _getchunk(self, offset, length, df=None):
938 def _getchunk(self, offset, length, df=None):
939 o, d = self._chunkcache
939 o, d = self._chunkcache
940 l = len(d)
940 l = len(d)
941
941
942 # is it in the cache?
942 # is it in the cache?
943 cachestart = offset - o
943 cachestart = offset - o
944 cacheend = cachestart + length
944 cacheend = cachestart + length
945 if cachestart >= 0 and cacheend <= l:
945 if cachestart >= 0 and cacheend <= l:
946 if cachestart == 0 and cacheend == l:
946 if cachestart == 0 and cacheend == l:
947 return d # avoid a copy
947 return d # avoid a copy
948 return d[cachestart:cacheend]
948 return d[cachestart:cacheend]
949
949
950 return self._loadchunk(offset, length, df)
950 return self._loadchunk(offset, length, df)
951
951
952 def _prime(self, startrev, endrev, df):
952 def _prime(self, startrev, endrev, df):
953 start = self.start(startrev)
953 start = self.start(startrev)
954 end = self.end(endrev)
954 end = self.end(endrev)
955 if self._inline:
955 if self._inline:
956 start += (startrev + 1) * self._io.size
956 start += (startrev + 1) * self._io.size
957 end += (startrev + 1) * self._io.size
957 end += (startrev + 1) * self._io.size
958 self._loadchunk(start, end - start, df)
958 self._loadchunk(start, end - start, df)
959
959
960 def chunk(self, rev, df=None):
960 def chunk(self, rev, df=None):
961 start, length = self.start(rev), self.length(rev)
961 start, length = self.start(rev), self.length(rev)
962 if self._inline:
962 if self._inline:
963 start += (rev + 1) * self._io.size
963 start += (rev + 1) * self._io.size
964 return decompress(self._getchunk(start, length, df))
964 return decompress(self._getchunk(start, length, df))
965
965
966 def revdiff(self, rev1, rev2):
966 def revdiff(self, rev1, rev2):
967 """return or calculate a delta between two revisions"""
967 """return or calculate a delta between two revisions"""
968 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
968 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
969 return self.chunk(rev2)
969 return self.chunk(rev2)
970
970
971 return mdiff.textdiff(self.revision(self.node(rev1)),
971 return mdiff.textdiff(self.revision(self.node(rev1)),
972 self.revision(self.node(rev2)))
972 self.revision(self.node(rev2)))
973
973
974 def revision(self, node):
974 def revision(self, node):
975 """return an uncompressed revision of a given node"""
975 """return an uncompressed revision of a given node"""
976 if node == nullid:
976 if node == nullid:
977 return ""
977 return ""
978 if self._cache and self._cache[0] == node:
978 if self._cache and self._cache[0] == node:
979 return str(self._cache[2])
979 return str(self._cache[2])
980
980
981 # look up what we need to read
981 # look up what we need to read
982 text = None
982 text = None
983 rev = self.rev(node)
983 rev = self.rev(node)
984 base = self.base(rev)
984 base = self.base(rev)
985
985
986 # check rev flags
986 # check rev flags
987 if self.index[rev][0] & 0xFFFF:
987 if self.index[rev][0] & 0xFFFF:
988 raise RevlogError(_('incompatible revision flag %x') %
988 raise RevlogError(_('incompatible revision flag %x') %
989 (self.index[rev][0] & 0xFFFF))
989 (self.index[rev][0] & 0xFFFF))
990
990
991 df = None
991 df = None
992
992
993 # do we have useful data cached?
993 # do we have useful data cached?
994 if self._cache and self._cache[1] >= base and self._cache[1] < rev:
994 if self._cache and self._cache[1] >= base and self._cache[1] < rev:
995 base = self._cache[1]
995 base = self._cache[1]
996 text = str(self._cache[2])
996 text = str(self._cache[2])
997 self._loadindex(base, rev + 1)
997 self._loadindex(base, rev + 1)
998 if not self._inline and rev > base + 1:
998 if not self._inline and rev > base + 1:
999 df = self.opener(self.datafile)
999 df = self.opener(self.datafile)
1000 self._prime(base, rev, df)
1000 self._prime(base, rev, df)
1001 else:
1001 else:
1002 self._loadindex(base, rev + 1)
1002 self._loadindex(base, rev + 1)
1003 if not self._inline and rev > base:
1003 if not self._inline and rev > base:
1004 df = self.opener(self.datafile)
1004 df = self.opener(self.datafile)
1005 self._prime(base, rev, df)
1005 self._prime(base, rev, df)
1006 text = self.chunk(base, df=df)
1006 text = self.chunk(base, df=df)
1007
1007
1008 bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
1008 bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
1009 text = mdiff.patches(text, bins)
1009 text = mdiff.patches(text, bins)
1010 p1, p2 = self.parents(node)
1010 p1, p2 = self.parents(node)
1011 if node != hash(text, p1, p2):
1011 if node != hash(text, p1, p2):
1012 raise RevlogError(_("integrity check failed on %s:%d")
1012 raise RevlogError(_("integrity check failed on %s:%d")
1013 % (self.datafile, rev))
1013 % (self.datafile, rev))
1014
1014
1015 self._cache = (node, rev, text)
1015 self._cache = (node, rev, text)
1016 return text
1016 return text
1017
1017
1018 def checkinlinesize(self, tr, fp=None):
1018 def checkinlinesize(self, tr, fp=None):
1019 if not self._inline or (self.start(-2) + self.length(-2)) < 131072:
1019 if not self._inline or (self.start(-2) + self.length(-2)) < 131072:
1020 return
1020 return
1021
1021
1022 trinfo = tr.find(self.indexfile)
1022 trinfo = tr.find(self.indexfile)
1023 if trinfo == None:
1023 if trinfo == None:
1024 raise RevlogError(_("%s not found in the transaction")
1024 raise RevlogError(_("%s not found in the transaction")
1025 % self.indexfile)
1025 % self.indexfile)
1026
1026
1027 trindex = trinfo[2]
1027 trindex = trinfo[2]
1028 dataoff = self.start(trindex)
1028 dataoff = self.start(trindex)
1029
1029
1030 tr.add(self.datafile, dataoff)
1030 tr.add(self.datafile, dataoff)
1031
1031
1032 if fp:
1032 if fp:
1033 fp.flush()
1033 fp.flush()
1034 fp.close()
1034 fp.close()
1035
1035
1036 df = self.opener(self.datafile, 'w')
1036 df = self.opener(self.datafile, 'w')
1037 try:
1037 try:
1038 calc = self._io.size
1038 calc = self._io.size
1039 for r in self:
1039 for r in self:
1040 start = self.start(r) + (r + 1) * calc
1040 start = self.start(r) + (r + 1) * calc
1041 length = self.length(r)
1041 length = self.length(r)
1042 d = self._getchunk(start, length)
1042 d = self._getchunk(start, length)
1043 df.write(d)
1043 df.write(d)
1044 finally:
1044 finally:
1045 df.close()
1045 df.close()
1046
1046
1047 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1047 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1048 self.version &= ~(REVLOGNGINLINEDATA)
1048 self.version &= ~(REVLOGNGINLINEDATA)
1049 self._inline = False
1049 self._inline = False
1050 for i in self:
1050 for i in self:
1051 e = self._io.packentry(self.index[i], self.node, self.version, i)
1051 e = self._io.packentry(self.index[i], self.node, self.version, i)
1052 fp.write(e)
1052 fp.write(e)
1053
1053
1054 # if we don't call rename, the temp file will never replace the
1054 # if we don't call rename, the temp file will never replace the
1055 # real index
1055 # real index
1056 fp.rename()
1056 fp.rename()
1057
1057
1058 tr.replace(self.indexfile, trindex * calc)
1058 tr.replace(self.indexfile, trindex * calc)
1059 self._chunkcache = (0, '')
1059 self._chunkcache = (0, '')
1060
1060
1061 def addrevision(self, text, transaction, link, p1, p2, d=None):
1061 def addrevision(self, text, transaction, link, p1, p2, d=None):
1062 """add a revision to the log
1062 """add a revision to the log
1063
1063
1064 text - the revision data to add
1064 text - the revision data to add
1065 transaction - the transaction object used for rollback
1065 transaction - the transaction object used for rollback
1066 link - the linkrev data to add
1066 link - the linkrev data to add
1067 p1, p2 - the parent nodeids of the revision
1067 p1, p2 - the parent nodeids of the revision
1068 d - an optional precomputed delta
1068 d - an optional precomputed delta
1069 """
1069 """
1070 dfh = None
1070 dfh = None
1071 if not self._inline:
1071 if not self._inline:
1072 dfh = self.opener(self.datafile, "a")
1072 dfh = self.opener(self.datafile, "a")
1073 ifh = self.opener(self.indexfile, "a+")
1073 ifh = self.opener(self.indexfile, "a+")
1074 try:
1074 try:
1075 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
1075 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
1076 finally:
1076 finally:
1077 if dfh:
1077 if dfh:
1078 dfh.close()
1078 dfh.close()
1079 ifh.close()
1079 ifh.close()
1080
1080
1081 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
1081 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
1082 node = hash(text, p1, p2)
1082 node = hash(text, p1, p2)
1083 if node in self.nodemap:
1083 if node in self.nodemap:
1084 return node
1084 return node
1085
1085
1086 curr = len(self)
1086 curr = len(self)
1087 prev = curr - 1
1087 prev = curr - 1
1088 base = self.base(prev)
1088 base = self.base(prev)
1089 offset = self.end(prev)
1089 offset = self.end(prev)
1090
1090
1091 if curr:
1091 if curr:
1092 if not d:
1092 if not d:
1093 ptext = self.revision(self.node(prev))
1093 ptext = self.revision(self.node(prev))
1094 d = mdiff.textdiff(ptext, text)
1094 d = mdiff.textdiff(ptext, text)
1095 data = compress(d)
1095 data = compress(d)
1096 l = len(data[1]) + len(data[0])
1096 l = len(data[1]) + len(data[0])
1097 dist = l + offset - self.start(base)
1097 dist = l + offset - self.start(base)
1098
1098
1099 # full versions are inserted when the needed deltas
1099 # full versions are inserted when the needed deltas
1100 # become comparable to the uncompressed text
1100 # become comparable to the uncompressed text
1101 if not curr or dist > len(text) * 2:
1101 if not curr or dist > len(text) * 2:
1102 data = compress(text)
1102 data = compress(text)
1103 l = len(data[1]) + len(data[0])
1103 l = len(data[1]) + len(data[0])
1104 base = curr
1104 base = curr
1105
1105
1106 e = (offset_type(offset, 0), l, len(text),
1106 e = (offset_type(offset, 0), l, len(text),
1107 base, link, self.rev(p1), self.rev(p2), node)
1107 base, link, self.rev(p1), self.rev(p2), node)
1108 self.index.insert(-1, e)
1108 self.index.insert(-1, e)
1109 self.nodemap[node] = curr
1109 self.nodemap[node] = curr
1110
1110
1111 entry = self._io.packentry(e, self.node, self.version, curr)
1111 entry = self._io.packentry(e, self.node, self.version, curr)
1112 if not self._inline:
1112 if not self._inline:
1113 transaction.add(self.datafile, offset)
1113 transaction.add(self.datafile, offset)
1114 transaction.add(self.indexfile, curr * len(entry))
1114 transaction.add(self.indexfile, curr * len(entry))
1115 if data[0]:
1115 if data[0]:
1116 dfh.write(data[0])
1116 dfh.write(data[0])
1117 dfh.write(data[1])
1117 dfh.write(data[1])
1118 dfh.flush()
1118 dfh.flush()
1119 ifh.write(entry)
1119 ifh.write(entry)
1120 else:
1120 else:
1121 offset += curr * self._io.size
1121 offset += curr * self._io.size
1122 transaction.add(self.indexfile, offset, curr)
1122 transaction.add(self.indexfile, offset, curr)
1123 ifh.write(entry)
1123 ifh.write(entry)
1124 ifh.write(data[0])
1124 ifh.write(data[0])
1125 ifh.write(data[1])
1125 ifh.write(data[1])
1126 self.checkinlinesize(transaction, ifh)
1126 self.checkinlinesize(transaction, ifh)
1127
1127
1128 self._cache = (node, curr, text)
1128 self._cache = (node, curr, text)
1129 return node
1129 return node
1130
1130
1131 def ancestor(self, a, b):
1131 def ancestor(self, a, b):
1132 """calculate the least common ancestor of nodes a and b"""
1132 """calculate the least common ancestor of nodes a and b"""
1133
1133
1134 def parents(rev):
1134 def parents(rev):
1135 return [p for p in self.parentrevs(rev) if p != nullrev]
1135 return [p for p in self.parentrevs(rev) if p != nullrev]
1136
1136
1137 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1137 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1138 if c is None:
1138 if c is None:
1139 return nullid
1139 return nullid
1140
1140
1141 return self.node(c)
1141 return self.node(c)
1142
1142
1143 def group(self, nodelist, lookup, infocollect=None):
1143 def group(self, nodelist, lookup, infocollect=None):
1144 """calculate a delta group
1144 """calculate a delta group
1145
1145
1146 Given a list of changeset revs, return a set of deltas and
1146 Given a list of changeset revs, return a set of deltas and
1147 metadata corresponding to nodes. the first delta is
1147 metadata corresponding to nodes. the first delta is
1148 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
1148 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
1149 have this parent as it has all history before these
1149 have this parent as it has all history before these
1150 changesets. parent is parent[0]
1150 changesets. parent is parent[0]
1151 """
1151 """
1152
1152
1153 # if we don't have any revisions touched by these changesets, bail
1153 # if we don't have any revisions touched by these changesets, bail
1154 if not nodelist:
1154 if not nodelist:
1155 yield changegroup.closechunk()
1155 yield changegroup.closechunk()
1156 return
1156 return
1157
1157
1158 revs = [self.rev(n) for n in nodelist]
1158 revs = [self.rev(n) for n in nodelist]
1159
1159
1160 # add the parent of the first rev
1160 # add the parent of the first rev
1161 p = self.parentrevs(revs[0])[0]
1161 p = self.parentrevs(revs[0])[0]
1162 revs.insert(0, p)
1162 revs.insert(0, p)
1163
1163
1164 # build deltas
1164 # build deltas
1165 for d in xrange(0, len(revs) - 1):
1165 for d in xrange(0, len(revs) - 1):
1166 a, b = revs[d], revs[d + 1]
1166 a, b = revs[d], revs[d + 1]
1167 nb = self.node(b)
1167 nb = self.node(b)
1168
1168
1169 if infocollect is not None:
1169 if infocollect is not None:
1170 infocollect(nb)
1170 infocollect(nb)
1171
1171
1172 p = self.parents(nb)
1172 p = self.parents(nb)
1173 meta = nb + p[0] + p[1] + lookup(nb)
1173 meta = nb + p[0] + p[1] + lookup(nb)
1174 if a == -1:
1174 if a == -1:
1175 d = self.revision(nb)
1175 d = self.revision(nb)
1176 meta += mdiff.trivialdiffheader(len(d))
1176 meta += mdiff.trivialdiffheader(len(d))
1177 else:
1177 else:
1178 d = self.revdiff(a, b)
1178 d = self.revdiff(a, b)
1179 yield changegroup.chunkheader(len(meta) + len(d))
1179 yield changegroup.chunkheader(len(meta) + len(d))
1180 yield meta
1180 yield meta
1181 if len(d) > 2**20:
1181 if len(d) > 2**20:
1182 pos = 0
1182 pos = 0
1183 while pos < len(d):
1183 while pos < len(d):
1184 pos2 = pos + 2 ** 18
1184 pos2 = pos + 2 ** 18
1185 yield d[pos:pos2]
1185 yield d[pos:pos2]
1186 pos = pos2
1186 pos = pos2
1187 else:
1187 else:
1188 yield d
1188 yield d
1189
1189
1190 yield changegroup.closechunk()
1190 yield changegroup.closechunk()
1191
1191
1192 def addgroup(self, revs, linkmapper, transaction):
1192 def addgroup(self, revs, linkmapper, transaction):
1193 """
1193 """
1194 add a delta group
1194 add a delta group
1195
1195
1196 given a set of deltas, add them to the revision log. the
1196 given a set of deltas, add them to the revision log. the
1197 first delta is against its parent, which should be in our
1197 first delta is against its parent, which should be in our
1198 log, the rest are against the previous delta.
1198 log, the rest are against the previous delta.
1199 """
1199 """
1200
1200
1201 #track the base of the current delta log
1201 #track the base of the current delta log
1202 r = len(self)
1202 r = len(self)
1203 t = r - 1
1203 t = r - 1
1204 node = None
1204 node = None
1205
1205
1206 base = prev = nullrev
1206 base = prev = nullrev
1207 start = end = textlen = 0
1207 start = end = textlen = 0
1208 if r:
1208 if r:
1209 end = self.end(t)
1209 end = self.end(t)
1210
1210
1211 ifh = self.opener(self.indexfile, "a+")
1211 ifh = self.opener(self.indexfile, "a+")
1212 isize = r * self._io.size
1212 isize = r * self._io.size
1213 if self._inline:
1213 if self._inline:
1214 transaction.add(self.indexfile, end + isize, r)
1214 transaction.add(self.indexfile, end + isize, r)
1215 dfh = None
1215 dfh = None
1216 else:
1216 else:
1217 transaction.add(self.indexfile, isize, r)
1217 transaction.add(self.indexfile, isize, r)
1218 transaction.add(self.datafile, end)
1218 transaction.add(self.datafile, end)
1219 dfh = self.opener(self.datafile, "a")
1219 dfh = self.opener(self.datafile, "a")
1220
1220
1221 try:
1221 try:
1222 # loop through our set of deltas
1222 # loop through our set of deltas
1223 chain = None
1223 chain = None
1224 for chunk in revs:
1224 for chunk in revs:
1225 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1225 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1226 link = linkmapper(cs)
1226 link = linkmapper(cs)
1227 if node in self.nodemap:
1227 if node in self.nodemap:
1228 # this can happen if two branches make the same change
1228 # this can happen if two branches make the same change
1229 chain = node
1229 chain = node
1230 continue
1230 continue
1231 delta = buffer(chunk, 80)
1231 delta = buffer(chunk, 80)
1232 del chunk
1232 del chunk
1233
1233
1234 for p in (p1, p2):
1234 for p in (p1, p2):
1235 if not p in self.nodemap:
1235 if not p in self.nodemap:
1236 raise LookupError(p, self.indexfile, _('unknown parent'))
1236 raise LookupError(p, self.indexfile, _('unknown parent'))
1237
1237
1238 if not chain:
1238 if not chain:
1239 # retrieve the parent revision of the delta chain
1239 # retrieve the parent revision of the delta chain
1240 chain = p1
1240 chain = p1
1241 if not chain in self.nodemap:
1241 if not chain in self.nodemap:
1242 raise LookupError(chain, self.indexfile, _('unknown base'))
1242 raise LookupError(chain, self.indexfile, _('unknown base'))
1243
1243
1244 # full versions are inserted when the needed deltas become
1244 # full versions are inserted when the needed deltas become
1245 # comparable to the uncompressed text or when the previous
1245 # comparable to the uncompressed text or when the previous
1246 # version is not the one we have a delta against. We use
1246 # version is not the one we have a delta against. We use
1247 # the size of the previous full rev as a proxy for the
1247 # the size of the previous full rev as a proxy for the
1248 # current size.
1248 # current size.
1249
1249
1250 if chain == prev:
1250 if chain == prev:
1251 cdelta = compress(delta)
1251 cdelta = compress(delta)
1252 cdeltalen = len(cdelta[0]) + len(cdelta[1])
1252 cdeltalen = len(cdelta[0]) + len(cdelta[1])
1253 textlen = mdiff.patchedsize(textlen, delta)
1253 textlen = mdiff.patchedsize(textlen, delta)
1254
1254
1255 if chain != prev or (end - start + cdeltalen) > textlen * 2:
1255 if chain != prev or (end - start + cdeltalen) > textlen * 2:
1256 # flush our writes here so we can read it in revision
1256 # flush our writes here so we can read it in revision
1257 if dfh:
1257 if dfh:
1258 dfh.flush()
1258 dfh.flush()
1259 ifh.flush()
1259 ifh.flush()
1260 text = self.revision(chain)
1260 text = self.revision(chain)
1261 if len(text) == 0:
1261 if len(text) == 0:
1262 # skip over trivial delta header
1262 # skip over trivial delta header
1263 text = buffer(delta, 12)
1263 text = buffer(delta, 12)
1264 else:
1264 else:
1265 text = mdiff.patches(text, [delta])
1265 text = mdiff.patches(text, [delta])
1266 del delta
1266 del delta
1267 chk = self._addrevision(text, transaction, link, p1, p2, None,
1267 chk = self._addrevision(text, transaction, link, p1, p2, None,
1268 ifh, dfh)
1268 ifh, dfh)
1269 if not dfh and not self._inline:
1269 if not dfh and not self._inline:
1270 # addrevision switched from inline to conventional
1270 # addrevision switched from inline to conventional
1271 # reopen the index
1271 # reopen the index
1272 dfh = self.opener(self.datafile, "a")
1272 dfh = self.opener(self.datafile, "a")
1273 ifh = self.opener(self.indexfile, "a")
1273 ifh = self.opener(self.indexfile, "a")
1274 if chk != node:
1274 if chk != node:
1275 raise RevlogError(_("consistency error adding group"))
1275 raise RevlogError(_("consistency error adding group"))
1276 textlen = len(text)
1276 textlen = len(text)
1277 else:
1277 else:
1278 e = (offset_type(end, 0), cdeltalen, textlen, base,
1278 e = (offset_type(end, 0), cdeltalen, textlen, base,
1279 link, self.rev(p1), self.rev(p2), node)
1279 link, self.rev(p1), self.rev(p2), node)
1280 self.index.insert(-1, e)
1280 self.index.insert(-1, e)
1281 self.nodemap[node] = r
1281 self.nodemap[node] = r
1282 entry = self._io.packentry(e, self.node, self.version, r)
1282 entry = self._io.packentry(e, self.node, self.version, r)
1283 if self._inline:
1283 if self._inline:
1284 ifh.write(entry)
1284 ifh.write(entry)
1285 ifh.write(cdelta[0])
1285 ifh.write(cdelta[0])
1286 ifh.write(cdelta[1])
1286 ifh.write(cdelta[1])
1287 self.checkinlinesize(transaction, ifh)
1287 self.checkinlinesize(transaction, ifh)
1288 if not self._inline:
1288 if not self._inline:
1289 dfh = self.opener(self.datafile, "a")
1289 dfh = self.opener(self.datafile, "a")
1290 ifh = self.opener(self.indexfile, "a")
1290 ifh = self.opener(self.indexfile, "a")
1291 else:
1291 else:
1292 dfh.write(cdelta[0])
1292 dfh.write(cdelta[0])
1293 dfh.write(cdelta[1])
1293 dfh.write(cdelta[1])
1294 ifh.write(entry)
1294 ifh.write(entry)
1295
1295
1296 t, r, chain, prev = r, r + 1, node, node
1296 t, r, chain, prev = r, r + 1, node, node
1297 base = self.base(t)
1297 base = self.base(t)
1298 start = self.start(base)
1298 start = self.start(base)
1299 end = self.end(t)
1299 end = self.end(t)
1300 finally:
1300 finally:
1301 if dfh:
1301 if dfh:
1302 dfh.close()
1302 dfh.close()
1303 ifh.close()
1303 ifh.close()
1304
1304
1305 return node
1305 return node
1306
1306
1307 def strip(self, minlink, transaction):
1307 def strip(self, minlink, transaction):
1308 """truncate the revlog on the first revision with a linkrev >= minlink
1308 """truncate the revlog on the first revision with a linkrev >= minlink
1309
1309
1310 This function is called when we're stripping revision minlink and
1310 This function is called when we're stripping revision minlink and
1311 its descendants from the repository.
1311 its descendants from the repository.
1312
1312
1313 We have to remove all revisions with linkrev >= minlink, because
1313 We have to remove all revisions with linkrev >= minlink, because
1314 the equivalent changelog revisions will be renumbered after the
1314 the equivalent changelog revisions will be renumbered after the
1315 strip.
1315 strip.
1316
1316
1317 So we truncate the revlog on the first of these revisions, and
1317 So we truncate the revlog on the first of these revisions, and
1318 trust that the caller has saved the revisions that shouldn't be
1318 trust that the caller has saved the revisions that shouldn't be
1319 removed and that it'll readd them after this truncation.
1319 removed and that it'll readd them after this truncation.
1320 """
1320 """
1321 if len(self) == 0:
1321 if len(self) == 0:
1322 return
1322 return
1323
1323
1324 if isinstance(self.index, lazyindex):
1324 if isinstance(self.index, lazyindex):
1325 self._loadindexmap()
1325 self._loadindexmap()
1326
1326
1327 for rev in self:
1327 for rev in self:
1328 if self.index[rev][4] >= minlink:
1328 if self.index[rev][4] >= minlink:
1329 break
1329 break
1330 else:
1330 else:
1331 return
1331 return
1332
1332
1333 # first truncate the files on disk
1333 # first truncate the files on disk
1334 end = self.start(rev)
1334 end = self.start(rev)
1335 if not self._inline:
1335 if not self._inline:
1336 transaction.add(self.datafile, end)
1336 transaction.add(self.datafile, end)
1337 end = rev * self._io.size
1337 end = rev * self._io.size
1338 else:
1338 else:
1339 end += rev * self._io.size
1339 end += rev * self._io.size
1340
1340
1341 transaction.add(self.indexfile, end)
1341 transaction.add(self.indexfile, end)
1342
1342
1343 # then reset internal state in memory to forget those revisions
1343 # then reset internal state in memory to forget those revisions
1344 self._cache = None
1344 self._cache = None
1345 self._chunkcache = (0, '')
1345 self._chunkcache = (0, '')
1346 for x in xrange(rev, len(self)):
1346 for x in xrange(rev, len(self)):
1347 del self.nodemap[self.node(x)]
1347 del self.nodemap[self.node(x)]
1348
1348
1349 del self.index[rev:-1]
1349 del self.index[rev:-1]
1350
1350
1351 def checksize(self):
1351 def checksize(self):
1352 expected = 0
1352 expected = 0
1353 if len(self):
1353 if len(self):
1354 expected = max(0, self.end(len(self) - 1))
1354 expected = max(0, self.end(len(self) - 1))
1355
1355
1356 try:
1356 try:
1357 f = self.opener(self.datafile)
1357 f = self.opener(self.datafile)
1358 f.seek(0, 2)
1358 f.seek(0, 2)
1359 actual = f.tell()
1359 actual = f.tell()
1360 dd = actual - expected
1360 dd = actual - expected
1361 except IOError, inst:
1361 except IOError, inst:
1362 if inst.errno != errno.ENOENT:
1362 if inst.errno != errno.ENOENT:
1363 raise
1363 raise
1364 dd = 0
1364 dd = 0
1365
1365
1366 try:
1366 try:
1367 f = self.opener(self.indexfile)
1367 f = self.opener(self.indexfile)
1368 f.seek(0, 2)
1368 f.seek(0, 2)
1369 actual = f.tell()
1369 actual = f.tell()
1370 s = self._io.size
1370 s = self._io.size
1371 i = max(0, actual / s)
1371 i = max(0, actual / s)
1372 di = actual - (i * s)
1372 di = actual - (i * s)
1373 if self._inline:
1373 if self._inline:
1374 databytes = 0
1374 databytes = 0
1375 for r in self:
1375 for r in self:
1376 databytes += max(0, self.length(r))
1376 databytes += max(0, self.length(r))
1377 dd = 0
1377 dd = 0
1378 di = actual - len(self) * s - databytes
1378 di = actual - len(self) * s - databytes
1379 except IOError, inst:
1379 except IOError, inst:
1380 if inst.errno != errno.ENOENT:
1380 if inst.errno != errno.ENOENT:
1381 raise
1381 raise
1382 di = 0
1382 di = 0
1383
1383
1384 return (dd, di)
1384 return (dd, di)
1385
1385
1386 def files(self):
1386 def files(self):
1387 res = [ self.indexfile ]
1387 res = [ self.indexfile ]
1388 if not self._inline:
1388 if not self._inline:
1389 res.append(self.datafile)
1389 res.append(self.datafile)
1390 return res
1390 return res
General Comments 0
You need to be logged in to leave comments. Login now