##// END OF EJS Templates
revlog: add a fast path for checking ancestry
Dirkjan Ochtman -
r10325:bc72e21f default
parent child Browse files
Show More
@@ -1,1409 +1,1414 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from node import bin, hex, nullid, nullrev, short #@UnusedImport
15 from node import bin, hex, nullid, nullrev, short #@UnusedImport
16 from i18n import _
16 from i18n import _
17 import changegroup, ancestor, mdiff, parsers, error, util
17 import changegroup, ancestor, mdiff, parsers, error, util
18 import struct, zlib, errno
18 import struct, zlib, errno
19
19
20 _pack = struct.pack
20 _pack = struct.pack
21 _unpack = struct.unpack
21 _unpack = struct.unpack
22 _compress = zlib.compress
22 _compress = zlib.compress
23 _decompress = zlib.decompress
23 _decompress = zlib.decompress
24 _sha = util.sha1
24 _sha = util.sha1
25
25
26 # revlog flags
26 # revlog flags
27 REVLOGV0 = 0
27 REVLOGV0 = 0
28 REVLOGNG = 1
28 REVLOGNG = 1
29 REVLOGNGINLINEDATA = (1 << 16)
29 REVLOGNGINLINEDATA = (1 << 16)
30 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
30 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
31 REVLOG_DEFAULT_FORMAT = REVLOGNG
31 REVLOG_DEFAULT_FORMAT = REVLOGNG
32 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
32 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
33
33
34 _prereadsize = 1048576
34 _prereadsize = 1048576
35
35
36 RevlogError = error.RevlogError
36 RevlogError = error.RevlogError
37 LookupError = error.LookupError
37 LookupError = error.LookupError
38
38
39 def getoffset(q):
39 def getoffset(q):
40 return int(q >> 16)
40 return int(q >> 16)
41
41
42 def gettype(q):
42 def gettype(q):
43 return int(q & 0xFFFF)
43 return int(q & 0xFFFF)
44
44
45 def offset_type(offset, type):
45 def offset_type(offset, type):
46 return long(long(offset) << 16 | type)
46 return long(long(offset) << 16 | type)
47
47
48 nullhash = _sha(nullid)
48 nullhash = _sha(nullid)
49
49
50 def hash(text, p1, p2):
50 def hash(text, p1, p2):
51 """generate a hash from the given text and its parent hashes
51 """generate a hash from the given text and its parent hashes
52
52
53 This hash combines both the current file contents and its history
53 This hash combines both the current file contents and its history
54 in a manner that makes it easy to distinguish nodes with the same
54 in a manner that makes it easy to distinguish nodes with the same
55 content in the revision graph.
55 content in the revision graph.
56 """
56 """
57 # As of now, if one of the parent node is null, p2 is null
57 # As of now, if one of the parent node is null, p2 is null
58 if p2 == nullid:
58 if p2 == nullid:
59 # deep copy of a hash is faster than creating one
59 # deep copy of a hash is faster than creating one
60 s = nullhash.copy()
60 s = nullhash.copy()
61 s.update(p1)
61 s.update(p1)
62 else:
62 else:
63 # none of the parent nodes are nullid
63 # none of the parent nodes are nullid
64 l = [p1, p2]
64 l = [p1, p2]
65 l.sort()
65 l.sort()
66 s = _sha(l[0])
66 s = _sha(l[0])
67 s.update(l[1])
67 s.update(l[1])
68 s.update(text)
68 s.update(text)
69 return s.digest()
69 return s.digest()
70
70
71 def compress(text):
71 def compress(text):
72 """ generate a possibly-compressed representation of text """
72 """ generate a possibly-compressed representation of text """
73 if not text:
73 if not text:
74 return ("", text)
74 return ("", text)
75 l = len(text)
75 l = len(text)
76 bin = None
76 bin = None
77 if l < 44:
77 if l < 44:
78 pass
78 pass
79 elif l > 1000000:
79 elif l > 1000000:
80 # zlib makes an internal copy, thus doubling memory usage for
80 # zlib makes an internal copy, thus doubling memory usage for
81 # large files, so lets do this in pieces
81 # large files, so lets do this in pieces
82 z = zlib.compressobj()
82 z = zlib.compressobj()
83 p = []
83 p = []
84 pos = 0
84 pos = 0
85 while pos < l:
85 while pos < l:
86 pos2 = pos + 2**20
86 pos2 = pos + 2**20
87 p.append(z.compress(text[pos:pos2]))
87 p.append(z.compress(text[pos:pos2]))
88 pos = pos2
88 pos = pos2
89 p.append(z.flush())
89 p.append(z.flush())
90 if sum(map(len, p)) < l:
90 if sum(map(len, p)) < l:
91 bin = "".join(p)
91 bin = "".join(p)
92 else:
92 else:
93 bin = _compress(text)
93 bin = _compress(text)
94 if bin is None or len(bin) > l:
94 if bin is None or len(bin) > l:
95 if text[0] == '\0':
95 if text[0] == '\0':
96 return ("", text)
96 return ("", text)
97 return ('u', text)
97 return ('u', text)
98 return ("", bin)
98 return ("", bin)
99
99
100 def decompress(bin):
100 def decompress(bin):
101 """ decompress the given input """
101 """ decompress the given input """
102 if not bin:
102 if not bin:
103 return bin
103 return bin
104 t = bin[0]
104 t = bin[0]
105 if t == '\0':
105 if t == '\0':
106 return bin
106 return bin
107 if t == 'x':
107 if t == 'x':
108 return _decompress(bin)
108 return _decompress(bin)
109 if t == 'u':
109 if t == 'u':
110 return bin[1:]
110 return bin[1:]
111 raise RevlogError(_("unknown compression type %r") % t)
111 raise RevlogError(_("unknown compression type %r") % t)
112
112
113 class lazyparser(object):
113 class lazyparser(object):
114 """
114 """
115 this class avoids the need to parse the entirety of large indices
115 this class avoids the need to parse the entirety of large indices
116 """
116 """
117
117
118 # lazyparser is not safe to use on windows if win32 extensions not
118 # lazyparser is not safe to use on windows if win32 extensions not
119 # available. it keeps file handle open, which make it not possible
119 # available. it keeps file handle open, which make it not possible
120 # to break hardlinks on local cloned repos.
120 # to break hardlinks on local cloned repos.
121
121
122 def __init__(self, dataf):
122 def __init__(self, dataf):
123 try:
123 try:
124 size = util.fstat(dataf).st_size
124 size = util.fstat(dataf).st_size
125 except AttributeError:
125 except AttributeError:
126 size = 0
126 size = 0
127 self.dataf = dataf
127 self.dataf = dataf
128 self.s = struct.calcsize(indexformatng)
128 self.s = struct.calcsize(indexformatng)
129 self.datasize = size
129 self.datasize = size
130 self.l = size / self.s
130 self.l = size / self.s
131 self.index = [None] * self.l
131 self.index = [None] * self.l
132 self.map = {nullid: nullrev}
132 self.map = {nullid: nullrev}
133 self.allmap = 0
133 self.allmap = 0
134 self.all = 0
134 self.all = 0
135 self.mapfind_count = 0
135 self.mapfind_count = 0
136
136
137 def loadmap(self):
137 def loadmap(self):
138 """
138 """
139 during a commit, we need to make sure the rev being added is
139 during a commit, we need to make sure the rev being added is
140 not a duplicate. This requires loading the entire index,
140 not a duplicate. This requires loading the entire index,
141 which is fairly slow. loadmap can load up just the node map,
141 which is fairly slow. loadmap can load up just the node map,
142 which takes much less time.
142 which takes much less time.
143 """
143 """
144 if self.allmap:
144 if self.allmap:
145 return
145 return
146 end = self.datasize
146 end = self.datasize
147 self.allmap = 1
147 self.allmap = 1
148 cur = 0
148 cur = 0
149 count = 0
149 count = 0
150 blocksize = self.s * 256
150 blocksize = self.s * 256
151 self.dataf.seek(0)
151 self.dataf.seek(0)
152 while cur < end:
152 while cur < end:
153 data = self.dataf.read(blocksize)
153 data = self.dataf.read(blocksize)
154 off = 0
154 off = 0
155 for x in xrange(256):
155 for x in xrange(256):
156 n = data[off + ngshaoffset:off + ngshaoffset + 20]
156 n = data[off + ngshaoffset:off + ngshaoffset + 20]
157 self.map[n] = count
157 self.map[n] = count
158 count += 1
158 count += 1
159 if count >= self.l:
159 if count >= self.l:
160 break
160 break
161 off += self.s
161 off += self.s
162 cur += blocksize
162 cur += blocksize
163
163
164 def loadblock(self, blockstart, blocksize, data=None):
164 def loadblock(self, blockstart, blocksize, data=None):
165 if self.all:
165 if self.all:
166 return
166 return
167 if data is None:
167 if data is None:
168 self.dataf.seek(blockstart)
168 self.dataf.seek(blockstart)
169 if blockstart + blocksize > self.datasize:
169 if blockstart + blocksize > self.datasize:
170 # the revlog may have grown since we've started running,
170 # the revlog may have grown since we've started running,
171 # but we don't have space in self.index for more entries.
171 # but we don't have space in self.index for more entries.
172 # limit blocksize so that we don't get too much data.
172 # limit blocksize so that we don't get too much data.
173 blocksize = max(self.datasize - blockstart, 0)
173 blocksize = max(self.datasize - blockstart, 0)
174 data = self.dataf.read(blocksize)
174 data = self.dataf.read(blocksize)
175 lend = len(data) / self.s
175 lend = len(data) / self.s
176 i = blockstart / self.s
176 i = blockstart / self.s
177 off = 0
177 off = 0
178 # lazyindex supports __delitem__
178 # lazyindex supports __delitem__
179 if lend > len(self.index) - i:
179 if lend > len(self.index) - i:
180 lend = len(self.index) - i
180 lend = len(self.index) - i
181 for x in xrange(lend):
181 for x in xrange(lend):
182 if self.index[i + x] is None:
182 if self.index[i + x] is None:
183 b = data[off : off + self.s]
183 b = data[off : off + self.s]
184 self.index[i + x] = b
184 self.index[i + x] = b
185 n = b[ngshaoffset:ngshaoffset + 20]
185 n = b[ngshaoffset:ngshaoffset + 20]
186 self.map[n] = i + x
186 self.map[n] = i + x
187 off += self.s
187 off += self.s
188
188
189 def findnode(self, node):
189 def findnode(self, node):
190 """search backwards through the index file for a specific node"""
190 """search backwards through the index file for a specific node"""
191 if self.allmap:
191 if self.allmap:
192 return None
192 return None
193
193
194 # hg log will cause many many searches for the manifest
194 # hg log will cause many many searches for the manifest
195 # nodes. After we get called a few times, just load the whole
195 # nodes. After we get called a few times, just load the whole
196 # thing.
196 # thing.
197 if self.mapfind_count > 8:
197 if self.mapfind_count > 8:
198 self.loadmap()
198 self.loadmap()
199 if node in self.map:
199 if node in self.map:
200 return node
200 return node
201 return None
201 return None
202 self.mapfind_count += 1
202 self.mapfind_count += 1
203 last = self.l - 1
203 last = self.l - 1
204 while self.index[last] != None:
204 while self.index[last] != None:
205 if last == 0:
205 if last == 0:
206 self.all = 1
206 self.all = 1
207 self.allmap = 1
207 self.allmap = 1
208 return None
208 return None
209 last -= 1
209 last -= 1
210 end = (last + 1) * self.s
210 end = (last + 1) * self.s
211 blocksize = self.s * 256
211 blocksize = self.s * 256
212 while end >= 0:
212 while end >= 0:
213 start = max(end - blocksize, 0)
213 start = max(end - blocksize, 0)
214 self.dataf.seek(start)
214 self.dataf.seek(start)
215 data = self.dataf.read(end - start)
215 data = self.dataf.read(end - start)
216 findend = end - start
216 findend = end - start
217 while True:
217 while True:
218 # we're searching backwards, so we have to make sure
218 # we're searching backwards, so we have to make sure
219 # we don't find a changeset where this node is a parent
219 # we don't find a changeset where this node is a parent
220 off = data.find(node, 0, findend)
220 off = data.find(node, 0, findend)
221 findend = off
221 findend = off
222 if off >= 0:
222 if off >= 0:
223 i = off / self.s
223 i = off / self.s
224 off = i * self.s
224 off = i * self.s
225 n = data[off + ngshaoffset:off + ngshaoffset + 20]
225 n = data[off + ngshaoffset:off + ngshaoffset + 20]
226 if n == node:
226 if n == node:
227 self.map[n] = i + start / self.s
227 self.map[n] = i + start / self.s
228 return node
228 return node
229 else:
229 else:
230 break
230 break
231 end -= blocksize
231 end -= blocksize
232 return None
232 return None
233
233
234 def loadindex(self, i=None, end=None):
234 def loadindex(self, i=None, end=None):
235 if self.all:
235 if self.all:
236 return
236 return
237 all = False
237 all = False
238 if i is None:
238 if i is None:
239 blockstart = 0
239 blockstart = 0
240 blocksize = (65536 / self.s) * self.s
240 blocksize = (65536 / self.s) * self.s
241 end = self.datasize
241 end = self.datasize
242 all = True
242 all = True
243 else:
243 else:
244 if end:
244 if end:
245 blockstart = i * self.s
245 blockstart = i * self.s
246 end = end * self.s
246 end = end * self.s
247 blocksize = end - blockstart
247 blocksize = end - blockstart
248 else:
248 else:
249 blockstart = (i & ~1023) * self.s
249 blockstart = (i & ~1023) * self.s
250 blocksize = self.s * 1024
250 blocksize = self.s * 1024
251 end = blockstart + blocksize
251 end = blockstart + blocksize
252 while blockstart < end:
252 while blockstart < end:
253 self.loadblock(blockstart, blocksize)
253 self.loadblock(blockstart, blocksize)
254 blockstart += blocksize
254 blockstart += blocksize
255 if all:
255 if all:
256 self.all = True
256 self.all = True
257
257
258 class lazyindex(object):
258 class lazyindex(object):
259 """a lazy version of the index array"""
259 """a lazy version of the index array"""
260 def __init__(self, parser):
260 def __init__(self, parser):
261 self.p = parser
261 self.p = parser
262 def __len__(self):
262 def __len__(self):
263 return len(self.p.index)
263 return len(self.p.index)
264 def load(self, pos):
264 def load(self, pos):
265 if pos < 0:
265 if pos < 0:
266 pos += len(self.p.index)
266 pos += len(self.p.index)
267 self.p.loadindex(pos)
267 self.p.loadindex(pos)
268 return self.p.index[pos]
268 return self.p.index[pos]
269 def __getitem__(self, pos):
269 def __getitem__(self, pos):
270 return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
270 return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
271 def __setitem__(self, pos, item):
271 def __setitem__(self, pos, item):
272 self.p.index[pos] = _pack(indexformatng, *item)
272 self.p.index[pos] = _pack(indexformatng, *item)
273 def __delitem__(self, pos):
273 def __delitem__(self, pos):
274 del self.p.index[pos]
274 del self.p.index[pos]
275 def insert(self, pos, e):
275 def insert(self, pos, e):
276 self.p.index.insert(pos, _pack(indexformatng, *e))
276 self.p.index.insert(pos, _pack(indexformatng, *e))
277 def append(self, e):
277 def append(self, e):
278 self.p.index.append(_pack(indexformatng, *e))
278 self.p.index.append(_pack(indexformatng, *e))
279
279
280 class lazymap(object):
280 class lazymap(object):
281 """a lazy version of the node map"""
281 """a lazy version of the node map"""
282 def __init__(self, parser):
282 def __init__(self, parser):
283 self.p = parser
283 self.p = parser
284 def load(self, key):
284 def load(self, key):
285 n = self.p.findnode(key)
285 n = self.p.findnode(key)
286 if n is None:
286 if n is None:
287 raise KeyError(key)
287 raise KeyError(key)
288 def __contains__(self, key):
288 def __contains__(self, key):
289 if key in self.p.map:
289 if key in self.p.map:
290 return True
290 return True
291 self.p.loadmap()
291 self.p.loadmap()
292 return key in self.p.map
292 return key in self.p.map
293 def __iter__(self):
293 def __iter__(self):
294 yield nullid
294 yield nullid
295 for i in xrange(self.p.l):
295 for i in xrange(self.p.l):
296 ret = self.p.index[i]
296 ret = self.p.index[i]
297 if not ret:
297 if not ret:
298 self.p.loadindex(i)
298 self.p.loadindex(i)
299 ret = self.p.index[i]
299 ret = self.p.index[i]
300 if isinstance(ret, str):
300 if isinstance(ret, str):
301 ret = _unpack(indexformatng, ret)
301 ret = _unpack(indexformatng, ret)
302 yield ret[7]
302 yield ret[7]
303 def __getitem__(self, key):
303 def __getitem__(self, key):
304 try:
304 try:
305 return self.p.map[key]
305 return self.p.map[key]
306 except KeyError:
306 except KeyError:
307 try:
307 try:
308 self.load(key)
308 self.load(key)
309 return self.p.map[key]
309 return self.p.map[key]
310 except KeyError:
310 except KeyError:
311 raise KeyError("node " + hex(key))
311 raise KeyError("node " + hex(key))
312 def __setitem__(self, key, val):
312 def __setitem__(self, key, val):
313 self.p.map[key] = val
313 self.p.map[key] = val
314 def __delitem__(self, key):
314 def __delitem__(self, key):
315 del self.p.map[key]
315 del self.p.map[key]
316
316
317 indexformatv0 = ">4l20s20s20s"
317 indexformatv0 = ">4l20s20s20s"
318 v0shaoffset = 56
318 v0shaoffset = 56
319
319
320 class revlogoldio(object):
320 class revlogoldio(object):
321 def __init__(self):
321 def __init__(self):
322 self.size = struct.calcsize(indexformatv0)
322 self.size = struct.calcsize(indexformatv0)
323
323
324 def parseindex(self, fp, data, inline):
324 def parseindex(self, fp, data, inline):
325 s = self.size
325 s = self.size
326 index = []
326 index = []
327 nodemap = {nullid: nullrev}
327 nodemap = {nullid: nullrev}
328 n = off = 0
328 n = off = 0
329 if len(data) == _prereadsize:
329 if len(data) == _prereadsize:
330 data += fp.read() # read the rest
330 data += fp.read() # read the rest
331 l = len(data)
331 l = len(data)
332 while off + s <= l:
332 while off + s <= l:
333 cur = data[off:off + s]
333 cur = data[off:off + s]
334 off += s
334 off += s
335 e = _unpack(indexformatv0, cur)
335 e = _unpack(indexformatv0, cur)
336 # transform to revlogv1 format
336 # transform to revlogv1 format
337 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
337 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
338 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
338 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
339 index.append(e2)
339 index.append(e2)
340 nodemap[e[6]] = n
340 nodemap[e[6]] = n
341 n += 1
341 n += 1
342
342
343 return index, nodemap, None
343 return index, nodemap, None
344
344
345 def packentry(self, entry, node, version, rev):
345 def packentry(self, entry, node, version, rev):
346 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
346 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
347 node(entry[5]), node(entry[6]), entry[7])
347 node(entry[5]), node(entry[6]), entry[7])
348 return _pack(indexformatv0, *e2)
348 return _pack(indexformatv0, *e2)
349
349
350 # index ng:
350 # index ng:
351 # 6 bytes offset
351 # 6 bytes offset
352 # 2 bytes flags
352 # 2 bytes flags
353 # 4 bytes compressed length
353 # 4 bytes compressed length
354 # 4 bytes uncompressed length
354 # 4 bytes uncompressed length
355 # 4 bytes: base rev
355 # 4 bytes: base rev
356 # 4 bytes link rev
356 # 4 bytes link rev
357 # 4 bytes parent 1 rev
357 # 4 bytes parent 1 rev
358 # 4 bytes parent 2 rev
358 # 4 bytes parent 2 rev
359 # 32 bytes: nodeid
359 # 32 bytes: nodeid
360 indexformatng = ">Qiiiiii20s12x"
360 indexformatng = ">Qiiiiii20s12x"
361 ngshaoffset = 32
361 ngshaoffset = 32
362 versionformat = ">I"
362 versionformat = ">I"
363
363
364 class revlogio(object):
364 class revlogio(object):
365 def __init__(self):
365 def __init__(self):
366 self.size = struct.calcsize(indexformatng)
366 self.size = struct.calcsize(indexformatng)
367
367
368 def parseindex(self, fp, data, inline):
368 def parseindex(self, fp, data, inline):
369 if len(data) == _prereadsize:
369 if len(data) == _prereadsize:
370 if util.openhardlinks() and not inline:
370 if util.openhardlinks() and not inline:
371 # big index, let's parse it on demand
371 # big index, let's parse it on demand
372 parser = lazyparser(fp)
372 parser = lazyparser(fp)
373 index = lazyindex(parser)
373 index = lazyindex(parser)
374 nodemap = lazymap(parser)
374 nodemap = lazymap(parser)
375 e = list(index[0])
375 e = list(index[0])
376 type = gettype(e[0])
376 type = gettype(e[0])
377 e[0] = offset_type(0, type)
377 e[0] = offset_type(0, type)
378 index[0] = e
378 index[0] = e
379 return index, nodemap, None
379 return index, nodemap, None
380 else:
380 else:
381 data += fp.read()
381 data += fp.read()
382
382
383 # call the C implementation to parse the index data
383 # call the C implementation to parse the index data
384 index, nodemap, cache = parsers.parse_index(data, inline)
384 index, nodemap, cache = parsers.parse_index(data, inline)
385 return index, nodemap, cache
385 return index, nodemap, cache
386
386
387 def packentry(self, entry, node, version, rev):
387 def packentry(self, entry, node, version, rev):
388 p = _pack(indexformatng, *entry)
388 p = _pack(indexformatng, *entry)
389 if rev == 0:
389 if rev == 0:
390 p = _pack(versionformat, version) + p[4:]
390 p = _pack(versionformat, version) + p[4:]
391 return p
391 return p
392
392
393 class revlog(object):
393 class revlog(object):
394 """
394 """
395 the underlying revision storage object
395 the underlying revision storage object
396
396
397 A revlog consists of two parts, an index and the revision data.
397 A revlog consists of two parts, an index and the revision data.
398
398
399 The index is a file with a fixed record size containing
399 The index is a file with a fixed record size containing
400 information on each revision, including its nodeid (hash), the
400 information on each revision, including its nodeid (hash), the
401 nodeids of its parents, the position and offset of its data within
401 nodeids of its parents, the position and offset of its data within
402 the data file, and the revision it's based on. Finally, each entry
402 the data file, and the revision it's based on. Finally, each entry
403 contains a linkrev entry that can serve as a pointer to external
403 contains a linkrev entry that can serve as a pointer to external
404 data.
404 data.
405
405
406 The revision data itself is a linear collection of data chunks.
406 The revision data itself is a linear collection of data chunks.
407 Each chunk represents a revision and is usually represented as a
407 Each chunk represents a revision and is usually represented as a
408 delta against the previous chunk. To bound lookup time, runs of
408 delta against the previous chunk. To bound lookup time, runs of
409 deltas are limited to about 2 times the length of the original
409 deltas are limited to about 2 times the length of the original
410 version data. This makes retrieval of a version proportional to
410 version data. This makes retrieval of a version proportional to
411 its size, or O(1) relative to the number of revisions.
411 its size, or O(1) relative to the number of revisions.
412
412
413 Both pieces of the revlog are written to in an append-only
413 Both pieces of the revlog are written to in an append-only
414 fashion, which means we never need to rewrite a file to insert or
414 fashion, which means we never need to rewrite a file to insert or
415 remove data, and can use some simple techniques to avoid the need
415 remove data, and can use some simple techniques to avoid the need
416 for locking while reading.
416 for locking while reading.
417 """
417 """
418 def __init__(self, opener, indexfile):
418 def __init__(self, opener, indexfile):
419 """
419 """
420 create a revlog object
420 create a revlog object
421
421
422 opener is a function that abstracts the file opening operation
422 opener is a function that abstracts the file opening operation
423 and can be used to implement COW semantics or the like.
423 and can be used to implement COW semantics or the like.
424 """
424 """
425 self.indexfile = indexfile
425 self.indexfile = indexfile
426 self.datafile = indexfile[:-2] + ".d"
426 self.datafile = indexfile[:-2] + ".d"
427 self.opener = opener
427 self.opener = opener
428 self._cache = None
428 self._cache = None
429 self._chunkcache = (0, '')
429 self._chunkcache = (0, '')
430 self.nodemap = {nullid: nullrev}
430 self.nodemap = {nullid: nullrev}
431 self.index = []
431 self.index = []
432
432
433 v = REVLOG_DEFAULT_VERSION
433 v = REVLOG_DEFAULT_VERSION
434 if hasattr(opener, 'options') and 'defversion' in opener.options:
434 if hasattr(opener, 'options') and 'defversion' in opener.options:
435 v = opener.options['defversion']
435 v = opener.options['defversion']
436 if v & REVLOGNG:
436 if v & REVLOGNG:
437 v |= REVLOGNGINLINEDATA
437 v |= REVLOGNGINLINEDATA
438
438
439 i = ''
439 i = ''
440 try:
440 try:
441 f = self.opener(self.indexfile)
441 f = self.opener(self.indexfile)
442 i = f.read(_prereadsize)
442 i = f.read(_prereadsize)
443 if len(i) > 0:
443 if len(i) > 0:
444 v = struct.unpack(versionformat, i[:4])[0]
444 v = struct.unpack(versionformat, i[:4])[0]
445 except IOError, inst:
445 except IOError, inst:
446 if inst.errno != errno.ENOENT:
446 if inst.errno != errno.ENOENT:
447 raise
447 raise
448
448
449 self.version = v
449 self.version = v
450 self._inline = v & REVLOGNGINLINEDATA
450 self._inline = v & REVLOGNGINLINEDATA
451 flags = v & ~0xFFFF
451 flags = v & ~0xFFFF
452 fmt = v & 0xFFFF
452 fmt = v & 0xFFFF
453 if fmt == REVLOGV0 and flags:
453 if fmt == REVLOGV0 and flags:
454 raise RevlogError(_("index %s unknown flags %#04x for format v0")
454 raise RevlogError(_("index %s unknown flags %#04x for format v0")
455 % (self.indexfile, flags >> 16))
455 % (self.indexfile, flags >> 16))
456 elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
456 elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
457 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
457 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
458 % (self.indexfile, flags >> 16))
458 % (self.indexfile, flags >> 16))
459 elif fmt > REVLOGNG:
459 elif fmt > REVLOGNG:
460 raise RevlogError(_("index %s unknown format %d")
460 raise RevlogError(_("index %s unknown format %d")
461 % (self.indexfile, fmt))
461 % (self.indexfile, fmt))
462
462
463 self._io = revlogio()
463 self._io = revlogio()
464 if self.version == REVLOGV0:
464 if self.version == REVLOGV0:
465 self._io = revlogoldio()
465 self._io = revlogoldio()
466 if i:
466 if i:
467 try:
467 try:
468 d = self._io.parseindex(f, i, self._inline)
468 d = self._io.parseindex(f, i, self._inline)
469 except (ValueError, IndexError):
469 except (ValueError, IndexError):
470 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
470 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
471 self.index, self.nodemap, self._chunkcache = d
471 self.index, self.nodemap, self._chunkcache = d
472 if not self._chunkcache:
472 if not self._chunkcache:
473 self._chunkclear()
473 self._chunkclear()
474
474
475 # add the magic null revision at -1 (if it hasn't been done already)
475 # add the magic null revision at -1 (if it hasn't been done already)
476 if (self.index == [] or isinstance(self.index, lazyindex) or
476 if (self.index == [] or isinstance(self.index, lazyindex) or
477 self.index[-1][7] != nullid) :
477 self.index[-1][7] != nullid) :
478 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
478 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
479
479
480 def _loadindex(self, start, end):
480 def _loadindex(self, start, end):
481 """load a block of indexes all at once from the lazy parser"""
481 """load a block of indexes all at once from the lazy parser"""
482 if isinstance(self.index, lazyindex):
482 if isinstance(self.index, lazyindex):
483 self.index.p.loadindex(start, end)
483 self.index.p.loadindex(start, end)
484
484
485 def _loadindexmap(self):
485 def _loadindexmap(self):
486 """loads both the map and the index from the lazy parser"""
486 """loads both the map and the index from the lazy parser"""
487 if isinstance(self.index, lazyindex):
487 if isinstance(self.index, lazyindex):
488 p = self.index.p
488 p = self.index.p
489 p.loadindex()
489 p.loadindex()
490 self.nodemap = p.map
490 self.nodemap = p.map
491
491
492 def _loadmap(self):
492 def _loadmap(self):
493 """loads the map from the lazy parser"""
493 """loads the map from the lazy parser"""
494 if isinstance(self.nodemap, lazymap):
494 if isinstance(self.nodemap, lazymap):
495 self.nodemap.p.loadmap()
495 self.nodemap.p.loadmap()
496 self.nodemap = self.nodemap.p.map
496 self.nodemap = self.nodemap.p.map
497
497
498 def tip(self):
498 def tip(self):
499 return self.node(len(self.index) - 2)
499 return self.node(len(self.index) - 2)
500 def __len__(self):
500 def __len__(self):
501 return len(self.index) - 1
501 return len(self.index) - 1
502 def __iter__(self):
502 def __iter__(self):
503 for i in xrange(len(self)):
503 for i in xrange(len(self)):
504 yield i
504 yield i
505 def rev(self, node):
505 def rev(self, node):
506 try:
506 try:
507 return self.nodemap[node]
507 return self.nodemap[node]
508 except KeyError:
508 except KeyError:
509 raise LookupError(node, self.indexfile, _('no node'))
509 raise LookupError(node, self.indexfile, _('no node'))
510 def node(self, rev):
510 def node(self, rev):
511 return self.index[rev][7]
511 return self.index[rev][7]
512 def linkrev(self, rev):
512 def linkrev(self, rev):
513 return self.index[rev][4]
513 return self.index[rev][4]
514 def parents(self, node):
514 def parents(self, node):
515 i = self.index
515 i = self.index
516 d = i[self.rev(node)]
516 d = i[self.rev(node)]
517 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
517 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
518 def parentrevs(self, rev):
518 def parentrevs(self, rev):
519 return self.index[rev][5:7]
519 return self.index[rev][5:7]
520 def start(self, rev):
520 def start(self, rev):
521 return int(self.index[rev][0] >> 16)
521 return int(self.index[rev][0] >> 16)
522 def end(self, rev):
522 def end(self, rev):
523 return self.start(rev) + self.length(rev)
523 return self.start(rev) + self.length(rev)
524 def length(self, rev):
524 def length(self, rev):
525 return self.index[rev][1]
525 return self.index[rev][1]
526 def base(self, rev):
526 def base(self, rev):
527 return self.index[rev][3]
527 return self.index[rev][3]
528
528
529 def size(self, rev):
529 def size(self, rev):
530 """return the length of the uncompressed text for a given revision"""
530 """return the length of the uncompressed text for a given revision"""
531 l = self.index[rev][2]
531 l = self.index[rev][2]
532 if l >= 0:
532 if l >= 0:
533 return l
533 return l
534
534
535 t = self.revision(self.node(rev))
535 t = self.revision(self.node(rev))
536 return len(t)
536 return len(t)
537
537
538 # Alternate implementation. The advantage to this code is it
538 # Alternate implementation. The advantage to this code is it
539 # will be faster for a single revision. However, the results
539 # will be faster for a single revision. However, the results
540 # are not cached, so finding the size of every revision will
540 # are not cached, so finding the size of every revision will
541 # be slower.
541 # be slower.
542 #
542 #
543 # if self.cache and self.cache[1] == rev:
543 # if self.cache and self.cache[1] == rev:
544 # return len(self.cache[2])
544 # return len(self.cache[2])
545 #
545 #
546 # base = self.base(rev)
546 # base = self.base(rev)
547 # if self.cache and self.cache[1] >= base and self.cache[1] < rev:
547 # if self.cache and self.cache[1] >= base and self.cache[1] < rev:
548 # base = self.cache[1]
548 # base = self.cache[1]
549 # text = self.cache[2]
549 # text = self.cache[2]
550 # else:
550 # else:
551 # text = self.revision(self.node(base))
551 # text = self.revision(self.node(base))
552 #
552 #
553 # l = len(text)
553 # l = len(text)
554 # for x in xrange(base + 1, rev + 1):
554 # for x in xrange(base + 1, rev + 1):
555 # l = mdiff.patchedsize(l, self._chunk(x))
555 # l = mdiff.patchedsize(l, self._chunk(x))
556 # return l
556 # return l
557
557
558 def reachable(self, node, stop=None):
558 def reachable(self, node, stop=None):
559 """return the set of all nodes ancestral to a given node, including
559 """return the set of all nodes ancestral to a given node, including
560 the node itself, stopping when stop is matched"""
560 the node itself, stopping when stop is matched"""
561 reachable = set((node,))
561 reachable = set((node,))
562 visit = [node]
562 visit = [node]
563 if stop:
563 if stop:
564 stopn = self.rev(stop)
564 stopn = self.rev(stop)
565 else:
565 else:
566 stopn = 0
566 stopn = 0
567 while visit:
567 while visit:
568 n = visit.pop(0)
568 n = visit.pop(0)
569 if n == stop:
569 if n == stop:
570 continue
570 continue
571 if n == nullid:
571 if n == nullid:
572 continue
572 continue
573 for p in self.parents(n):
573 for p in self.parents(n):
574 if self.rev(p) < stopn:
574 if self.rev(p) < stopn:
575 continue
575 continue
576 if p not in reachable:
576 if p not in reachable:
577 reachable.add(p)
577 reachable.add(p)
578 visit.append(p)
578 visit.append(p)
579 return reachable
579 return reachable
580
580
581 def ancestors(self, *revs):
581 def ancestors(self, *revs):
582 """Generate the ancestors of 'revs' in reverse topological order.
582 """Generate the ancestors of 'revs' in reverse topological order.
583
583
584 Yield a sequence of revision numbers starting with the parents
584 Yield a sequence of revision numbers starting with the parents
585 of each revision in revs, i.e., each revision is *not* considered
585 of each revision in revs, i.e., each revision is *not* considered
586 an ancestor of itself. Results are in breadth-first order:
586 an ancestor of itself. Results are in breadth-first order:
587 parents of each rev in revs, then parents of those, etc. Result
587 parents of each rev in revs, then parents of those, etc. Result
588 does not include the null revision."""
588 does not include the null revision."""
589 visit = list(revs)
589 visit = list(revs)
590 seen = set([nullrev])
590 seen = set([nullrev])
591 while visit:
591 while visit:
592 for parent in self.parentrevs(visit.pop(0)):
592 for parent in self.parentrevs(visit.pop(0)):
593 if parent not in seen:
593 if parent not in seen:
594 visit.append(parent)
594 visit.append(parent)
595 seen.add(parent)
595 seen.add(parent)
596 yield parent
596 yield parent
597
597
598 def descendants(self, *revs):
598 def descendants(self, *revs):
599 """Generate the descendants of 'revs' in revision order.
599 """Generate the descendants of 'revs' in revision order.
600
600
601 Yield a sequence of revision numbers starting with a child of
601 Yield a sequence of revision numbers starting with a child of
602 some rev in revs, i.e., each revision is *not* considered a
602 some rev in revs, i.e., each revision is *not* considered a
603 descendant of itself. Results are ordered by revision number (a
603 descendant of itself. Results are ordered by revision number (a
604 topological sort)."""
604 topological sort)."""
605 seen = set(revs)
605 seen = set(revs)
606 for i in xrange(min(revs) + 1, len(self)):
606 for i in xrange(min(revs) + 1, len(self)):
607 for x in self.parentrevs(i):
607 for x in self.parentrevs(i):
608 if x != nullrev and x in seen:
608 if x != nullrev and x in seen:
609 seen.add(i)
609 seen.add(i)
610 yield i
610 yield i
611 break
611 break
612
612
613 def findmissing(self, common=None, heads=None):
613 def findmissing(self, common=None, heads=None):
614 """Return the ancestors of heads that are not ancestors of common.
614 """Return the ancestors of heads that are not ancestors of common.
615
615
616 More specifically, return a list of nodes N such that every N
616 More specifically, return a list of nodes N such that every N
617 satisfies the following constraints:
617 satisfies the following constraints:
618
618
619 1. N is an ancestor of some node in 'heads'
619 1. N is an ancestor of some node in 'heads'
620 2. N is not an ancestor of any node in 'common'
620 2. N is not an ancestor of any node in 'common'
621
621
622 The list is sorted by revision number, meaning it is
622 The list is sorted by revision number, meaning it is
623 topologically sorted.
623 topologically sorted.
624
624
625 'heads' and 'common' are both lists of node IDs. If heads is
625 'heads' and 'common' are both lists of node IDs. If heads is
626 not supplied, uses all of the revlog's heads. If common is not
626 not supplied, uses all of the revlog's heads. If common is not
627 supplied, uses nullid."""
627 supplied, uses nullid."""
628 if common is None:
628 if common is None:
629 common = [nullid]
629 common = [nullid]
630 if heads is None:
630 if heads is None:
631 heads = self.heads()
631 heads = self.heads()
632
632
633 common = [self.rev(n) for n in common]
633 common = [self.rev(n) for n in common]
634 heads = [self.rev(n) for n in heads]
634 heads = [self.rev(n) for n in heads]
635
635
636 # we want the ancestors, but inclusive
636 # we want the ancestors, but inclusive
637 has = set(self.ancestors(*common))
637 has = set(self.ancestors(*common))
638 has.add(nullrev)
638 has.add(nullrev)
639 has.update(common)
639 has.update(common)
640
640
641 # take all ancestors from heads that aren't in has
641 # take all ancestors from heads that aren't in has
642 missing = set()
642 missing = set()
643 visit = [r for r in heads if r not in has]
643 visit = [r for r in heads if r not in has]
644 while visit:
644 while visit:
645 r = visit.pop(0)
645 r = visit.pop(0)
646 if r in missing:
646 if r in missing:
647 continue
647 continue
648 else:
648 else:
649 missing.add(r)
649 missing.add(r)
650 for p in self.parentrevs(r):
650 for p in self.parentrevs(r):
651 if p not in has:
651 if p not in has:
652 visit.append(p)
652 visit.append(p)
653 missing = list(missing)
653 missing = list(missing)
654 missing.sort()
654 missing.sort()
655 return [self.node(r) for r in missing]
655 return [self.node(r) for r in missing]
656
656
657 def nodesbetween(self, roots=None, heads=None):
657 def nodesbetween(self, roots=None, heads=None):
658 """Return a topological path from 'roots' to 'heads'.
658 """Return a topological path from 'roots' to 'heads'.
659
659
660 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
660 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
661 topologically sorted list of all nodes N that satisfy both of
661 topologically sorted list of all nodes N that satisfy both of
662 these constraints:
662 these constraints:
663
663
664 1. N is a descendant of some node in 'roots'
664 1. N is a descendant of some node in 'roots'
665 2. N is an ancestor of some node in 'heads'
665 2. N is an ancestor of some node in 'heads'
666
666
667 Every node is considered to be both a descendant and an ancestor
667 Every node is considered to be both a descendant and an ancestor
668 of itself, so every reachable node in 'roots' and 'heads' will be
668 of itself, so every reachable node in 'roots' and 'heads' will be
669 included in 'nodes'.
669 included in 'nodes'.
670
670
671 'outroots' is the list of reachable nodes in 'roots', i.e., the
671 'outroots' is the list of reachable nodes in 'roots', i.e., the
672 subset of 'roots' that is returned in 'nodes'. Likewise,
672 subset of 'roots' that is returned in 'nodes'. Likewise,
673 'outheads' is the subset of 'heads' that is also in 'nodes'.
673 'outheads' is the subset of 'heads' that is also in 'nodes'.
674
674
675 'roots' and 'heads' are both lists of node IDs. If 'roots' is
675 'roots' and 'heads' are both lists of node IDs. If 'roots' is
676 unspecified, uses nullid as the only root. If 'heads' is
676 unspecified, uses nullid as the only root. If 'heads' is
677 unspecified, uses list of all of the revlog's heads."""
677 unspecified, uses list of all of the revlog's heads."""
678 nonodes = ([], [], [])
678 nonodes = ([], [], [])
679 if roots is not None:
679 if roots is not None:
680 roots = list(roots)
680 roots = list(roots)
681 if not roots:
681 if not roots:
682 return nonodes
682 return nonodes
683 lowestrev = min([self.rev(n) for n in roots])
683 lowestrev = min([self.rev(n) for n in roots])
684 else:
684 else:
685 roots = [nullid] # Everybody's a descendent of nullid
685 roots = [nullid] # Everybody's a descendent of nullid
686 lowestrev = nullrev
686 lowestrev = nullrev
687 if (lowestrev == nullrev) and (heads is None):
687 if (lowestrev == nullrev) and (heads is None):
688 # We want _all_ the nodes!
688 # We want _all_ the nodes!
689 return ([self.node(r) for r in self], [nullid], list(self.heads()))
689 return ([self.node(r) for r in self], [nullid], list(self.heads()))
690 if heads is None:
690 if heads is None:
691 # All nodes are ancestors, so the latest ancestor is the last
691 # All nodes are ancestors, so the latest ancestor is the last
692 # node.
692 # node.
693 highestrev = len(self) - 1
693 highestrev = len(self) - 1
694 # Set ancestors to None to signal that every node is an ancestor.
694 # Set ancestors to None to signal that every node is an ancestor.
695 ancestors = None
695 ancestors = None
696 # Set heads to an empty dictionary for later discovery of heads
696 # Set heads to an empty dictionary for later discovery of heads
697 heads = {}
697 heads = {}
698 else:
698 else:
699 heads = list(heads)
699 heads = list(heads)
700 if not heads:
700 if not heads:
701 return nonodes
701 return nonodes
702 ancestors = set()
702 ancestors = set()
703 # Turn heads into a dictionary so we can remove 'fake' heads.
703 # Turn heads into a dictionary so we can remove 'fake' heads.
704 # Also, later we will be using it to filter out the heads we can't
704 # Also, later we will be using it to filter out the heads we can't
705 # find from roots.
705 # find from roots.
706 heads = dict.fromkeys(heads, 0)
706 heads = dict.fromkeys(heads, 0)
707 # Start at the top and keep marking parents until we're done.
707 # Start at the top and keep marking parents until we're done.
708 nodestotag = set(heads)
708 nodestotag = set(heads)
709 # Remember where the top was so we can use it as a limit later.
709 # Remember where the top was so we can use it as a limit later.
710 highestrev = max([self.rev(n) for n in nodestotag])
710 highestrev = max([self.rev(n) for n in nodestotag])
711 while nodestotag:
711 while nodestotag:
712 # grab a node to tag
712 # grab a node to tag
713 n = nodestotag.pop()
713 n = nodestotag.pop()
714 # Never tag nullid
714 # Never tag nullid
715 if n == nullid:
715 if n == nullid:
716 continue
716 continue
717 # A node's revision number represents its place in a
717 # A node's revision number represents its place in a
718 # topologically sorted list of nodes.
718 # topologically sorted list of nodes.
719 r = self.rev(n)
719 r = self.rev(n)
720 if r >= lowestrev:
720 if r >= lowestrev:
721 if n not in ancestors:
721 if n not in ancestors:
722 # If we are possibly a descendent of one of the roots
722 # If we are possibly a descendent of one of the roots
723 # and we haven't already been marked as an ancestor
723 # and we haven't already been marked as an ancestor
724 ancestors.add(n) # Mark as ancestor
724 ancestors.add(n) # Mark as ancestor
725 # Add non-nullid parents to list of nodes to tag.
725 # Add non-nullid parents to list of nodes to tag.
726 nodestotag.update([p for p in self.parents(n) if
726 nodestotag.update([p for p in self.parents(n) if
727 p != nullid])
727 p != nullid])
728 elif n in heads: # We've seen it before, is it a fake head?
728 elif n in heads: # We've seen it before, is it a fake head?
729 # So it is, real heads should not be the ancestors of
729 # So it is, real heads should not be the ancestors of
730 # any other heads.
730 # any other heads.
731 heads.pop(n)
731 heads.pop(n)
732 if not ancestors:
732 if not ancestors:
733 return nonodes
733 return nonodes
734 # Now that we have our set of ancestors, we want to remove any
734 # Now that we have our set of ancestors, we want to remove any
735 # roots that are not ancestors.
735 # roots that are not ancestors.
736
736
737 # If one of the roots was nullid, everything is included anyway.
737 # If one of the roots was nullid, everything is included anyway.
738 if lowestrev > nullrev:
738 if lowestrev > nullrev:
739 # But, since we weren't, let's recompute the lowest rev to not
739 # But, since we weren't, let's recompute the lowest rev to not
740 # include roots that aren't ancestors.
740 # include roots that aren't ancestors.
741
741
742 # Filter out roots that aren't ancestors of heads
742 # Filter out roots that aren't ancestors of heads
743 roots = [n for n in roots if n in ancestors]
743 roots = [n for n in roots if n in ancestors]
744 # Recompute the lowest revision
744 # Recompute the lowest revision
745 if roots:
745 if roots:
746 lowestrev = min([self.rev(n) for n in roots])
746 lowestrev = min([self.rev(n) for n in roots])
747 else:
747 else:
748 # No more roots? Return empty list
748 # No more roots? Return empty list
749 return nonodes
749 return nonodes
750 else:
750 else:
751 # We are descending from nullid, and don't need to care about
751 # We are descending from nullid, and don't need to care about
752 # any other roots.
752 # any other roots.
753 lowestrev = nullrev
753 lowestrev = nullrev
754 roots = [nullid]
754 roots = [nullid]
755 # Transform our roots list into a set.
755 # Transform our roots list into a set.
756 descendents = set(roots)
756 descendents = set(roots)
757 # Also, keep the original roots so we can filter out roots that aren't
757 # Also, keep the original roots so we can filter out roots that aren't
758 # 'real' roots (i.e. are descended from other roots).
758 # 'real' roots (i.e. are descended from other roots).
759 roots = descendents.copy()
759 roots = descendents.copy()
760 # Our topologically sorted list of output nodes.
760 # Our topologically sorted list of output nodes.
761 orderedout = []
761 orderedout = []
762 # Don't start at nullid since we don't want nullid in our output list,
762 # Don't start at nullid since we don't want nullid in our output list,
763 # and if nullid shows up in descedents, empty parents will look like
763 # and if nullid shows up in descedents, empty parents will look like
764 # they're descendents.
764 # they're descendents.
765 for r in xrange(max(lowestrev, 0), highestrev + 1):
765 for r in xrange(max(lowestrev, 0), highestrev + 1):
766 n = self.node(r)
766 n = self.node(r)
767 isdescendent = False
767 isdescendent = False
768 if lowestrev == nullrev: # Everybody is a descendent of nullid
768 if lowestrev == nullrev: # Everybody is a descendent of nullid
769 isdescendent = True
769 isdescendent = True
770 elif n in descendents:
770 elif n in descendents:
771 # n is already a descendent
771 # n is already a descendent
772 isdescendent = True
772 isdescendent = True
773 # This check only needs to be done here because all the roots
773 # This check only needs to be done here because all the roots
774 # will start being marked is descendents before the loop.
774 # will start being marked is descendents before the loop.
775 if n in roots:
775 if n in roots:
776 # If n was a root, check if it's a 'real' root.
776 # If n was a root, check if it's a 'real' root.
777 p = tuple(self.parents(n))
777 p = tuple(self.parents(n))
778 # If any of its parents are descendents, it's not a root.
778 # If any of its parents are descendents, it's not a root.
779 if (p[0] in descendents) or (p[1] in descendents):
779 if (p[0] in descendents) or (p[1] in descendents):
780 roots.remove(n)
780 roots.remove(n)
781 else:
781 else:
782 p = tuple(self.parents(n))
782 p = tuple(self.parents(n))
783 # A node is a descendent if either of its parents are
783 # A node is a descendent if either of its parents are
784 # descendents. (We seeded the dependents list with the roots
784 # descendents. (We seeded the dependents list with the roots
785 # up there, remember?)
785 # up there, remember?)
786 if (p[0] in descendents) or (p[1] in descendents):
786 if (p[0] in descendents) or (p[1] in descendents):
787 descendents.add(n)
787 descendents.add(n)
788 isdescendent = True
788 isdescendent = True
789 if isdescendent and ((ancestors is None) or (n in ancestors)):
789 if isdescendent and ((ancestors is None) or (n in ancestors)):
790 # Only include nodes that are both descendents and ancestors.
790 # Only include nodes that are both descendents and ancestors.
791 orderedout.append(n)
791 orderedout.append(n)
792 if (ancestors is not None) and (n in heads):
792 if (ancestors is not None) and (n in heads):
793 # We're trying to figure out which heads are reachable
793 # We're trying to figure out which heads are reachable
794 # from roots.
794 # from roots.
795 # Mark this head as having been reached
795 # Mark this head as having been reached
796 heads[n] = 1
796 heads[n] = 1
797 elif ancestors is None:
797 elif ancestors is None:
798 # Otherwise, we're trying to discover the heads.
798 # Otherwise, we're trying to discover the heads.
799 # Assume this is a head because if it isn't, the next step
799 # Assume this is a head because if it isn't, the next step
800 # will eventually remove it.
800 # will eventually remove it.
801 heads[n] = 1
801 heads[n] = 1
802 # But, obviously its parents aren't.
802 # But, obviously its parents aren't.
803 for p in self.parents(n):
803 for p in self.parents(n):
804 heads.pop(p, None)
804 heads.pop(p, None)
805 heads = [n for n in heads.iterkeys() if heads[n] != 0]
805 heads = [n for n in heads.iterkeys() if heads[n] != 0]
806 roots = list(roots)
806 roots = list(roots)
807 assert orderedout
807 assert orderedout
808 assert roots
808 assert roots
809 assert heads
809 assert heads
810 return (orderedout, roots, heads)
810 return (orderedout, roots, heads)
811
811
812 def heads(self, start=None, stop=None):
812 def heads(self, start=None, stop=None):
813 """return the list of all nodes that have no children
813 """return the list of all nodes that have no children
814
814
815 if start is specified, only heads that are descendants of
815 if start is specified, only heads that are descendants of
816 start will be returned
816 start will be returned
817 if stop is specified, it will consider all the revs from stop
817 if stop is specified, it will consider all the revs from stop
818 as if they had no children
818 as if they had no children
819 """
819 """
820 if start is None and stop is None:
820 if start is None and stop is None:
821 count = len(self)
821 count = len(self)
822 if not count:
822 if not count:
823 return [nullid]
823 return [nullid]
824 ishead = [1] * (count + 1)
824 ishead = [1] * (count + 1)
825 index = self.index
825 index = self.index
826 for r in xrange(count):
826 for r in xrange(count):
827 e = index[r]
827 e = index[r]
828 ishead[e[5]] = ishead[e[6]] = 0
828 ishead[e[5]] = ishead[e[6]] = 0
829 return [self.node(r) for r in xrange(count) if ishead[r]]
829 return [self.node(r) for r in xrange(count) if ishead[r]]
830
830
831 if start is None:
831 if start is None:
832 start = nullid
832 start = nullid
833 if stop is None:
833 if stop is None:
834 stop = []
834 stop = []
835 stoprevs = set([self.rev(n) for n in stop])
835 stoprevs = set([self.rev(n) for n in stop])
836 startrev = self.rev(start)
836 startrev = self.rev(start)
837 reachable = set((startrev,))
837 reachable = set((startrev,))
838 heads = set((startrev,))
838 heads = set((startrev,))
839
839
840 parentrevs = self.parentrevs
840 parentrevs = self.parentrevs
841 for r in xrange(startrev + 1, len(self)):
841 for r in xrange(startrev + 1, len(self)):
842 for p in parentrevs(r):
842 for p in parentrevs(r):
843 if p in reachable:
843 if p in reachable:
844 if r not in stoprevs:
844 if r not in stoprevs:
845 reachable.add(r)
845 reachable.add(r)
846 heads.add(r)
846 heads.add(r)
847 if p in heads and p not in stoprevs:
847 if p in heads and p not in stoprevs:
848 heads.remove(p)
848 heads.remove(p)
849
849
850 return [self.node(r) for r in heads]
850 return [self.node(r) for r in heads]
851
851
852 def children(self, node):
852 def children(self, node):
853 """find the children of a given node"""
853 """find the children of a given node"""
854 c = []
854 c = []
855 p = self.rev(node)
855 p = self.rev(node)
856 for r in range(p + 1, len(self)):
856 for r in range(p + 1, len(self)):
857 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
857 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
858 if prevs:
858 if prevs:
859 for pr in prevs:
859 for pr in prevs:
860 if pr == p:
860 if pr == p:
861 c.append(self.node(r))
861 c.append(self.node(r))
862 elif p == nullrev:
862 elif p == nullrev:
863 c.append(self.node(r))
863 c.append(self.node(r))
864 return c
864 return c
865
865
866 def _match(self, id):
866 def _match(self, id):
867 if isinstance(id, (long, int)):
867 if isinstance(id, (long, int)):
868 # rev
868 # rev
869 return self.node(id)
869 return self.node(id)
870 if len(id) == 20:
870 if len(id) == 20:
871 # possibly a binary node
871 # possibly a binary node
872 # odds of a binary node being all hex in ASCII are 1 in 10**25
872 # odds of a binary node being all hex in ASCII are 1 in 10**25
873 try:
873 try:
874 node = id
874 node = id
875 self.rev(node) # quick search the index
875 self.rev(node) # quick search the index
876 return node
876 return node
877 except LookupError:
877 except LookupError:
878 pass # may be partial hex id
878 pass # may be partial hex id
879 try:
879 try:
880 # str(rev)
880 # str(rev)
881 rev = int(id)
881 rev = int(id)
882 if str(rev) != id:
882 if str(rev) != id:
883 raise ValueError
883 raise ValueError
884 if rev < 0:
884 if rev < 0:
885 rev = len(self) + rev
885 rev = len(self) + rev
886 if rev < 0 or rev >= len(self):
886 if rev < 0 or rev >= len(self):
887 raise ValueError
887 raise ValueError
888 return self.node(rev)
888 return self.node(rev)
889 except (ValueError, OverflowError):
889 except (ValueError, OverflowError):
890 pass
890 pass
891 if len(id) == 40:
891 if len(id) == 40:
892 try:
892 try:
893 # a full hex nodeid?
893 # a full hex nodeid?
894 node = bin(id)
894 node = bin(id)
895 self.rev(node)
895 self.rev(node)
896 return node
896 return node
897 except (TypeError, LookupError):
897 except (TypeError, LookupError):
898 pass
898 pass
899
899
900 def _partialmatch(self, id):
900 def _partialmatch(self, id):
901 if len(id) < 40:
901 if len(id) < 40:
902 try:
902 try:
903 # hex(node)[:...]
903 # hex(node)[:...]
904 l = len(id) // 2 # grab an even number of digits
904 l = len(id) // 2 # grab an even number of digits
905 bin_id = bin(id[:l * 2])
905 bin_id = bin(id[:l * 2])
906 nl = [n for n in self.nodemap if n[:l] == bin_id]
906 nl = [n for n in self.nodemap if n[:l] == bin_id]
907 nl = [n for n in nl if hex(n).startswith(id)]
907 nl = [n for n in nl if hex(n).startswith(id)]
908 if len(nl) > 0:
908 if len(nl) > 0:
909 if len(nl) == 1:
909 if len(nl) == 1:
910 return nl[0]
910 return nl[0]
911 raise LookupError(id, self.indexfile,
911 raise LookupError(id, self.indexfile,
912 _('ambiguous identifier'))
912 _('ambiguous identifier'))
913 return None
913 return None
914 except TypeError:
914 except TypeError:
915 pass
915 pass
916
916
917 def lookup(self, id):
917 def lookup(self, id):
918 """locate a node based on:
918 """locate a node based on:
919 - revision number or str(revision number)
919 - revision number or str(revision number)
920 - nodeid or subset of hex nodeid
920 - nodeid or subset of hex nodeid
921 """
921 """
922 n = self._match(id)
922 n = self._match(id)
923 if n is not None:
923 if n is not None:
924 return n
924 return n
925 n = self._partialmatch(id)
925 n = self._partialmatch(id)
926 if n:
926 if n:
927 return n
927 return n
928
928
929 raise LookupError(id, self.indexfile, _('no match found'))
929 raise LookupError(id, self.indexfile, _('no match found'))
930
930
931 def cmp(self, node, text):
931 def cmp(self, node, text):
932 """compare text with a given file revision"""
932 """compare text with a given file revision"""
933 p1, p2 = self.parents(node)
933 p1, p2 = self.parents(node)
934 return hash(text, p1, p2) != node
934 return hash(text, p1, p2) != node
935
935
936 def _addchunk(self, offset, data):
936 def _addchunk(self, offset, data):
937 o, d = self._chunkcache
937 o, d = self._chunkcache
938 # try to add to existing cache
938 # try to add to existing cache
939 if o + len(d) == offset and len(d) + len(data) < _prereadsize:
939 if o + len(d) == offset and len(d) + len(data) < _prereadsize:
940 self._chunkcache = o, d + data
940 self._chunkcache = o, d + data
941 else:
941 else:
942 self._chunkcache = offset, data
942 self._chunkcache = offset, data
943
943
944 def _loadchunk(self, offset, length):
944 def _loadchunk(self, offset, length):
945 if self._inline:
945 if self._inline:
946 df = self.opener(self.indexfile)
946 df = self.opener(self.indexfile)
947 else:
947 else:
948 df = self.opener(self.datafile)
948 df = self.opener(self.datafile)
949
949
950 readahead = max(65536, length)
950 readahead = max(65536, length)
951 df.seek(offset)
951 df.seek(offset)
952 d = df.read(readahead)
952 d = df.read(readahead)
953 self._addchunk(offset, d)
953 self._addchunk(offset, d)
954 if readahead > length:
954 if readahead > length:
955 return d[:length]
955 return d[:length]
956 return d
956 return d
957
957
958 def _getchunk(self, offset, length):
958 def _getchunk(self, offset, length):
959 o, d = self._chunkcache
959 o, d = self._chunkcache
960 l = len(d)
960 l = len(d)
961
961
962 # is it in the cache?
962 # is it in the cache?
963 cachestart = offset - o
963 cachestart = offset - o
964 cacheend = cachestart + length
964 cacheend = cachestart + length
965 if cachestart >= 0 and cacheend <= l:
965 if cachestart >= 0 and cacheend <= l:
966 if cachestart == 0 and cacheend == l:
966 if cachestart == 0 and cacheend == l:
967 return d # avoid a copy
967 return d # avoid a copy
968 return d[cachestart:cacheend]
968 return d[cachestart:cacheend]
969
969
970 return self._loadchunk(offset, length)
970 return self._loadchunk(offset, length)
971
971
972 def _chunkraw(self, startrev, endrev):
972 def _chunkraw(self, startrev, endrev):
973 start = self.start(startrev)
973 start = self.start(startrev)
974 length = self.end(endrev) - start
974 length = self.end(endrev) - start
975 if self._inline:
975 if self._inline:
976 start += (startrev + 1) * self._io.size
976 start += (startrev + 1) * self._io.size
977 return self._getchunk(start, length)
977 return self._getchunk(start, length)
978
978
979 def _chunk(self, rev):
979 def _chunk(self, rev):
980 return decompress(self._chunkraw(rev, rev))
980 return decompress(self._chunkraw(rev, rev))
981
981
982 def _chunkclear(self):
982 def _chunkclear(self):
983 self._chunkcache = (0, '')
983 self._chunkcache = (0, '')
984
984
985 def revdiff(self, rev1, rev2):
985 def revdiff(self, rev1, rev2):
986 """return or calculate a delta between two revisions"""
986 """return or calculate a delta between two revisions"""
987 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
987 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
988 return self._chunk(rev2)
988 return self._chunk(rev2)
989
989
990 return mdiff.textdiff(self.revision(self.node(rev1)),
990 return mdiff.textdiff(self.revision(self.node(rev1)),
991 self.revision(self.node(rev2)))
991 self.revision(self.node(rev2)))
992
992
993 def revision(self, node):
993 def revision(self, node):
994 """return an uncompressed revision of a given node"""
994 """return an uncompressed revision of a given node"""
995 if node == nullid:
995 if node == nullid:
996 return ""
996 return ""
997 if self._cache and self._cache[0] == node:
997 if self._cache and self._cache[0] == node:
998 return self._cache[2]
998 return self._cache[2]
999
999
1000 # look up what we need to read
1000 # look up what we need to read
1001 text = None
1001 text = None
1002 rev = self.rev(node)
1002 rev = self.rev(node)
1003 base = self.base(rev)
1003 base = self.base(rev)
1004
1004
1005 # check rev flags
1005 # check rev flags
1006 if self.index[rev][0] & 0xFFFF:
1006 if self.index[rev][0] & 0xFFFF:
1007 raise RevlogError(_('incompatible revision flag %x') %
1007 raise RevlogError(_('incompatible revision flag %x') %
1008 (self.index[rev][0] & 0xFFFF))
1008 (self.index[rev][0] & 0xFFFF))
1009
1009
1010 # do we have useful data cached?
1010 # do we have useful data cached?
1011 if self._cache and self._cache[1] >= base and self._cache[1] < rev:
1011 if self._cache and self._cache[1] >= base and self._cache[1] < rev:
1012 base = self._cache[1]
1012 base = self._cache[1]
1013 text = self._cache[2]
1013 text = self._cache[2]
1014
1014
1015 self._loadindex(base, rev + 1)
1015 self._loadindex(base, rev + 1)
1016 self._chunkraw(base, rev)
1016 self._chunkraw(base, rev)
1017 if text is None:
1017 if text is None:
1018 text = self._chunk(base)
1018 text = self._chunk(base)
1019
1019
1020 bins = [self._chunk(r) for r in xrange(base + 1, rev + 1)]
1020 bins = [self._chunk(r) for r in xrange(base + 1, rev + 1)]
1021 text = mdiff.patches(text, bins)
1021 text = mdiff.patches(text, bins)
1022 p1, p2 = self.parents(node)
1022 p1, p2 = self.parents(node)
1023 if node != hash(text, p1, p2):
1023 if node != hash(text, p1, p2):
1024 raise RevlogError(_("integrity check failed on %s:%d")
1024 raise RevlogError(_("integrity check failed on %s:%d")
1025 % (self.indexfile, rev))
1025 % (self.indexfile, rev))
1026
1026
1027 self._cache = (node, rev, text)
1027 self._cache = (node, rev, text)
1028 return text
1028 return text
1029
1029
1030 def checkinlinesize(self, tr, fp=None):
1030 def checkinlinesize(self, tr, fp=None):
1031 if not self._inline or (self.start(-2) + self.length(-2)) < 131072:
1031 if not self._inline or (self.start(-2) + self.length(-2)) < 131072:
1032 return
1032 return
1033
1033
1034 trinfo = tr.find(self.indexfile)
1034 trinfo = tr.find(self.indexfile)
1035 if trinfo is None:
1035 if trinfo is None:
1036 raise RevlogError(_("%s not found in the transaction")
1036 raise RevlogError(_("%s not found in the transaction")
1037 % self.indexfile)
1037 % self.indexfile)
1038
1038
1039 trindex = trinfo[2]
1039 trindex = trinfo[2]
1040 dataoff = self.start(trindex)
1040 dataoff = self.start(trindex)
1041
1041
1042 tr.add(self.datafile, dataoff)
1042 tr.add(self.datafile, dataoff)
1043
1043
1044 if fp:
1044 if fp:
1045 fp.flush()
1045 fp.flush()
1046 fp.close()
1046 fp.close()
1047
1047
1048 df = self.opener(self.datafile, 'w')
1048 df = self.opener(self.datafile, 'w')
1049 try:
1049 try:
1050 for r in self:
1050 for r in self:
1051 df.write(self._chunkraw(r, r))
1051 df.write(self._chunkraw(r, r))
1052 finally:
1052 finally:
1053 df.close()
1053 df.close()
1054
1054
1055 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1055 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1056 self.version &= ~(REVLOGNGINLINEDATA)
1056 self.version &= ~(REVLOGNGINLINEDATA)
1057 self._inline = False
1057 self._inline = False
1058 for i in self:
1058 for i in self:
1059 e = self._io.packentry(self.index[i], self.node, self.version, i)
1059 e = self._io.packentry(self.index[i], self.node, self.version, i)
1060 fp.write(e)
1060 fp.write(e)
1061
1061
1062 # if we don't call rename, the temp file will never replace the
1062 # if we don't call rename, the temp file will never replace the
1063 # real index
1063 # real index
1064 fp.rename()
1064 fp.rename()
1065
1065
1066 tr.replace(self.indexfile, trindex * self._io.size)
1066 tr.replace(self.indexfile, trindex * self._io.size)
1067 self._chunkclear()
1067 self._chunkclear()
1068
1068
1069 def addrevision(self, text, transaction, link, p1, p2, d=None):
1069 def addrevision(self, text, transaction, link, p1, p2, d=None):
1070 """add a revision to the log
1070 """add a revision to the log
1071
1071
1072 text - the revision data to add
1072 text - the revision data to add
1073 transaction - the transaction object used for rollback
1073 transaction - the transaction object used for rollback
1074 link - the linkrev data to add
1074 link - the linkrev data to add
1075 p1, p2 - the parent nodeids of the revision
1075 p1, p2 - the parent nodeids of the revision
1076 d - an optional precomputed delta
1076 d - an optional precomputed delta
1077 """
1077 """
1078 dfh = None
1078 dfh = None
1079 if not self._inline:
1079 if not self._inline:
1080 dfh = self.opener(self.datafile, "a")
1080 dfh = self.opener(self.datafile, "a")
1081 ifh = self.opener(self.indexfile, "a+")
1081 ifh = self.opener(self.indexfile, "a+")
1082 try:
1082 try:
1083 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
1083 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
1084 finally:
1084 finally:
1085 if dfh:
1085 if dfh:
1086 dfh.close()
1086 dfh.close()
1087 ifh.close()
1087 ifh.close()
1088
1088
1089 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
1089 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
1090 node = hash(text, p1, p2)
1090 node = hash(text, p1, p2)
1091 if node in self.nodemap:
1091 if node in self.nodemap:
1092 return node
1092 return node
1093
1093
1094 curr = len(self)
1094 curr = len(self)
1095 prev = curr - 1
1095 prev = curr - 1
1096 base = self.base(prev)
1096 base = self.base(prev)
1097 offset = self.end(prev)
1097 offset = self.end(prev)
1098
1098
1099 if curr:
1099 if curr:
1100 if not d:
1100 if not d:
1101 ptext = self.revision(self.node(prev))
1101 ptext = self.revision(self.node(prev))
1102 d = mdiff.textdiff(ptext, text)
1102 d = mdiff.textdiff(ptext, text)
1103 data = compress(d)
1103 data = compress(d)
1104 l = len(data[1]) + len(data[0])
1104 l = len(data[1]) + len(data[0])
1105 dist = l + offset - self.start(base)
1105 dist = l + offset - self.start(base)
1106
1106
1107 # full versions are inserted when the needed deltas
1107 # full versions are inserted when the needed deltas
1108 # become comparable to the uncompressed text
1108 # become comparable to the uncompressed text
1109 if not curr or dist > len(text) * 2:
1109 if not curr or dist > len(text) * 2:
1110 data = compress(text)
1110 data = compress(text)
1111 l = len(data[1]) + len(data[0])
1111 l = len(data[1]) + len(data[0])
1112 base = curr
1112 base = curr
1113
1113
1114 e = (offset_type(offset, 0), l, len(text),
1114 e = (offset_type(offset, 0), l, len(text),
1115 base, link, self.rev(p1), self.rev(p2), node)
1115 base, link, self.rev(p1), self.rev(p2), node)
1116 self.index.insert(-1, e)
1116 self.index.insert(-1, e)
1117 self.nodemap[node] = curr
1117 self.nodemap[node] = curr
1118
1118
1119 entry = self._io.packentry(e, self.node, self.version, curr)
1119 entry = self._io.packentry(e, self.node, self.version, curr)
1120 if not self._inline:
1120 if not self._inline:
1121 transaction.add(self.datafile, offset)
1121 transaction.add(self.datafile, offset)
1122 transaction.add(self.indexfile, curr * len(entry))
1122 transaction.add(self.indexfile, curr * len(entry))
1123 if data[0]:
1123 if data[0]:
1124 dfh.write(data[0])
1124 dfh.write(data[0])
1125 dfh.write(data[1])
1125 dfh.write(data[1])
1126 dfh.flush()
1126 dfh.flush()
1127 ifh.write(entry)
1127 ifh.write(entry)
1128 else:
1128 else:
1129 offset += curr * self._io.size
1129 offset += curr * self._io.size
1130 transaction.add(self.indexfile, offset, curr)
1130 transaction.add(self.indexfile, offset, curr)
1131 ifh.write(entry)
1131 ifh.write(entry)
1132 ifh.write(data[0])
1132 ifh.write(data[0])
1133 ifh.write(data[1])
1133 ifh.write(data[1])
1134 self.checkinlinesize(transaction, ifh)
1134 self.checkinlinesize(transaction, ifh)
1135
1135
1136 if type(text) == str: # only accept immutable objects
1136 if type(text) == str: # only accept immutable objects
1137 self._cache = (node, curr, text)
1137 self._cache = (node, curr, text)
1138 return node
1138 return node
1139
1139
1140 def descendant(self, a, b):
1141 if a > b:
1142 return False
1143 for i in self.descendants(a):
1144 if i == b:
1145 return True
1146 elif i > b:
1147 break
1148 return False
1149
1140 def ancestor(self, a, b):
1150 def ancestor(self, a, b):
1141 """calculate the least common ancestor of nodes a and b"""
1151 """calculate the least common ancestor of nodes a and b"""
1142
1152
1143 # fast path, check if it is a descendant
1144 a, b = self.rev(a), self.rev(b)
1145 start, end = sorted((a, b))
1153 start, end = sorted((a, b))
1146 for i in self.descendants(start):
1154 if self.descendant(a, b):
1147 if i == end:
1155 return self.node(start)
1148 return self.node(start)
1149 elif i > end:
1150 break
1151
1156
1152 def parents(rev):
1157 def parents(rev):
1153 return [p for p in self.parentrevs(rev) if p != nullrev]
1158 return [p for p in self.parentrevs(rev) if p != nullrev]
1154
1159
1155 c = ancestor.ancestor(a, b, parents)
1160 c = ancestor.ancestor(a, b, parents)
1156 if c is None:
1161 if c is None:
1157 return nullid
1162 return nullid
1158
1163
1159 return self.node(c)
1164 return self.node(c)
1160
1165
1161 def group(self, nodelist, lookup, infocollect=None):
1166 def group(self, nodelist, lookup, infocollect=None):
1162 """Calculate a delta group, yielding a sequence of changegroup chunks
1167 """Calculate a delta group, yielding a sequence of changegroup chunks
1163 (strings).
1168 (strings).
1164
1169
1165 Given a list of changeset revs, return a set of deltas and
1170 Given a list of changeset revs, return a set of deltas and
1166 metadata corresponding to nodes. the first delta is
1171 metadata corresponding to nodes. the first delta is
1167 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
1172 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
1168 have this parent as it has all history before these
1173 have this parent as it has all history before these
1169 changesets. parent is parent[0]
1174 changesets. parent is parent[0]
1170 """
1175 """
1171
1176
1172 revs = [self.rev(n) for n in nodelist]
1177 revs = [self.rev(n) for n in nodelist]
1173
1178
1174 # if we don't have any revisions touched by these changesets, bail
1179 # if we don't have any revisions touched by these changesets, bail
1175 if not revs:
1180 if not revs:
1176 yield changegroup.closechunk()
1181 yield changegroup.closechunk()
1177 return
1182 return
1178
1183
1179 # add the parent of the first rev
1184 # add the parent of the first rev
1180 p = self.parentrevs(revs[0])[0]
1185 p = self.parentrevs(revs[0])[0]
1181 revs.insert(0, p)
1186 revs.insert(0, p)
1182
1187
1183 # build deltas
1188 # build deltas
1184 for d in xrange(len(revs) - 1):
1189 for d in xrange(len(revs) - 1):
1185 a, b = revs[d], revs[d + 1]
1190 a, b = revs[d], revs[d + 1]
1186 nb = self.node(b)
1191 nb = self.node(b)
1187
1192
1188 if infocollect is not None:
1193 if infocollect is not None:
1189 infocollect(nb)
1194 infocollect(nb)
1190
1195
1191 p = self.parents(nb)
1196 p = self.parents(nb)
1192 meta = nb + p[0] + p[1] + lookup(nb)
1197 meta = nb + p[0] + p[1] + lookup(nb)
1193 if a == -1:
1198 if a == -1:
1194 d = self.revision(nb)
1199 d = self.revision(nb)
1195 meta += mdiff.trivialdiffheader(len(d))
1200 meta += mdiff.trivialdiffheader(len(d))
1196 else:
1201 else:
1197 d = self.revdiff(a, b)
1202 d = self.revdiff(a, b)
1198 yield changegroup.chunkheader(len(meta) + len(d))
1203 yield changegroup.chunkheader(len(meta) + len(d))
1199 yield meta
1204 yield meta
1200 if len(d) > 2**20:
1205 if len(d) > 2**20:
1201 pos = 0
1206 pos = 0
1202 while pos < len(d):
1207 while pos < len(d):
1203 pos2 = pos + 2 ** 18
1208 pos2 = pos + 2 ** 18
1204 yield d[pos:pos2]
1209 yield d[pos:pos2]
1205 pos = pos2
1210 pos = pos2
1206 else:
1211 else:
1207 yield d
1212 yield d
1208
1213
1209 yield changegroup.closechunk()
1214 yield changegroup.closechunk()
1210
1215
1211 def addgroup(self, revs, linkmapper, transaction):
1216 def addgroup(self, revs, linkmapper, transaction):
1212 """
1217 """
1213 add a delta group
1218 add a delta group
1214
1219
1215 given a set of deltas, add them to the revision log. the
1220 given a set of deltas, add them to the revision log. the
1216 first delta is against its parent, which should be in our
1221 first delta is against its parent, which should be in our
1217 log, the rest are against the previous delta.
1222 log, the rest are against the previous delta.
1218 """
1223 """
1219
1224
1220 #track the base of the current delta log
1225 #track the base of the current delta log
1221 r = len(self)
1226 r = len(self)
1222 t = r - 1
1227 t = r - 1
1223 node = None
1228 node = None
1224
1229
1225 base = prev = nullrev
1230 base = prev = nullrev
1226 start = end = textlen = 0
1231 start = end = textlen = 0
1227 if r:
1232 if r:
1228 end = self.end(t)
1233 end = self.end(t)
1229
1234
1230 ifh = self.opener(self.indexfile, "a+")
1235 ifh = self.opener(self.indexfile, "a+")
1231 isize = r * self._io.size
1236 isize = r * self._io.size
1232 if self._inline:
1237 if self._inline:
1233 transaction.add(self.indexfile, end + isize, r)
1238 transaction.add(self.indexfile, end + isize, r)
1234 dfh = None
1239 dfh = None
1235 else:
1240 else:
1236 transaction.add(self.indexfile, isize, r)
1241 transaction.add(self.indexfile, isize, r)
1237 transaction.add(self.datafile, end)
1242 transaction.add(self.datafile, end)
1238 dfh = self.opener(self.datafile, "a")
1243 dfh = self.opener(self.datafile, "a")
1239
1244
1240 try:
1245 try:
1241 # loop through our set of deltas
1246 # loop through our set of deltas
1242 chain = None
1247 chain = None
1243 for chunk in revs:
1248 for chunk in revs:
1244 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1249 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1245 link = linkmapper(cs)
1250 link = linkmapper(cs)
1246 if node in self.nodemap:
1251 if node in self.nodemap:
1247 # this can happen if two branches make the same change
1252 # this can happen if two branches make the same change
1248 chain = node
1253 chain = node
1249 continue
1254 continue
1250 delta = buffer(chunk, 80)
1255 delta = buffer(chunk, 80)
1251 del chunk
1256 del chunk
1252
1257
1253 for p in (p1, p2):
1258 for p in (p1, p2):
1254 if not p in self.nodemap:
1259 if not p in self.nodemap:
1255 raise LookupError(p, self.indexfile, _('unknown parent'))
1260 raise LookupError(p, self.indexfile, _('unknown parent'))
1256
1261
1257 if not chain:
1262 if not chain:
1258 # retrieve the parent revision of the delta chain
1263 # retrieve the parent revision of the delta chain
1259 chain = p1
1264 chain = p1
1260 if not chain in self.nodemap:
1265 if not chain in self.nodemap:
1261 raise LookupError(chain, self.indexfile, _('unknown base'))
1266 raise LookupError(chain, self.indexfile, _('unknown base'))
1262
1267
1263 # full versions are inserted when the needed deltas become
1268 # full versions are inserted when the needed deltas become
1264 # comparable to the uncompressed text or when the previous
1269 # comparable to the uncompressed text or when the previous
1265 # version is not the one we have a delta against. We use
1270 # version is not the one we have a delta against. We use
1266 # the size of the previous full rev as a proxy for the
1271 # the size of the previous full rev as a proxy for the
1267 # current size.
1272 # current size.
1268
1273
1269 if chain == prev:
1274 if chain == prev:
1270 cdelta = compress(delta)
1275 cdelta = compress(delta)
1271 cdeltalen = len(cdelta[0]) + len(cdelta[1])
1276 cdeltalen = len(cdelta[0]) + len(cdelta[1])
1272 textlen = mdiff.patchedsize(textlen, delta)
1277 textlen = mdiff.patchedsize(textlen, delta)
1273
1278
1274 if chain != prev or (end - start + cdeltalen) > textlen * 2:
1279 if chain != prev or (end - start + cdeltalen) > textlen * 2:
1275 # flush our writes here so we can read it in revision
1280 # flush our writes here so we can read it in revision
1276 if dfh:
1281 if dfh:
1277 dfh.flush()
1282 dfh.flush()
1278 ifh.flush()
1283 ifh.flush()
1279 text = self.revision(chain)
1284 text = self.revision(chain)
1280 if len(text) == 0:
1285 if len(text) == 0:
1281 # skip over trivial delta header
1286 # skip over trivial delta header
1282 text = buffer(delta, 12)
1287 text = buffer(delta, 12)
1283 else:
1288 else:
1284 text = mdiff.patches(text, [delta])
1289 text = mdiff.patches(text, [delta])
1285 del delta
1290 del delta
1286 chk = self._addrevision(text, transaction, link, p1, p2, None,
1291 chk = self._addrevision(text, transaction, link, p1, p2, None,
1287 ifh, dfh)
1292 ifh, dfh)
1288 if not dfh and not self._inline:
1293 if not dfh and not self._inline:
1289 # addrevision switched from inline to conventional
1294 # addrevision switched from inline to conventional
1290 # reopen the index
1295 # reopen the index
1291 dfh = self.opener(self.datafile, "a")
1296 dfh = self.opener(self.datafile, "a")
1292 ifh = self.opener(self.indexfile, "a")
1297 ifh = self.opener(self.indexfile, "a")
1293 if chk != node:
1298 if chk != node:
1294 raise RevlogError(_("consistency error adding group"))
1299 raise RevlogError(_("consistency error adding group"))
1295 textlen = len(text)
1300 textlen = len(text)
1296 else:
1301 else:
1297 e = (offset_type(end, 0), cdeltalen, textlen, base,
1302 e = (offset_type(end, 0), cdeltalen, textlen, base,
1298 link, self.rev(p1), self.rev(p2), node)
1303 link, self.rev(p1), self.rev(p2), node)
1299 self.index.insert(-1, e)
1304 self.index.insert(-1, e)
1300 self.nodemap[node] = r
1305 self.nodemap[node] = r
1301 entry = self._io.packentry(e, self.node, self.version, r)
1306 entry = self._io.packentry(e, self.node, self.version, r)
1302 if self._inline:
1307 if self._inline:
1303 ifh.write(entry)
1308 ifh.write(entry)
1304 ifh.write(cdelta[0])
1309 ifh.write(cdelta[0])
1305 ifh.write(cdelta[1])
1310 ifh.write(cdelta[1])
1306 self.checkinlinesize(transaction, ifh)
1311 self.checkinlinesize(transaction, ifh)
1307 if not self._inline:
1312 if not self._inline:
1308 dfh = self.opener(self.datafile, "a")
1313 dfh = self.opener(self.datafile, "a")
1309 ifh = self.opener(self.indexfile, "a")
1314 ifh = self.opener(self.indexfile, "a")
1310 else:
1315 else:
1311 dfh.write(cdelta[0])
1316 dfh.write(cdelta[0])
1312 dfh.write(cdelta[1])
1317 dfh.write(cdelta[1])
1313 ifh.write(entry)
1318 ifh.write(entry)
1314
1319
1315 t, r, chain, prev = r, r + 1, node, node
1320 t, r, chain, prev = r, r + 1, node, node
1316 base = self.base(t)
1321 base = self.base(t)
1317 start = self.start(base)
1322 start = self.start(base)
1318 end = self.end(t)
1323 end = self.end(t)
1319 finally:
1324 finally:
1320 if dfh:
1325 if dfh:
1321 dfh.close()
1326 dfh.close()
1322 ifh.close()
1327 ifh.close()
1323
1328
1324 return node
1329 return node
1325
1330
1326 def strip(self, minlink, transaction):
1331 def strip(self, minlink, transaction):
1327 """truncate the revlog on the first revision with a linkrev >= minlink
1332 """truncate the revlog on the first revision with a linkrev >= minlink
1328
1333
1329 This function is called when we're stripping revision minlink and
1334 This function is called when we're stripping revision minlink and
1330 its descendants from the repository.
1335 its descendants from the repository.
1331
1336
1332 We have to remove all revisions with linkrev >= minlink, because
1337 We have to remove all revisions with linkrev >= minlink, because
1333 the equivalent changelog revisions will be renumbered after the
1338 the equivalent changelog revisions will be renumbered after the
1334 strip.
1339 strip.
1335
1340
1336 So we truncate the revlog on the first of these revisions, and
1341 So we truncate the revlog on the first of these revisions, and
1337 trust that the caller has saved the revisions that shouldn't be
1342 trust that the caller has saved the revisions that shouldn't be
1338 removed and that it'll readd them after this truncation.
1343 removed and that it'll readd them after this truncation.
1339 """
1344 """
1340 if len(self) == 0:
1345 if len(self) == 0:
1341 return
1346 return
1342
1347
1343 if isinstance(self.index, lazyindex):
1348 if isinstance(self.index, lazyindex):
1344 self._loadindexmap()
1349 self._loadindexmap()
1345
1350
1346 for rev in self:
1351 for rev in self:
1347 if self.index[rev][4] >= minlink:
1352 if self.index[rev][4] >= minlink:
1348 break
1353 break
1349 else:
1354 else:
1350 return
1355 return
1351
1356
1352 # first truncate the files on disk
1357 # first truncate the files on disk
1353 end = self.start(rev)
1358 end = self.start(rev)
1354 if not self._inline:
1359 if not self._inline:
1355 transaction.add(self.datafile, end)
1360 transaction.add(self.datafile, end)
1356 end = rev * self._io.size
1361 end = rev * self._io.size
1357 else:
1362 else:
1358 end += rev * self._io.size
1363 end += rev * self._io.size
1359
1364
1360 transaction.add(self.indexfile, end)
1365 transaction.add(self.indexfile, end)
1361
1366
1362 # then reset internal state in memory to forget those revisions
1367 # then reset internal state in memory to forget those revisions
1363 self._cache = None
1368 self._cache = None
1364 self._chunkclear()
1369 self._chunkclear()
1365 for x in xrange(rev, len(self)):
1370 for x in xrange(rev, len(self)):
1366 del self.nodemap[self.node(x)]
1371 del self.nodemap[self.node(x)]
1367
1372
1368 del self.index[rev:-1]
1373 del self.index[rev:-1]
1369
1374
1370 def checksize(self):
1375 def checksize(self):
1371 expected = 0
1376 expected = 0
1372 if len(self):
1377 if len(self):
1373 expected = max(0, self.end(len(self) - 1))
1378 expected = max(0, self.end(len(self) - 1))
1374
1379
1375 try:
1380 try:
1376 f = self.opener(self.datafile)
1381 f = self.opener(self.datafile)
1377 f.seek(0, 2)
1382 f.seek(0, 2)
1378 actual = f.tell()
1383 actual = f.tell()
1379 dd = actual - expected
1384 dd = actual - expected
1380 except IOError, inst:
1385 except IOError, inst:
1381 if inst.errno != errno.ENOENT:
1386 if inst.errno != errno.ENOENT:
1382 raise
1387 raise
1383 dd = 0
1388 dd = 0
1384
1389
1385 try:
1390 try:
1386 f = self.opener(self.indexfile)
1391 f = self.opener(self.indexfile)
1387 f.seek(0, 2)
1392 f.seek(0, 2)
1388 actual = f.tell()
1393 actual = f.tell()
1389 s = self._io.size
1394 s = self._io.size
1390 i = max(0, actual // s)
1395 i = max(0, actual // s)
1391 di = actual - (i * s)
1396 di = actual - (i * s)
1392 if self._inline:
1397 if self._inline:
1393 databytes = 0
1398 databytes = 0
1394 for r in self:
1399 for r in self:
1395 databytes += max(0, self.length(r))
1400 databytes += max(0, self.length(r))
1396 dd = 0
1401 dd = 0
1397 di = actual - len(self) * s - databytes
1402 di = actual - len(self) * s - databytes
1398 except IOError, inst:
1403 except IOError, inst:
1399 if inst.errno != errno.ENOENT:
1404 if inst.errno != errno.ENOENT:
1400 raise
1405 raise
1401 di = 0
1406 di = 0
1402
1407
1403 return (dd, di)
1408 return (dd, di)
1404
1409
1405 def files(self):
1410 def files(self):
1406 res = [self.indexfile]
1411 res = [self.indexfile]
1407 if not self._inline:
1412 if not self._inline:
1408 res.append(self.datafile)
1413 res.append(self.datafile)
1409 return res
1414 return res
General Comments 0
You need to be logged in to leave comments. Login now