revlog: drop cache after use to save memory footprint...
Matt Mackall
r11754:6ccd130e stable
@@ -1,1405 +1,1408
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

# import stuff from node for others to import from revlog
from node import bin, hex, nullid, nullrev, short #@UnusedImport
from i18n import _
import changegroup, ancestor, mdiff, parsers, error, util
import struct, zlib, errno

_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress
_sha = util.sha1

# revlog flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS

# amount of data read unconditionally, should be >= 4
# when not inline: threshold for using lazy index
_prereadsize = 1048576
# max size of revlog with inline data
_maxinline = 131072

RevlogError = error.RevlogError
LookupError = error.LookupError

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    return long(long(offset) << 16 | type)

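# Editorial sketch (not part of revlog.py): the index packs a 48-bit file
# offset and 16 flag bits into a single 64-bit field; the helpers above
# invert one another.
def _demo_offset_type():
    packed = offset_type(4096, 1)     # offset 4096, flag bit 0x0001
    assert getoffset(packed) == 4096  # high 48 bits
    assert gettype(packed) == 1       # low 16 bits
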
nullhash = _sha(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        l = [p1, p2]
        l.sort()
        s = _sha(l[0])
        s.update(l[1])
    s.update(text)
    return s.digest()
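
# Editorial sketch (not part of revlog.py): both branches of hash() amount
# to sha1(min(p1, p2) + max(p1, p2) + text); nullid is the smallest
# possible node, so the nullhash.copy() path merely avoids rehashing the
# same 20 zero bytes every time.
def _demo_hash(text, p1):
    s = _sha(nullid + p1)  # p2 == nullid, and nullid sorts first
    s.update(text)
    assert hash(text, p1, nullid) == s.digest()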

def compress(text):
    """ generate a possibly-compressed representation of text """
    if not text:
        return ("", text)
    l = len(text)
    bin = None
    if l < 44:
        pass
    elif l > 1000000:
        # zlib makes an internal copy, thus doubling memory usage for
        # large files, so let's do this in pieces
        z = zlib.compressobj()
        p = []
        pos = 0
        while pos < l:
            pos2 = pos + 2**20
            p.append(z.compress(text[pos:pos2]))
            pos = pos2
        p.append(z.flush())
        if sum(map(len, p)) < l:
            bin = "".join(p)
    else:
        bin = _compress(text)
    if bin is None or len(bin) > l:
        if text[0] == '\0':
            return ("", text)
        return ('u', text)
    return ("", bin)

def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    t = bin[0]
    if t == '\0':
        return bin
    if t == 'x':
        return _decompress(bin)
    if t == 'u':
        return bin[1:]
    raise RevlogError(_("unknown compression type %r") % t)
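
# Editorial sketch (not part of revlog.py): a stored chunk is just
# header + data, and decompress() inverts compress() in every case --
# empty, 'u' (stored uncompressed), and '' (zlib output starting with
# 'x', or raw text starting with '\0').
def _demo_roundtrip(text):
    header, data = compress(text)
    assert decompress(header + data) == text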

class lazyparser(object):
    """
    this class avoids the need to parse the entirety of large indices
    """

    # lazyparser is not safe to use on windows if win32 extensions are not
    # available. it keeps the file handle open, which makes it impossible
    # to break hardlinks on local cloned repos.

    def __init__(self, dataf):
        try:
            size = util.fstat(dataf).st_size
        except AttributeError:
            size = 0
        self.dataf = dataf
        self.s = struct.calcsize(indexformatng)
        self.datasize = size
        self.l = size / self.s
        self.index = [None] * self.l
        self.map = {nullid: nullrev}
        self.allmap = 0
        self.all = 0
        self.mapfind_count = 0

    def loadmap(self):
        """
        during a commit, we need to make sure the rev being added is
        not a duplicate. This requires loading the entire index,
        which is fairly slow. loadmap can load up just the node map,
        which takes much less time.
        """
        if self.allmap:
            return
        end = self.datasize
        self.allmap = 1
        cur = 0
        count = 0
        blocksize = self.s * 256
        self.dataf.seek(0)
        while cur < end:
            data = self.dataf.read(blocksize)
            off = 0
            for x in xrange(256):
                n = data[off + ngshaoffset:off + ngshaoffset + 20]
                self.map[n] = count
                count += 1
                if count >= self.l:
                    break
                off += self.s
            cur += blocksize

    def loadblock(self, blockstart, blocksize, data=None):
        if self.all:
            return
        if data is None:
            self.dataf.seek(blockstart)
            if blockstart + blocksize > self.datasize:
                # the revlog may have grown since we've started running,
                # but we don't have space in self.index for more entries.
                # limit blocksize so that we don't get too much data.
                blocksize = max(self.datasize - blockstart, 0)
            data = self.dataf.read(blocksize)
        lend = len(data) / self.s
        i = blockstart / self.s
        off = 0
        # lazyindex supports __delitem__
        if lend > len(self.index) - i:
            lend = len(self.index) - i
        for x in xrange(lend):
            if self.index[i + x] is None:
                b = data[off : off + self.s]
                self.index[i + x] = b
                n = b[ngshaoffset:ngshaoffset + 20]
                self.map[n] = i + x
            off += self.s

    def findnode(self, node):
        """search backwards through the index file for a specific node"""
        if self.allmap:
            return None

        # hg log will cause many many searches for the manifest
        # nodes. After we get called a few times, just load the whole
        # thing.
        if self.mapfind_count > 8:
            self.loadmap()
            if node in self.map:
                return node
            return None
        self.mapfind_count += 1
        last = self.l - 1
        while self.index[last] != None:
            if last == 0:
                self.all = 1
                self.allmap = 1
                return None
            last -= 1
        end = (last + 1) * self.s
        blocksize = self.s * 256
        while end >= 0:
            start = max(end - blocksize, 0)
            self.dataf.seek(start)
            data = self.dataf.read(end - start)
            findend = end - start
            while True:
                # we're searching backwards, so we have to make sure
                # we don't find a changeset where this node is a parent
                off = data.find(node, 0, findend)
                findend = off
                if off >= 0:
                    i = off / self.s
                    off = i * self.s
                    n = data[off + ngshaoffset:off + ngshaoffset + 20]
                    if n == node:
                        self.map[n] = i + start / self.s
                        return node
                else:
                    break
            end -= blocksize
        return None

    def loadindex(self, i=None, end=None):
        if self.all:
            return
        all = False
        if i is None:
            blockstart = 0
            blocksize = (65536 / self.s) * self.s
            end = self.datasize
            all = True
        else:
            if end:
                blockstart = i * self.s
                end = end * self.s
                blocksize = end - blockstart
            else:
                blockstart = (i & ~1023) * self.s
                blocksize = self.s * 1024
                end = blockstart + blocksize
        while blockstart < end:
            self.loadblock(blockstart, blocksize)
            blockstart += blocksize
        if all:
            self.all = True

class lazyindex(object):
    """a lazy version of the index array"""
    def __init__(self, parser):
        self.p = parser
    def __len__(self):
        return len(self.p.index)
    def load(self, pos):
        if pos < 0:
            pos += len(self.p.index)
        self.p.loadindex(pos)
        return self.p.index[pos]
    def __getitem__(self, pos):
        return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
    def __setitem__(self, pos, item):
        self.p.index[pos] = _pack(indexformatng, *item)
    def __delitem__(self, pos):
        del self.p.index[pos]
    def insert(self, pos, e):
        self.p.index.insert(pos, _pack(indexformatng, *e))
    def append(self, e):
        self.p.index.append(_pack(indexformatng, *e))

class lazymap(object):
    """a lazy version of the node map"""
    def __init__(self, parser):
        self.p = parser
    def load(self, key):
        n = self.p.findnode(key)
        if n is None:
            raise KeyError(key)
    def __contains__(self, key):
        if key in self.p.map:
            return True
        self.p.loadmap()
        return key in self.p.map
    def __iter__(self):
        yield nullid
        for i, ret in enumerate(self.p.index):
            if not ret:
                self.p.loadindex(i)
                ret = self.p.index[i]
            if isinstance(ret, str):
                ret = _unpack(indexformatng, ret)
            yield ret[7]
    def __getitem__(self, key):
        try:
            return self.p.map[key]
        except KeyError:
            try:
                self.load(key)
                return self.p.map[key]
            except KeyError:
                raise KeyError("node " + hex(key))
    def __setitem__(self, key, val):
        self.p.map[key] = val
    def __delitem__(self, key):
        del self.p.map[key]

indexformatv0 = ">4l20s20s20s"
v0shaoffset = 56

class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, fp, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        if len(data) == _prereadsize:
            data += fp.read() # read the rest
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_("index entry flags need RevlogNG"))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
ngshaoffset = 32
versionformat = ">I"

class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, fp, data, inline):
        if len(data) == _prereadsize:
            if util.openhardlinks() and not inline:
                # big index, let's parse it on demand
                parser = lazyparser(fp)
                index = lazyindex(parser)
                nodemap = lazymap(parser)
                e = list(index[0])
                type = gettype(e[0])
                e[0] = offset_type(0, type)
                index[0] = e
                return index, nodemap, None
            else:
                data += fp.read()

        # call the C implementation to parse the index data
        index, nodemap, cache = parsers.parse_index(data, inline)
        return index, nodemap, cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p
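
# Editorial sketch (not part of revlog.py): unpacking one 64-byte RevlogNG
# index record by hand; the trailing "12x" pads the 20-byte sha-1 nodeid
# out to a 32-byte slot.
def _demo_unpackentry(raw64):
    offset_flags, clen, ulen, base, link, p1, p2, nodeid = \
        _unpack(indexformatng, raw64)
    return getoffset(offset_flags), gettype(offset_flags), nodeid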

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.
    """
    def __init__(self, opener, indexfile):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        self._cache = None
        self._chunkcache = (0, '')
        self.nodemap = {nullid: nullrev}
        self.index = []

        v = REVLOG_DEFAULT_VERSION
        if hasattr(opener, 'options') and 'defversion' in opener.options:
            v = opener.options['defversion']
            if v & REVLOGNG:
                v |= REVLOGNGINLINEDATA

        i = ''
        try:
            f = self.opener(self.indexfile)
            if "nonlazy" in getattr(self.opener, 'options', {}):
                i = f.read()
            else:
                i = f.read(_prereadsize)
            if len(i) > 0:
                v = struct.unpack(versionformat, i[:4])[0]
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        if i:
            try:
                d = self._io.parseindex(f, i, self._inline)
            except (ValueError, IndexError):
                raise RevlogError(_("index %s is corrupted") % (self.indexfile))
            self.index, self.nodemap, self._chunkcache = d
            if not self._chunkcache:
                self._chunkclear()

        # add the magic null revision at -1 (if it hasn't been done already)
        if (self.index == [] or isinstance(self.index, lazyindex) or
            self.index[-1][7] != nullid):
            self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
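
    # Editorial sketch (not part of revlog.py): any callable that opens a
    # file by relative path can serve as an opener, e.g.
    #
    #   def opener(path, mode='r'):
    #       return open(path, mode + 'b')
    #   rl = revlog(opener, '00changelog.i')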

    def _loadindex(self, start, end):
        """load a block of indexes all at once from the lazy parser"""
        if isinstance(self.index, lazyindex):
            self.index.p.loadindex(start, end)

    def _loadindexmap(self):
        """loads both the map and the index from the lazy parser"""
        if isinstance(self.index, lazyindex):
            p = self.index.p
            p.loadindex()
            self.nodemap = p.map

    def _loadmap(self):
        """loads the map from the lazy parser"""
        if isinstance(self.nodemap, lazymap):
            self.nodemap.p.loadmap()
            self.nodemap = self.nodemap.p.map

    def tip(self):
        return self.node(len(self.index) - 2)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        for i in xrange(len(self)):
            yield i
    def rev(self, node):
        try:
            return self.nodemap[node]
        except KeyError:
            raise LookupError(node, self.indexfile, _('no node'))
    def node(self, rev):
        return self.index[rev][7]
    def linkrev(self, rev):
        return self.index[rev][4]
    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
    def parentrevs(self, rev):
        return self.index[rev][5:7]
    def start(self, rev):
        return int(self.index[rev][0] >> 16)
    def end(self, rev):
        return self.start(rev) + self.length(rev)
    def length(self, rev):
        return self.index[rev][1]
    def base(self, rev):
        return self.index[rev][3]
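
    # Editorial sketch (not part of revlog.py): walking a revlog 'rl' with
    # the accessors above; __len__ excludes the nullid sentinel that
    # __init__ appends to the index.
    #
    #   for r in rl:
    #       print r, hex(rl.node(r)), rl.parentrevs(r), rl.length(r)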

    def size(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(self.node(rev))
        return len(t)

    def reachable(self, node, stop=None):
        """return the set of all nodes ancestral to a given node, including
        the node itself, stopping when stop is matched"""
        reachable = set((node,))
        visit = [node]
        if stop:
            stopn = self.rev(stop)
        else:
            stopn = 0
        while visit:
            n = visit.pop(0)
            if n == stop:
                continue
            if n == nullid:
                continue
            for p in self.parents(n):
                if self.rev(p) < stopn:
                    continue
                if p not in reachable:
                    reachable.add(p)
                    visit.append(p)
        return reachable

    def ancestors(self, *revs):
        """Generate the ancestors of 'revs' in reverse topological order.

        Yield a sequence of revision numbers starting with the parents
        of each revision in revs, i.e., each revision is *not* considered
        an ancestor of itself. Results are in breadth-first order:
        parents of each rev in revs, then parents of those, etc. Result
        does not include the null revision."""
        visit = list(revs)
        seen = set([nullrev])
        while visit:
            for parent in self.parentrevs(visit.pop(0)):
                if parent not in seen:
                    visit.append(parent)
                    seen.add(parent)
                    yield parent

    def descendants(self, *revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        seen = set(revs)
        for i in xrange(min(revs) + 1, len(self)):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break
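
    # Editorial sketch (not part of revlog.py): on a linear history
    # 0 <- 1 <- 2, both generators exclude the starting rev itself:
    #
    #   list(rl.ancestors(2))   == [1, 0]
    #   list(rl.descendants(0)) == [1, 2]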

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        has = set(self.ancestors(*common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = [r for r in heads if r not in has]
        while visit:
            r = visit.pop(0)
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return [self.node(r) for r in missing]
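
    # Editorial sketch (not part of revlog.py): this is the discovery
    # primitive behind push and pull -- with 'common' set to nodes the
    # remote already has, the result is exactly what needs transferring:
    #
    #   tosend = rl.findmissing(common=remoteheads, heads=localheads)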

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendent of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, 0)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendent of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [n for n in roots if n in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(n) for n in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendents = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendents.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendents, empty parents will look like
        # they're descendents.
        for r in xrange(max(lowestrev, 0), highestrev + 1):
            n = self.node(r)
            isdescendent = False
            if lowestrev == nullrev:  # Everybody is a descendent of nullid
                isdescendent = True
            elif n in descendents:
                # n is already a descendent
                isdescendent = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendents before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendents, it's not a root.
                    if (p[0] in descendents) or (p[1] in descendents):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendent if either of its parents are
                # descendents. (We seeded the descendents list with the roots
                # up there, remember?)
                if (p[0] in descendents) or (p[1] in descendents):
                    descendents.add(n)
                    isdescendent = True
            if isdescendent and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendents and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = 1
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = 1
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [n for n in heads.iterkeys() if heads[n] != 0]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            count = len(self)
            if not count:
                return [nullid]
            ishead = [1] * (count + 1)
            index = self.index
            for r in xrange(count):
                e = index[r]
                ishead[e[5]] = ishead[e[6]] = 0
            return [self.node(r) for r in xrange(count) if ishead[r]]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = set((startrev,))
        heads = set((startrev,))

        parentrevs = self.parentrevs
        for r in xrange(startrev + 1, len(self)):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                    heads.add(r)
                if p in heads and p not in stoprevs:
                    heads.remove(p)

        return [self.node(r) for r in heads]
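
    # Editorial sketch (not part of revlog.py): with no arguments this is a
    # single pass that clears the head bit of every rev appearing as a
    # parent:
    #
    #   tips = rl.heads()       # nodes with no children
    #   assert rl.tip() in tips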

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in range(p + 1, len(self)):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        for i in self.descendants(start):
            if i == end:
                return True
            elif i > end:
                break
        return False

    def ancestor(self, a, b):
        """calculate the least common ancestor of nodes a and b"""

        # fast path, check if it is a descendant
        a, b = self.rev(a), self.rev(b)
        start, end = sorted((a, b))
        if self.descendant(start, end):
            return self.node(start)

        def parents(rev):
            return [p for p in self.parentrevs(rev) if p != nullrev]

        c = ancestor.ancestor(a, b, parents)
        if c is None:
            return nullid

        return self.node(c)
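
    # Editorial sketch (not part of revlog.py): if revs 2 and 3 are both
    # children of rev 1, the merge base comes back as rev 1:
    #
    #   base = rl.ancestor(rl.node(2), rl.node(3))
    #   assert rl.rev(base) == 1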

    def _match(self, id):
        if isinstance(id, (long, int)):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                bin_id = bin(id[:l * 2])
                nl = [n for n in self.nodemap if n[:l] == bin_id]
                nl = [n for n in nl if hex(n).startswith(id)]
                if len(nl) > 0:
                    if len(nl) == 1:
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))
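
    # Editorial sketch (not part of revlog.py): lookup() accepts several
    # identifier shapes and funnels them all to a binary node:
    #
    #   rl.lookup(0)          # revision number
    #   rl.lookup('-1')       # str(rev), negative counts from the end
    #   rl.lookup('1fa2b0')   # unambiguous hex prefix (hypothetical)
    #   rl.lookup(hex(node))  # full 40-character hex nodeid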

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node
952
952
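    # Why this works (explanatory note, not in the original source): a
    # nodeid is hash(text, p1, p2), so recomputing the hash over the
    # candidate text with the stored parents and comparing against the
    # node detects any content change without decompressing and
    # reconstructing the stored revision. Hypothetical caller:
    #
    #   if rl.cmp(node, newtext):    # True -> contents differ
    #       rl.addrevision(newtext, transaction, linkrev, p1, p2)
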
    def _addchunk(self, offset, data):
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _prereadsize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _loadchunk(self, offset, length):
        if self._inline:
            df = self.opener(self.indexfile)
        else:
            df = self.opener(self.datafile)

        readahead = max(65536, length)
        df.seek(offset)
        d = df.read(readahead)
        self._addchunk(offset, d)
        if readahead > length:
            return d[:length]
        return d

    def _getchunk(self, offset, length):
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return d[cachestart:cacheend]

        return self._loadchunk(offset, length)

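    # Cache behaviour sketch (editor's note; the numbers are
    # illustrative, not from the source): _chunkcache is one
    # (offset, data) window, and a miss reads at least 64k, so
    # sequential chunk reads mostly hit:
    #
    #   rl._getchunk(0, 100)       # miss: reads 65536 bytes, caches them
    #   rl._getchunk(100, 200)     # hit: sliced out of the cached window
    #   rl._getchunk(70000, 10)    # miss: outside the window, reads again
    #
    # _addchunk extends the window only for exactly contiguous reads and
    # caps it at _prereadsize to bound memory use.
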
    def _chunkraw(self, startrev, endrev):
        start = self.start(startrev)
        length = self.end(endrev) - start
        if self._inline:
            start += (startrev + 1) * self._io.size
        return self._getchunk(start, length)

    def _chunk(self, rev):
        return decompress(self._chunkraw(rev, rev))

    def _chunkclear(self):
        self._chunkcache = (0, '')

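    # Note on the inline offset math (editor's note): an inline revlog
    # interleaves one index entry before each revision's data in the .i
    # file, so rev r's data really starts at
    #
    #   self.start(r) + (r + 1) * self._io.size
    #
    # i.e. the logical data offset plus the r + 1 index entries
    # (entries 0..r) that precede it on disk. Non-inline revlogs keep
    # data in a separate .d file, so the logical offset is used as-is.
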
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
            return self._chunk(rev2)

        return mdiff.textdiff(self.revision(self.node(rev1)),
                              self.revision(self.node(rev2)))

    def revision(self, node):
        """return an uncompressed revision of a given node"""
        if node == nullid:
            return ""
        if self._cache and self._cache[0] == node:
            return self._cache[2]

        # look up what we need to read
        text = None
        rev = self.rev(node)
        base = self.base(rev)

        # check rev flags
        if self.index[rev][0] & 0xFFFF:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.index[rev][0] & 0xFFFF))

        # do we have useful data cached?
        if self._cache and self._cache[1] >= base and self._cache[1] < rev:
            base = self._cache[1]
            text = self._cache[2]

        # drop cache to save memory
        self._cache = None

        self._loadindex(base, rev + 1)
        self._chunkraw(base, rev)
        if text is None:
            text = self._chunk(base)

        bins = [self._chunk(r) for r in xrange(base + 1, rev + 1)]
        text = mdiff.patches(text, bins)
        p1, p2 = self.parents(node)
        if node != hash(text, p1, p2):
            raise RevlogError(_("integrity check failed on %s:%d")
                              % (self.indexfile, rev))

        self._cache = (node, rev, text)
        return text

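    # Reconstruction walk-through (editor's note; rev numbers are
    # illustrative): assuming rev 7 has base 4, revision() reads the
    # full text stored at rev 4, then applies the deltas for 5..7:
    #
    #   text = rl._chunk(4)                        # full snapshot
    #   bins = [rl._chunk(r) for r in (5, 6, 7)]   # delta chain
    #   text = mdiff.patches(text, bins)
    #
    # As of this changeset, the cached text is dropped before patching
    # (self._cache = None above) so the old copy can be freed while the
    # new one is built, cutting peak memory on large files.
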
    def checkinlinesize(self, tr, fp=None):
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._chunkraw(r, r))
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call rename, the temp file will never replace the
        # real index
        fp.rename()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, d=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        d - an optional precomputed delta
        """
        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        try:
            return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
        node = hash(text, p1, p2)
        if node in self.nodemap:
            return node

        curr = len(self)
        prev = curr - 1
        base = self.base(prev)
        offset = self.end(prev)

        if curr:
            if not d:
                ptext = self.revision(self.node(prev))
                d = mdiff.textdiff(ptext, text)
            data = compress(d)
            l = len(data[1]) + len(data[0])
            dist = l + offset - self.start(base)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if not curr or dist > len(text) * 2:
            data = compress(text)
            l = len(data[1]) + len(data[0])
            base = curr

        e = (offset_type(offset, 0), l, len(text),
             base, link, self.rev(p1), self.rev(p2), node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

        if type(text) == str: # only accept immutable objects
            self._cache = (node, curr, text)
        return node

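    # The snapshot heuristic in numbers (editor's note; sizes are
    # illustrative): suppose the last full version at 'base' plus the
    # deltas since then span 150k on disk and the new text is 60k.
    # Then dist (those 150k plus the new compressed delta) exceeds
    # len(text) * 2 = 120k, so a fresh full version is stored and
    # 'base' moves to the new rev. This bounds reconstruction cost: no
    # delta chain costs more than roughly twice the text it rebuilds.
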
    def group(self, nodelist, lookup, infocollect=None):
        """Calculate a delta group, yielding a sequence of changegroup chunks
        (strings).

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
        have this parent, as it has all history before these
        changesets. The parent used is parent[0].
        """

        revs = [self.rev(n) for n in nodelist]

        # if we don't have any revisions touched by these changesets, bail
        if not revs:
            yield changegroup.closechunk()
            return

        # add the parent of the first rev
        p = self.parentrevs(revs[0])[0]
        revs.insert(0, p)

        # build deltas
        for d in xrange(len(revs) - 1):
            a, b = revs[d], revs[d + 1]
            nb = self.node(b)

            if infocollect is not None:
                infocollect(nb)

            p = self.parents(nb)
            meta = nb + p[0] + p[1] + lookup(nb)
            if a == -1:
                d = self.revision(nb)
                meta += mdiff.trivialdiffheader(len(d))
            else:
                d = self.revdiff(a, b)
            yield changegroup.chunkheader(len(meta) + len(d))
            yield meta
            if len(d) > 2 ** 20:
                pos = 0
                while pos < len(d):
                    pos2 = pos + 2 ** 18
                    yield d[pos:pos2]
                    pos = pos2
            else:
                yield d

        yield changegroup.closechunk()

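    # Wire layout produced per revision (editor's sketch): each chunk is
    # a length header (see changegroup.chunkheader), an 80-byte metadata
    # block, and the delta payload, with deltas over 1M re-yielded in
    # 256k slices to keep buffers small:
    #
    #   chunkheader(len(meta) + len(d))   # length prefix for the chunk
    #   node + p1 + p2 + linknode         # 4 x 20 bytes of metadata
    #   d                                 # delta against the previous rev
    #
    # addgroup() below unpacks exactly this framing on the receiving
    # side with struct.unpack("20s20s20s20s", chunk[:80]).
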
    def addgroup(self, revs, linkmapper, transaction):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The
        first delta is against its parent, which should be in our
        log; the rest are against the previous delta.
        """

        # track the base of the current delta log
        r = len(self)
        t = r - 1
        node = None

        base = prev = nullrev
        start = end = textlen = 0
        if r:
            end = self.end(t)

        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        try:
            # loop through our set of deltas
            chain = None
            for chunk in revs:
                node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue
                delta = buffer(chunk, 80)
                del chunk

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile, _('unknown parent'))

                if not chain:
                    # retrieve the parent revision of the delta chain
                    chain = p1
                    if chain not in self.nodemap:
                        raise LookupError(chain, self.indexfile, _('unknown base'))

                # full versions are inserted when the needed deltas become
                # comparable to the uncompressed text or when the previous
                # version is not the one we have a delta against. We use
                # the size of the previous full rev as a proxy for the
                # current size.

                if chain == prev:
                    cdelta = compress(delta)
                    cdeltalen = len(cdelta[0]) + len(cdelta[1])
                    textlen = mdiff.patchedsize(textlen, delta)

                if chain != prev or (end - start + cdeltalen) > textlen * 2:
                    # flush our writes here so we can read it in revision
                    if dfh:
                        dfh.flush()
                    ifh.flush()
                    text = self.revision(chain)
                    if len(text) == 0:
                        # skip over trivial delta header
                        text = buffer(delta, 12)
                    else:
                        text = mdiff.patches(text, [delta])
                    del delta
                    chk = self._addrevision(text, transaction, link, p1, p2, None,
                                            ifh, dfh)
                    if not dfh and not self._inline:
                        # addrevision switched from inline to conventional
                        # reopen the index
                        dfh = self.opener(self.datafile, "a")
                        ifh = self.opener(self.indexfile, "a")
                    if chk != node:
                        raise RevlogError(_("consistency error adding group"))
                    textlen = len(text)
                else:
                    e = (offset_type(end, 0), cdeltalen, textlen, base,
                         link, self.rev(p1), self.rev(p2), node)
                    self.index.insert(-1, e)
                    self.nodemap[node] = r
                    entry = self._io.packentry(e, self.node, self.version, r)
                    if self._inline:
                        ifh.write(entry)
                        ifh.write(cdelta[0])
                        ifh.write(cdelta[1])
                        self.checkinlinesize(transaction, ifh)
                        if not self._inline:
                            dfh = self.opener(self.datafile, "a")
                            ifh = self.opener(self.indexfile, "a")
                    else:
                        dfh.write(cdelta[0])
                        dfh.write(cdelta[1])
                        ifh.write(entry)

                t, r, chain, prev = r, r + 1, node, node
                base = self.base(t)
                start = self.start(base)
                end = self.end(t)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return node

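    # Receiving-side framing (editor's note, mirroring the loop above):
    # each incoming chunk starts with an 80-byte header of four 20-byte
    # binary nodeids, followed by the delta itself:
    #
    #   node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
    #   delta = buffer(chunk, 80)    # zero-copy view of the payload
    #
    # 'cs' is the changeset node, mapped to a local linkrev through
    # linkmapper; buffer() avoids copying what can be a large delta.
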
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        if isinstance(self.index, lazyindex):
            self._loadindexmap()

        for rev in self:
            if self.index[rev][4] >= minlink:
                break
        else:
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

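    # Truncation offsets worked through (editor's note): stripping from
    # rev r in a non-inline revlog truncates the .d file at
    # self.start(r) and the .i file at r * self._io.size, so the r
    # surviving fixed-size index entries remain. An inline revlog has
    # only the .i file, so the cut point is the surviving data plus the
    # r interleaved index entries: self.start(r) + r * self._io.size.
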
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res