revlog: make revlogv0 loading more robust against corruption
Matt Mackall
r5544:686899a7 default
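The change makes revlogoldio.parseindex tolerant of corrupt v0 indexes: a parent hash that is missing from the nodemap (for instance because the index file is damaged or truncated) used to raise an uncaught KeyError while the index was being parsed; with dict.get the lookup now falls back to nullrev, so the revlog loads and any damage can surface later as an integrity error rather than a crash. A minimal sketch of the difference (illustrative; nullrev is -1):

    nodemap = {nullid: nullrev}
    p = '\x01' * 20               # parent hash absent from the map
    nodemap[p]                    # old code: raises KeyError
    nodemap.get(p, nullrev)       # new code: degrades to nullrev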
@@ -1,1305 +1,1305 @@
1 """
1 """
2 revlog.py - storage back-end for mercurial
2 revlog.py - storage back-end for mercurial
3
3
4 This provides efficient delta storage with O(1) retrieve and append
4 This provides efficient delta storage with O(1) retrieve and append
5 and O(changes) merge between branches
5 and O(changes) merge between branches
6
6
7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
8
8
9 This software may be used and distributed according to the terms
9 This software may be used and distributed according to the terms
10 of the GNU General Public License, incorporated herein by reference.
10 of the GNU General Public License, incorporated herein by reference.
11 """
11 """
12
12
13 from node import *
13 from node import *
14 from i18n import _
14 from i18n import _
15 import binascii, changegroup, errno, ancestor, mdiff, os
15 import binascii, changegroup, errno, ancestor, mdiff, os
16 import sha, struct, util, zlib
16 import sha, struct, util, zlib
17
17
18 _pack = struct.pack
18 _pack = struct.pack
19 _unpack = struct.unpack
19 _unpack = struct.unpack
20 _compress = zlib.compress
20 _compress = zlib.compress
21 _decompress = zlib.decompress
21 _decompress = zlib.decompress
22 _sha = sha.new
22 _sha = sha.new
23
23
24 # revlog flags
24 # revlog flags
25 REVLOGV0 = 0
25 REVLOGV0 = 0
26 REVLOGNG = 1
26 REVLOGNG = 1
27 REVLOGNGINLINEDATA = (1 << 16)
27 REVLOGNGINLINEDATA = (1 << 16)
28 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
28 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
29 REVLOG_DEFAULT_FORMAT = REVLOGNG
29 REVLOG_DEFAULT_FORMAT = REVLOGNG
30 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
30 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
31
31
32 class RevlogError(Exception):
32 class RevlogError(Exception):
33 pass
33 pass
34 class LookupError(RevlogError):
34 class LookupError(RevlogError):
35 pass
35 pass
36
36
37 def getoffset(q):
37 def getoffset(q):
38 return int(q >> 16)
38 return int(q >> 16)
39
39
40 def gettype(q):
40 def gettype(q):
41 return int(q & 0xFFFF)
41 return int(q & 0xFFFF)
42
42
43 def offset_type(offset, type):
43 def offset_type(offset, type):
44 return long(long(offset) << 16 | type)
44 return long(long(offset) << 16 | type)
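# Example (illustrative): offset_type packs a 48-bit offset and a 16-bit
# type/flags field into a single integer, and getoffset/gettype undo it:
#   offset_type(0x10, 2) == 0x100002
#   getoffset(0x100002) == 0x10, gettype(0x100002) == 2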

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    l = [p1, p2]
    l.sort()
    s = _sha(l[0])
    s.update(l[1])
    s.update(text)
    return s.digest()
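# Example (illustrative): a root revision has two null parents, so its
# node id is hash(text, nullid, nullid), i.e. the sha1 of nullid + nullid
# followed by the text.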

def compress(text):
    """ generate a possibly-compressed representation of text """
    if not text:
        return ("", text)
    l = len(text)
    bin = None
    if l < 44:
        pass
    elif l > 1000000:
        # zlib makes an internal copy, thus doubling memory usage for
        # large files, so let's do this in pieces
        z = zlib.compressobj()
        p = []
        pos = 0
        while pos < l:
            pos2 = pos + 2**20
            p.append(z.compress(text[pos:pos2]))
            pos = pos2
        p.append(z.flush())
        if sum(map(len, p)) < l:
            bin = "".join(p)
    else:
        bin = _compress(text)
    if bin is None or len(bin) > l:
        if text[0] == '\0':
            return ("", text)
        return ('u', text)
    return ("", bin)

def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    t = bin[0]
    if t == '\0':
        return bin
    if t == 'x':
        return _decompress(bin)
    if t == 'u':
        return bin[1:]
    raise RevlogError(_("unknown compression type %r") % t)
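# Illustrative round-trip (assuming the caller stores the two parts of
# compress() concatenated, as the revlog does when writing chunks):
#   h, d = compress(text)
#   decompress(h + d) == text
# This holds for all three encodings: zlib data (leading 'x'), raw text
# stored as-is because it begins with '\0', and literal text after 'u'.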

class lazyparser(object):
    """
    this class avoids the need to parse the entirety of large indices
    """

    # lazyparser is not safe to use on windows if win32 extensions not
    # available. it keeps the file handle open, which makes it impossible
    # to break hardlinks on local cloned repos.
    safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
                                      hasattr(util, 'win32api'))

    def __init__(self, dataf, size):
        self.dataf = dataf
        self.s = struct.calcsize(indexformatng)
        self.datasize = size
        self.l = size/self.s
        self.index = [None] * self.l
        self.map = {nullid: nullrev}
        self.allmap = 0
        self.all = 0
        self.mapfind_count = 0

    def loadmap(self):
        """
        during a commit, we need to make sure the rev being added is
        not a duplicate. This requires loading the entire index,
        which is fairly slow. loadmap can load up just the node map,
        which takes much less time.
        """
        if self.allmap:
            return
        end = self.datasize
        self.allmap = 1
        cur = 0
        count = 0
        blocksize = self.s * 256
        self.dataf.seek(0)
        while cur < end:
            data = self.dataf.read(blocksize)
            off = 0
            for x in xrange(256):
                n = data[off + ngshaoffset:off + ngshaoffset + 20]
                self.map[n] = count
                count += 1
                if count >= self.l:
                    break
                off += self.s
            cur += blocksize

    def loadblock(self, blockstart, blocksize, data=None):
        if self.all:
            return
        if data is None:
            self.dataf.seek(blockstart)
            if blockstart + blocksize > self.datasize:
                # the revlog may have grown since we've started running,
                # but we don't have space in self.index for more entries.
                # limit blocksize so that we don't get too much data.
                blocksize = max(self.datasize - blockstart, 0)
            data = self.dataf.read(blocksize)
        lend = len(data) / self.s
        i = blockstart / self.s
        off = 0
        # lazyindex supports __delitem__
        if lend > len(self.index) - i:
            lend = len(self.index) - i
        for x in xrange(lend):
            if self.index[i + x] == None:
                b = data[off : off + self.s]
                self.index[i + x] = b
                n = b[ngshaoffset:ngshaoffset + 20]
                self.map[n] = i + x
            off += self.s

    def findnode(self, node):
        """search backwards through the index file for a specific node"""
        if self.allmap:
            return None

        # hg log will cause many many searches for the manifest
        # nodes. After we get called a few times, just load the whole
        # thing.
        if self.mapfind_count > 8:
            self.loadmap()
            if node in self.map:
                return node
            return None
        self.mapfind_count += 1
        last = self.l - 1
        while self.index[last] != None:
            if last == 0:
                self.all = 1
                self.allmap = 1
                return None
            last -= 1
        end = (last + 1) * self.s
        blocksize = self.s * 256
        while end >= 0:
            start = max(end - blocksize, 0)
            self.dataf.seek(start)
            data = self.dataf.read(end - start)
            findend = end - start
            while True:
                # we're searching backwards, so we have to make sure
                # we don't find a changeset where this node is a parent
                off = data.find(node, 0, findend)
                findend = off
                if off >= 0:
                    i = off / self.s
                    off = i * self.s
                    n = data[off + ngshaoffset:off + ngshaoffset + 20]
                    if n == node:
                        self.map[n] = i + start / self.s
                        return node
                else:
                    break
            end -= blocksize
        return None

    def loadindex(self, i=None, end=None):
        if self.all:
            return
        all = False
        if i == None:
            blockstart = 0
            blocksize = (65536 / self.s) * self.s
            end = self.datasize
            all = True
        else:
            if end:
                blockstart = i * self.s
                end = end * self.s
                blocksize = end - blockstart
            else:
                blockstart = (i & ~1023) * self.s
                blocksize = self.s * 1024
                end = blockstart + blocksize
        while blockstart < end:
            self.loadblock(blockstart, blocksize)
            blockstart += blocksize
        if all:
            self.all = True

class lazyindex(object):
    """a lazy version of the index array"""
    def __init__(self, parser):
        self.p = parser
    def __len__(self):
        return len(self.p.index)
    def load(self, pos):
        if pos < 0:
            pos += len(self.p.index)
        self.p.loadindex(pos)
        return self.p.index[pos]
    def __getitem__(self, pos):
        return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
    def __setitem__(self, pos, item):
        self.p.index[pos] = _pack(indexformatng, *item)
    def __delitem__(self, pos):
        del self.p.index[pos]
    def insert(self, pos, e):
        self.p.index.insert(pos, _pack(indexformatng, *e))
    def append(self, e):
        self.p.index.append(_pack(indexformatng, *e))

class lazymap(object):
    """a lazy version of the node map"""
    def __init__(self, parser):
        self.p = parser
    def load(self, key):
        n = self.p.findnode(key)
        if n == None:
            raise KeyError(key)
    def __contains__(self, key):
        if key in self.p.map:
            return True
        self.p.loadmap()
        return key in self.p.map
    def __iter__(self):
        yield nullid
        for i in xrange(self.p.l):
            ret = self.p.index[i]
            if not ret:
                self.p.loadindex(i)
                ret = self.p.index[i]
            if isinstance(ret, str):
                ret = _unpack(indexformatng, ret)
            yield ret[7]
    def __getitem__(self, key):
        try:
            return self.p.map[key]
        except KeyError:
            try:
                self.load(key)
                return self.p.map[key]
            except KeyError:
                raise KeyError("node " + hex(key))
    def __setitem__(self, key, val):
        self.p.map[key] = val
    def __delitem__(self, key):
        del self.p.map[key]

indexformatv0 = ">4l20s20s20s"
v0shaoffset = 56

class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, fp, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        data = fp.read()
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
-                  nodemap[e[4]], nodemap[e[5]], e[6])
+                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
# 6 bytes offset
# 2 bytes flags
# 4 bytes compressed length
# 4 bytes uncompressed length
# 4 bytes: base rev
# 4 bytes link rev
# 4 bytes parent 1 rev
# 4 bytes parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
ngshaoffset = 32
versionformat = ">I"
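# Layout note (illustrative, derived from the accessors below): each parsed
# in-memory index entry is the 8-tuple
#   (offset_flags, compressed_len, uncompressed_len, base_rev,
#    link_rev, parent1_rev, parent2_rev, nodeid)
# revlogoldio.parseindex builds this shape from v0 records, and
# indexformatng stores it directly.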

class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, fp, inline):
        try:
            size = util.fstat(fp).st_size
        except AttributeError:
            size = 0

        if lazyparser.safe_to_use and not inline and size > 1000000:
            # big index, let's parse it on demand
            parser = lazyparser(fp, size)
            index = lazyindex(parser)
            nodemap = lazymap(parser)
            e = list(index[0])
            type = gettype(e[0])
            e[0] = offset_type(0, type)
            index[0] = e
            return index, nodemap, None

        s = self.size
        cache = None
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        # if we're not using lazymap, always read the whole index
        data = fp.read()
        l = len(data) - s
        append = index.append
        if inline:
            cache = (0, data)
            while off <= l:
                e = _unpack(indexformatng, data[off:off + s])
                nodemap[e[7]] = n
                append(e)
                n += 1
                if e[1] < 0:
                    break
                off += e[1] + s
        else:
            while off <= l:
                e = _unpack(indexformatng, data[off:off + s])
                nodemap[e[7]] = n
                append(e)
                n += 1
                off += s

        e = list(index[0])
        type = gettype(e[0])
        e[0] = offset_type(0, type)
        index[0] = e

        return index, nodemap, cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.
    """
    def __init__(self, opener, indexfile):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        self._cache = None
        self._chunkcache = None
        self.nodemap = {nullid: nullrev}
        self.index = []

        v = REVLOG_DEFAULT_VERSION
        if hasattr(opener, "defversion"):
            v = opener.defversion
            if v & REVLOGNG:
                v |= REVLOGNGINLINEDATA

        i = ""
        try:
            f = self.opener(self.indexfile)
            i = f.read(4)
            f.seek(0)
            if len(i) > 0:
                v = struct.unpack(versionformat, i)[0]
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        if i:
            d = self._io.parseindex(f, self._inline)
            self.index, self.nodemap, self._chunkcache = d

        # add the magic null revision at -1
        self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))

    def _loadindex(self, start, end):
        """load a block of indexes all at once from the lazy parser"""
        if isinstance(self.index, lazyindex):
            self.index.p.loadindex(start, end)

    def _loadindexmap(self):
        """loads both the map and the index from the lazy parser"""
        if isinstance(self.index, lazyindex):
            p = self.index.p
            p.loadindex()
            self.nodemap = p.map

    def _loadmap(self):
        """loads the map from the lazy parser"""
        if isinstance(self.nodemap, lazymap):
            self.nodemap.p.loadmap()
            self.nodemap = self.nodemap.p.map

    def tip(self):
        return self.node(len(self.index) - 2)
    def count(self):
        return len(self.index) - 1

    def rev(self, node):
        try:
            return self.nodemap[node]
        except KeyError:
            raise LookupError(_('%s: no node %s') % (self.indexfile, hex(node)))
    def node(self, rev):
        return self.index[rev][7]
    def linkrev(self, node):
        return self.index[self.rev(node)][4]
    def parents(self, node):
        d = self.index[self.rev(node)][5:7]
        return (self.node(d[0]), self.node(d[1]))
    def parentrevs(self, rev):
        return self.index[rev][5:7]
    def start(self, rev):
        return int(self.index[rev][0] >> 16)
    def end(self, rev):
        return self.start(rev) + self.length(rev)
    def length(self, rev):
        return self.index[rev][1]
    def base(self, rev):
        return self.index[rev][3]

    def size(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(self.node(rev))
        return len(t)

        # alternate implementation. The advantage of this code is that it
        # will be faster for a single revision. But the results are not
        # cached, so finding the size of every revision will be slower.
        """
        if self.cache and self.cache[1] == rev:
            return len(self.cache[2])

        base = self.base(rev)
        if self.cache and self.cache[1] >= base and self.cache[1] < rev:
            base = self.cache[1]
            text = self.cache[2]
        else:
            text = self.revision(self.node(base))

        l = len(text)
        for x in xrange(base + 1, rev + 1):
            l = mdiff.patchedsize(l, self.chunk(x))
        return l
        """

    def reachable(self, node, stop=None):
        """return a hash of all nodes ancestral to a given node, including
        the node itself, stopping when stop is matched"""
        reachable = {}
        visit = [node]
        reachable[node] = 1
        if stop:
            stopn = self.rev(stop)
        else:
            stopn = 0
        while visit:
            n = visit.pop(0)
            if n == stop:
                continue
            if n == nullid:
                continue
            for p in self.parents(n):
                if self.rev(p) < stopn:
                    continue
                if p not in reachable:
                    reachable[p] = 1
                    visit.append(p)
        return reachable

    def nodesbetween(self, roots=None, heads=None):
        """Return a tuple containing three elements. Elements 1 and 2 contain
        a final list of bases and heads after all the unreachable ones have
        been pruned. Element 0 contains a topologically sorted list of all
        nodes that satisfy these constraints:
        1. All nodes must be descended from a node in roots (the nodes on
           roots are considered descended from themselves).
        2. All nodes must also be ancestors of a node in heads (the nodes in
           heads are considered to be their own ancestors).

        If roots is unspecified, nullid is assumed as the only root.
        If heads is unspecified, it is taken to be the output of the
        heads method (i.e. a list of all nodes in the repository that
        have no children)."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendent of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in xrange(0, self.count())],
                    [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = self.count() - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = {}
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, 0)
            # Start at the top and keep marking parents until we're done.
            nodestotag = heads.keys()
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendent of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors[n] = 1 # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.extend([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [n for n in roots if n in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(n) for n in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a 'set' (i.e. a dictionary where the
        # values don't matter.)
        descendents = dict.fromkeys(roots, 1)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendents.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendents, empty parents will look like
        # they're descendents.
        for r in xrange(max(lowestrev, 0), highestrev + 1):
            n = self.node(r)
            isdescendent = False
            if lowestrev == nullrev:  # Everybody is a descendent of nullid
                isdescendent = True
            elif n in descendents:
                # n is already a descendent
                isdescendent = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendents before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendents, it's not a root.
                    if (p[0] in descendents) or (p[1] in descendents):
                        roots.pop(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendent if either of its parents are
                # descendents. (We seeded the descendents list with the roots
                # up there, remember?)
                if (p[0] in descendents) or (p[1] in descendents):
                    descendents[n] = 1
                    isdescendent = True
            if isdescendent and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendents and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = 1
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = 1
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [n for n in heads.iterkeys() if heads[n] != 0]
        roots = roots.keys()
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            count = self.count()
            if not count:
                return [nullid]
            ishead = [1] * (count + 1)
            index = self.index
            for r in xrange(count):
                e = index[r]
                ishead[e[5]] = ishead[e[6]] = 0
            return [self.node(r) for r in xrange(count) if ishead[r]]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = dict.fromkeys([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev: 1}
        heads = {startrev: 1}

        parentrevs = self.parentrevs
        for r in xrange(startrev + 1, self.count()):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable[r] = 1
                        heads[r] = 1
                    if p in heads and p not in stoprevs:
                        del heads[p]

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in range(p + 1, self.count()):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def _match(self, id):
        if isinstance(id, (long, int)):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                r = self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = self.count() + rev
            if rev < 0 or rev >= self.count():
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                r = self.rev(node)
                return node
            except TypeError:
                pass

    def _partialmatch(self, id):
        if len(id) < 40:
            try:
                # hex(node)[:...]
                bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
                node = None
                for n in self.nodemap:
                    if n.startswith(bin_id) and hex(n).startswith(id):
                        if node is not None:
                            raise LookupError(_("Ambiguous identifier"))
                        node = n
                if node is not None:
                    return node
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(_("No match found"))

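    # Illustrative (sketch): lookup() above accepts several spellings for
    # the same revision, e.g. for a revlog r with a revision numbered 5:
    #   r.lookup(5); r.lookup("5"); r.lookup(hex(r.node(5)))
    #   r.lookup(hex(r.node(5))[:4])   # unambiguous hex prefix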
860 def cmp(self, node, text):
860 def cmp(self, node, text):
861 """compare text with a given file revision"""
861 """compare text with a given file revision"""
862 p1, p2 = self.parents(node)
862 p1, p2 = self.parents(node)
863 return hash(text, p1, p2) != node
863 return hash(text, p1, p2) != node
864
864
865 def chunk(self, rev, df=None):
865 def chunk(self, rev, df=None):
866 def loadcache(df):
866 def loadcache(df):
867 if not df:
867 if not df:
868 if self._inline:
868 if self._inline:
869 df = self.opener(self.indexfile)
869 df = self.opener(self.indexfile)
870 else:
870 else:
871 df = self.opener(self.datafile)
871 df = self.opener(self.datafile)
872 df.seek(start)
872 df.seek(start)
873 self._chunkcache = (start, df.read(cache_length))
873 self._chunkcache = (start, df.read(cache_length))
874
874
875 start, length = self.start(rev), self.length(rev)
875 start, length = self.start(rev), self.length(rev)
876 if self._inline:
876 if self._inline:
877 start += (rev + 1) * self._io.size
877 start += (rev + 1) * self._io.size
878 end = start + length
878 end = start + length
879
879
880 offset = 0
880 offset = 0
881 if not self._chunkcache:
881 if not self._chunkcache:
882 cache_length = max(65536, length)
882 cache_length = max(65536, length)
883 loadcache(df)
883 loadcache(df)
884 else:
884 else:
885 cache_start = self._chunkcache[0]
885 cache_start = self._chunkcache[0]
886 cache_length = len(self._chunkcache[1])
886 cache_length = len(self._chunkcache[1])
887 cache_end = cache_start + cache_length
887 cache_end = cache_start + cache_length
888 if start >= cache_start and end <= cache_end:
888 if start >= cache_start and end <= cache_end:
889 # it is cached
889 # it is cached
890 offset = start - cache_start
890 offset = start - cache_start
891 else:
891 else:
892 cache_length = max(65536, length)
892 cache_length = max(65536, length)
893 loadcache(df)
893 loadcache(df)
894
894
895 # avoid copying large chunks
895 # avoid copying large chunks
896 c = self._chunkcache[1]
896 c = self._chunkcache[1]
897 if cache_length != length:
897 if cache_length != length:
898 c = c[offset:offset + length]
898 c = c[offset:offset + length]
899
899
900 return decompress(c)
900 return decompress(c)
901
901
902 def delta(self, node):
902 def delta(self, node):
903 """return or calculate a delta between a node and its predecessor"""
903 """return or calculate a delta between a node and its predecessor"""
904 r = self.rev(node)
904 r = self.rev(node)
905 return self.revdiff(r - 1, r)
905 return self.revdiff(r - 1, r)
906
906
907 def revdiff(self, rev1, rev2):
907 def revdiff(self, rev1, rev2):
908 """return or calculate a delta between two revisions"""
908 """return or calculate a delta between two revisions"""
909 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
909 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
910 return self.chunk(rev2)
910 return self.chunk(rev2)
911
911
912 return mdiff.textdiff(self.revision(self.node(rev1)),
912 return mdiff.textdiff(self.revision(self.node(rev1)),
913 self.revision(self.node(rev2)))
913 self.revision(self.node(rev2)))
914
914
915 def revision(self, node):
915 def revision(self, node):
916 """return an uncompressed revision of a given"""
916 """return an uncompressed revision of a given"""
917 if node == nullid:
917 if node == nullid:
918 return ""
918 return ""
919 if self._cache and self._cache[0] == node:
919 if self._cache and self._cache[0] == node:
920 return str(self._cache[2])
920 return str(self._cache[2])
921
921
922 # look up what we need to read
922 # look up what we need to read
923 text = None
923 text = None
924 rev = self.rev(node)
924 rev = self.rev(node)
925 base = self.base(rev)
925 base = self.base(rev)
926
926
927 # check rev flags
927 # check rev flags
928 if self.index[rev][0] & 0xFFFF:
928 if self.index[rev][0] & 0xFFFF:
929 raise RevlogError(_('incompatible revision flag %x') %
929 raise RevlogError(_('incompatible revision flag %x') %
930 (self.index[rev][0] & 0xFFFF))
930 (self.index[rev][0] & 0xFFFF))
931
931
932 if self._inline:
932 if self._inline:
933 # we probably have the whole chunk cached
933 # we probably have the whole chunk cached
934 df = None
934 df = None
935 else:
935 else:
936 df = self.opener(self.datafile)
936 df = self.opener(self.datafile)
937
937
938 # do we have useful data cached?
938 # do we have useful data cached?
939 if self._cache and self._cache[1] >= base and self._cache[1] < rev:
939 if self._cache and self._cache[1] >= base and self._cache[1] < rev:
940 base = self._cache[1]
940 base = self._cache[1]
941 text = str(self._cache[2])
941 text = str(self._cache[2])
942 self._loadindex(base, rev + 1)
942 self._loadindex(base, rev + 1)
943 else:
943 else:
944 self._loadindex(base, rev + 1)
944 self._loadindex(base, rev + 1)
945 text = self.chunk(base, df=df)
945 text = self.chunk(base, df=df)
946
946
947 bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
947 bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
948 text = mdiff.patches(text, bins)
948 text = mdiff.patches(text, bins)
949 p1, p2 = self.parents(node)
949 p1, p2 = self.parents(node)
950 if node != hash(text, p1, p2):
950 if node != hash(text, p1, p2):
951 raise RevlogError(_("integrity check failed on %s:%d")
951 raise RevlogError(_("integrity check failed on %s:%d")
952 % (self.datafile, rev))
952 % (self.datafile, rev))
953
953
954 self._cache = (node, rev, text)
954 self._cache = (node, rev, text)
955 return text
955 return text
956
956
    def checkinlinesize(self, tr, fp=None):
        if not self._inline:
            return
        if not fp:
            fp = self.opener(self.indexfile, 'r')
        fp.seek(0, 2)
        size = fp.tell()
        if size < 131072:
            return
        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)
        df = self.opener(self.datafile, 'w')
        calc = self._io.size
        for r in xrange(self.count()):
            start = self.start(r) + (r + 1) * calc
            length = self.length(r)
            fp.seek(start)
            d = fp.read(length)
            df.write(d)
        fp.close()
        df.close()
        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in xrange(self.count()):
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call rename, the temp file will never replace the
        # real index
        fp.rename()

        tr.replace(self.indexfile, trindex * calc)
        self._chunkcache = None

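    # Worked example for the split above, with hypothetical numbers: the
    # stored offsets ignore interleaved index entries, so revision r's
    # data in the inline file sits at self.start(r) + (r + 1) * calc,
    # i.e. past the r + 1 index entries written so far plus all earlier
    # data.  With calc = 64 and three 100-byte revisions, rev 2 starts at
    # 200 + 3 * 64 = 392 bytes into the .i file; after the copy it starts
    # at plain self.start(2) = 200 in the new .d file.
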
    def addrevision(self, text, transaction, link, p1, p2, d=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        d - an optional precomputed delta
        """
        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)

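    # Usage sketch ("rl", "tr", and "linkrev" are hypothetical):
    #
    #   node = rl.addrevision("new file contents", tr, linkrev, p1, p2)
    #
    # Passing a precomputed delta d skips rediffing against the previous
    # revision when the caller already has one, as addgroup() below does.
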
    def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
        node = hash(text, p1, p2)
        if node in self.nodemap:
            return node

        curr = self.count()
        prev = curr - 1
        base = self.base(prev)
        offset = self.end(prev)

        if curr:
            if not d:
                ptext = self.revision(self.node(prev))
                d = mdiff.textdiff(ptext, text)
            data = compress(d)
            l = len(data[1]) + len(data[0])
            dist = l + offset - self.start(base)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if not curr or dist > len(text) * 2:
            data = compress(text)
            l = len(data[1]) + len(data[0])
            base = curr

        e = (offset_type(offset, 0), l, len(text),
             base, link, self.rev(p1), self.rev(p2), node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

        self._cache = (node, curr, text)
        return node

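    # The snapshot heuristic above with concrete, hypothetical numbers:
    # if the new text is 1,000 bytes but the bytes spanned from the delta
    # base through this delta (dist) come to 2,500, then
    # dist > len(text) * 2 holds and a fresh full version is stored,
    # bounding the cost of any future reconstruction at roughly twice
    # the text size.
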
    def ancestor(self, a, b):
        """calculate the least common ancestor of nodes a and b"""

        def parents(rev):
            return [p for p in self.parentrevs(rev) if p != nullrev]

        c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
        if c is None:
            return nullid

        return self.node(c)

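    # Usage sketch (a and b are hypothetical node ids in a revlog "rl"):
    #
    #   anc = rl.ancestor(a, b)
    #   if anc == nullid:
    #       pass    # the two nodes share no ancestor in this revlog
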
    def group(self, nodelist, lookup, infocollect=None):
        """calculate a delta group

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
        have this parent, as it has all history before these
        changesets. The parent used is parents[0].
        """
        revs = [self.rev(n) for n in nodelist]

        # if we don't have any revisions touched by these changesets, bail
        if not revs:
            yield changegroup.closechunk()
            return

        # add the parent of the first rev
        p = self.parents(self.node(revs[0]))[0]
        revs.insert(0, self.rev(p))

        # build deltas
        for d in xrange(0, len(revs) - 1):
            a, b = revs[d], revs[d + 1]
            nb = self.node(b)

            if infocollect is not None:
                infocollect(nb)

            p = self.parents(nb)
            meta = nb + p[0] + p[1] + lookup(nb)
            if a == -1:
                d = self.revision(nb)
                meta += mdiff.trivialdiffheader(len(d))
            else:
                d = self.revdiff(a, b)
            yield changegroup.chunkheader(len(meta) + len(d))
            yield meta
            if len(d) > 2**20:
                pos = 0
                while pos < len(d):
                    pos2 = pos + 2 ** 18
                    yield d[pos:pos2]
                    pos = pos2
            else:
                yield d

        yield changegroup.closechunk()

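    # Shape of one emitted chunk, as a sketch of the stream layout above:
    #
    #   +--------+-------------------------------+-----------------------+
    #   | length | meta: node, p1, p2, linknode  | delta payload         |
    #   | header | (4 x 20 bytes)                | (mdiff binary format) |
    #   +--------+-------------------------------+-----------------------+
    #
    # Deltas over 1MB (2**20) are sliced into 256KB (2**18) pieces purely
    # to bound the size of each yielded string; receivers reassemble them.
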
    def addgroup(self, revs, linkmapper, transaction, unique=0):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The first
        delta is against its parent, which should be in our log; the
        rest are against the previous delta.
        """

        # track the base of the current delta log
        r = self.count()
        t = r - 1
        node = None

        base = prev = nullrev
        start = end = textlen = 0
        if r:
            end = self.end(t)

        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        # loop through our set of deltas
        chain = None
        for chunk in revs:
            node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
            link = linkmapper(cs)
            if node in self.nodemap:
                # this can happen if two branches make the same change
                # if unique:
                #    raise RevlogError(_("already have %s") % hex(node[:4]))
                chain = node
                continue
            delta = buffer(chunk, 80)
            del chunk

            for p in (p1, p2):
                if p not in self.nodemap:
                    raise LookupError(_("unknown parent %s") % short(p))

            if not chain:
                # retrieve the parent revision of the delta chain
                chain = p1
                if chain not in self.nodemap:
                    raise LookupError(_("unknown base %s") % short(chain[:4]))

            # full versions are inserted when the needed deltas become
            # comparable to the uncompressed text or when the previous
            # version is not the one we have a delta against. We use
            # the size of the previous full rev as a proxy for the
            # current size.

            if chain == prev:
                cdelta = compress(delta)
                cdeltalen = len(cdelta[0]) + len(cdelta[1])
                textlen = mdiff.patchedsize(textlen, delta)

            if chain != prev or (end - start + cdeltalen) > textlen * 2:
                # flush our writes here so we can read it in revision
                if dfh:
                    dfh.flush()
                ifh.flush()
                text = self.revision(chain)
                if len(text) == 0:
                    # skip over trivial delta header
                    text = buffer(delta, 12)
                else:
                    text = mdiff.patches(text, [delta])
                del delta
                chk = self._addrevision(text, transaction, link, p1, p2, None,
                                        ifh, dfh)
                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    dfh = self.opener(self.datafile, "a")
                    ifh = self.opener(self.indexfile, "a")
                if chk != node:
                    raise RevlogError(_("consistency error adding group"))
                textlen = len(text)
            else:
                e = (offset_type(end, 0), cdeltalen, textlen, base,
                     link, self.rev(p1), self.rev(p2), node)
                self.index.insert(-1, e)
                self.nodemap[node] = r
                entry = self._io.packentry(e, self.node, self.version, r)
                if self._inline:
                    ifh.write(entry)
                    ifh.write(cdelta[0])
                    ifh.write(cdelta[1])
                    self.checkinlinesize(transaction, ifh)
                    if not self._inline:
                        dfh = self.opener(self.datafile, "a")
                        ifh = self.opener(self.indexfile, "a")
                else:
                    dfh.write(cdelta[0])
                    dfh.write(cdelta[1])
                    ifh.write(entry)

            t, r, chain, prev = r, r + 1, node, node
            base = self.base(t)
            start = self.start(base)
            end = self.end(t)

        return node

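    # Usage sketch ("rl", "chunks", "tr", and the changelog "cl" are all
    # hypothetical): addgroup() consumes the stream group() produces on
    # the sending side, mapping each changeset node to a local linkrev:
    #
    #   def linkmapper(cs):
    #       return cl.rev(cs)
    #
    #   tip = rl.addgroup(chunks, linkmapper, tr)
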
    def strip(self, rev, minlink):
        if self.count() == 0 or rev >= self.count():
            return

        if isinstance(self.index, lazyindex):
            self._loadindexmap()

        # When stripping away a revision, we need to make sure it
        # does not actually belong to an older changeset.
        # The minlink parameter defines the oldest revision
        # we're allowed to strip away.
        while minlink > self.index[rev][4]:
            rev += 1
            if rev >= self.count():
                return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            df = self.opener(self.datafile, "a")
            df.truncate(end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        indexf = self.opener(self.indexfile, "a")
        indexf.truncate(end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chunkcache = None
        for x in xrange(rev, self.count()):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

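    # Truncation arithmetic above, with hypothetical numbers: stripping
    # from rev = 10 in a non-inline revlog with 64-byte index entries
    # truncates the data file at self.start(10) and the index file at
    # 10 * 64 = 640 bytes.  In the inline case index and data share one
    # file, so the single cut point is self.start(10) + 10 * 64.
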
    def checksize(self):
        expected = 0
        if self.count():
            expected = max(0, self.end(self.count() - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            s = self._io.size
            i = max(0, actual / s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in xrange(self.count()):
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - self.count() * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

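    # Interpreting the result ("rl" is a hypothetical revlog instance):
    #
    #   dd, di = rl.checksize()
    #   # dd: surplus bytes in the data file beyond what the index expects
    #   # di: surplus or misaligned bytes in the index file itself
    #   # (0, 0) means both files match the recorded revisions exactly.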