##// END OF EJS Templates
revlog: reduce memory usage in addgroup...
Matt Mackall -
r5445:64cf1c85 default
parent child Browse files
Show More
@@ -1,1276 +1,1284
1 """
1 """
2 revlog.py - storage back-end for mercurial
2 revlog.py - storage back-end for mercurial
3
3
4 This provides efficient delta storage with O(1) retrieve and append
4 This provides efficient delta storage with O(1) retrieve and append
5 and O(changes) merge between branches
5 and O(changes) merge between branches
6
6
7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
8
8
9 This software may be used and distributed according to the terms
9 This software may be used and distributed according to the terms
10 of the GNU General Public License, incorporated herein by reference.
10 of the GNU General Public License, incorporated herein by reference.
11 """
11 """
12
12
13 from node import *
13 from node import *
14 from i18n import _
14 from i18n import _
15 import binascii, changegroup, errno, ancestor, mdiff, os
15 import binascii, changegroup, errno, ancestor, mdiff, os
16 import sha, struct, util, zlib
16 import sha, struct, util, zlib
17
17
18 _pack = struct.pack
18 _pack = struct.pack
19 _unpack = struct.unpack
19 _unpack = struct.unpack
20 _compress = zlib.compress
20 _compress = zlib.compress
21 _decompress = zlib.decompress
21 _decompress = zlib.decompress
22 _sha = sha.new
22 _sha = sha.new
23
23
24 # revlog flags
24 # revlog flags
25 REVLOGV0 = 0
25 REVLOGV0 = 0
26 REVLOGNG = 1
26 REVLOGNG = 1
27 REVLOGNGINLINEDATA = (1 << 16)
27 REVLOGNGINLINEDATA = (1 << 16)
28 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
28 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
29 REVLOG_DEFAULT_FORMAT = REVLOGNG
29 REVLOG_DEFAULT_FORMAT = REVLOGNG
30 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
30 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
31
31
32 class RevlogError(Exception):
32 class RevlogError(Exception):
33 pass
33 pass
34 class LookupError(RevlogError):
34 class LookupError(RevlogError):
35 pass
35 pass
36
36
37 def getoffset(q):
37 def getoffset(q):
38 return int(q >> 16)
38 return int(q >> 16)
39
39
40 def gettype(q):
40 def gettype(q):
41 return int(q & 0xFFFF)
41 return int(q & 0xFFFF)
42
42
43 def offset_type(offset, type):
43 def offset_type(offset, type):
44 return long(long(offset) << 16 | type)
44 return long(long(offset) << 16 | type)
45
45
46 def hash(text, p1, p2):
46 def hash(text, p1, p2):
47 """generate a hash from the given text and its parent hashes
47 """generate a hash from the given text and its parent hashes
48
48
49 This hash combines both the current file contents and its history
49 This hash combines both the current file contents and its history
50 in a manner that makes it easy to distinguish nodes with the same
50 in a manner that makes it easy to distinguish nodes with the same
51 content in the revision graph.
51 content in the revision graph.
52 """
52 """
53 l = [p1, p2]
53 l = [p1, p2]
54 l.sort()
54 l.sort()
55 s = _sha(l[0])
55 s = _sha(l[0])
56 s.update(l[1])
56 s.update(l[1])
57 s.update(text)
57 s.update(text)
58 return s.digest()
58 return s.digest()
59
59
60 def compress(text):
60 def compress(text):
61 """ generate a possibly-compressed representation of text """
61 """ generate a possibly-compressed representation of text """
62 if not text:
62 if not text:
63 return ("", text)
63 return ("", text)
64 if len(text) < 44:
64 if len(text) < 44:
65 if text[0] == '\0':
65 if text[0] == '\0':
66 return ("", text)
66 return ("", text)
67 return ('u', text)
67 return ('u', text)
68 bin = _compress(text)
68 bin = _compress(text)
69 if len(bin) > len(text):
69 if len(bin) > len(text):
70 if text[0] == '\0':
70 if text[0] == '\0':
71 return ("", text)
71 return ("", text)
72 return ('u', text)
72 return ('u', text)
73 return ("", bin)
73 return ("", bin)
74
74
75 def decompress(bin):
75 def decompress(bin):
76 """ decompress the given input """
76 """ decompress the given input """
77 if not bin:
77 if not bin:
78 return bin
78 return bin
79 t = bin[0]
79 t = bin[0]
80 if t == '\0':
80 if t == '\0':
81 return bin
81 return bin
82 if t == 'x':
82 if t == 'x':
83 return _decompress(bin)
83 return _decompress(bin)
84 if t == 'u':
84 if t == 'u':
85 return bin[1:]
85 return bin[1:]
86 raise RevlogError(_("unknown compression type %r") % t)
86 raise RevlogError(_("unknown compression type %r") % t)
87
87
88 class lazyparser(object):
88 class lazyparser(object):
89 """
89 """
90 this class avoids the need to parse the entirety of large indices
90 this class avoids the need to parse the entirety of large indices
91 """
91 """
92
92
93 # lazyparser is not safe to use on windows if win32 extensions not
93 # lazyparser is not safe to use on windows if win32 extensions not
94 # available. it keeps file handle open, which make it not possible
94 # available. it keeps file handle open, which make it not possible
95 # to break hardlinks on local cloned repos.
95 # to break hardlinks on local cloned repos.
96 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
96 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
97 hasattr(util, 'win32api'))
97 hasattr(util, 'win32api'))
98
98
99 def __init__(self, dataf, size):
99 def __init__(self, dataf, size):
100 self.dataf = dataf
100 self.dataf = dataf
101 self.s = struct.calcsize(indexformatng)
101 self.s = struct.calcsize(indexformatng)
102 self.datasize = size
102 self.datasize = size
103 self.l = size/self.s
103 self.l = size/self.s
104 self.index = [None] * self.l
104 self.index = [None] * self.l
105 self.map = {nullid: nullrev}
105 self.map = {nullid: nullrev}
106 self.allmap = 0
106 self.allmap = 0
107 self.all = 0
107 self.all = 0
108 self.mapfind_count = 0
108 self.mapfind_count = 0
109
109
110 def loadmap(self):
110 def loadmap(self):
111 """
111 """
112 during a commit, we need to make sure the rev being added is
112 during a commit, we need to make sure the rev being added is
113 not a duplicate. This requires loading the entire index,
113 not a duplicate. This requires loading the entire index,
114 which is fairly slow. loadmap can load up just the node map,
114 which is fairly slow. loadmap can load up just the node map,
115 which takes much less time.
115 which takes much less time.
116 """
116 """
117 if self.allmap:
117 if self.allmap:
118 return
118 return
119 end = self.datasize
119 end = self.datasize
120 self.allmap = 1
120 self.allmap = 1
121 cur = 0
121 cur = 0
122 count = 0
122 count = 0
123 blocksize = self.s * 256
123 blocksize = self.s * 256
124 self.dataf.seek(0)
124 self.dataf.seek(0)
125 while cur < end:
125 while cur < end:
126 data = self.dataf.read(blocksize)
126 data = self.dataf.read(blocksize)
127 off = 0
127 off = 0
128 for x in xrange(256):
128 for x in xrange(256):
129 n = data[off + ngshaoffset:off + ngshaoffset + 20]
129 n = data[off + ngshaoffset:off + ngshaoffset + 20]
130 self.map[n] = count
130 self.map[n] = count
131 count += 1
131 count += 1
132 if count >= self.l:
132 if count >= self.l:
133 break
133 break
134 off += self.s
134 off += self.s
135 cur += blocksize
135 cur += blocksize
136
136
137 def loadblock(self, blockstart, blocksize, data=None):
137 def loadblock(self, blockstart, blocksize, data=None):
138 if self.all:
138 if self.all:
139 return
139 return
140 if data is None:
140 if data is None:
141 self.dataf.seek(blockstart)
141 self.dataf.seek(blockstart)
142 if blockstart + blocksize > self.datasize:
142 if blockstart + blocksize > self.datasize:
143 # the revlog may have grown since we've started running,
143 # the revlog may have grown since we've started running,
144 # but we don't have space in self.index for more entries.
144 # but we don't have space in self.index for more entries.
145 # limit blocksize so that we don't get too much data.
145 # limit blocksize so that we don't get too much data.
146 blocksize = max(self.datasize - blockstart, 0)
146 blocksize = max(self.datasize - blockstart, 0)
147 data = self.dataf.read(blocksize)
147 data = self.dataf.read(blocksize)
148 lend = len(data) / self.s
148 lend = len(data) / self.s
149 i = blockstart / self.s
149 i = blockstart / self.s
150 off = 0
150 off = 0
151 # lazyindex supports __delitem__
151 # lazyindex supports __delitem__
152 if lend > len(self.index) - i:
152 if lend > len(self.index) - i:
153 lend = len(self.index) - i
153 lend = len(self.index) - i
154 for x in xrange(lend):
154 for x in xrange(lend):
155 if self.index[i + x] == None:
155 if self.index[i + x] == None:
156 b = data[off : off + self.s]
156 b = data[off : off + self.s]
157 self.index[i + x] = b
157 self.index[i + x] = b
158 n = b[ngshaoffset:ngshaoffset + 20]
158 n = b[ngshaoffset:ngshaoffset + 20]
159 self.map[n] = i + x
159 self.map[n] = i + x
160 off += self.s
160 off += self.s
161
161
162 def findnode(self, node):
162 def findnode(self, node):
163 """search backwards through the index file for a specific node"""
163 """search backwards through the index file for a specific node"""
164 if self.allmap:
164 if self.allmap:
165 return None
165 return None
166
166
167 # hg log will cause many many searches for the manifest
167 # hg log will cause many many searches for the manifest
168 # nodes. After we get called a few times, just load the whole
168 # nodes. After we get called a few times, just load the whole
169 # thing.
169 # thing.
170 if self.mapfind_count > 8:
170 if self.mapfind_count > 8:
171 self.loadmap()
171 self.loadmap()
172 if node in self.map:
172 if node in self.map:
173 return node
173 return node
174 return None
174 return None
175 self.mapfind_count += 1
175 self.mapfind_count += 1
176 last = self.l - 1
176 last = self.l - 1
177 while self.index[last] != None:
177 while self.index[last] != None:
178 if last == 0:
178 if last == 0:
179 self.all = 1
179 self.all = 1
180 self.allmap = 1
180 self.allmap = 1
181 return None
181 return None
182 last -= 1
182 last -= 1
183 end = (last + 1) * self.s
183 end = (last + 1) * self.s
184 blocksize = self.s * 256
184 blocksize = self.s * 256
185 while end >= 0:
185 while end >= 0:
186 start = max(end - blocksize, 0)
186 start = max(end - blocksize, 0)
187 self.dataf.seek(start)
187 self.dataf.seek(start)
188 data = self.dataf.read(end - start)
188 data = self.dataf.read(end - start)
189 findend = end - start
189 findend = end - start
190 while True:
190 while True:
191 # we're searching backwards, so we have to make sure
191 # we're searching backwards, so we have to make sure
192 # we don't find a changeset where this node is a parent
192 # we don't find a changeset where this node is a parent
193 off = data.find(node, 0, findend)
193 off = data.find(node, 0, findend)
194 findend = off
194 findend = off
195 if off >= 0:
195 if off >= 0:
196 i = off / self.s
196 i = off / self.s
197 off = i * self.s
197 off = i * self.s
198 n = data[off + ngshaoffset:off + ngshaoffset + 20]
198 n = data[off + ngshaoffset:off + ngshaoffset + 20]
199 if n == node:
199 if n == node:
200 self.map[n] = i + start / self.s
200 self.map[n] = i + start / self.s
201 return node
201 return node
202 else:
202 else:
203 break
203 break
204 end -= blocksize
204 end -= blocksize
205 return None
205 return None
206
206
207 def loadindex(self, i=None, end=None):
207 def loadindex(self, i=None, end=None):
208 if self.all:
208 if self.all:
209 return
209 return
210 all = False
210 all = False
211 if i == None:
211 if i == None:
212 blockstart = 0
212 blockstart = 0
213 blocksize = (65536 / self.s) * self.s
213 blocksize = (65536 / self.s) * self.s
214 end = self.datasize
214 end = self.datasize
215 all = True
215 all = True
216 else:
216 else:
217 if end:
217 if end:
218 blockstart = i * self.s
218 blockstart = i * self.s
219 end = end * self.s
219 end = end * self.s
220 blocksize = end - blockstart
220 blocksize = end - blockstart
221 else:
221 else:
222 blockstart = (i & ~1023) * self.s
222 blockstart = (i & ~1023) * self.s
223 blocksize = self.s * 1024
223 blocksize = self.s * 1024
224 end = blockstart + blocksize
224 end = blockstart + blocksize
225 while blockstart < end:
225 while blockstart < end:
226 self.loadblock(blockstart, blocksize)
226 self.loadblock(blockstart, blocksize)
227 blockstart += blocksize
227 blockstart += blocksize
228 if all:
228 if all:
229 self.all = True
229 self.all = True
230
230
231 class lazyindex(object):
231 class lazyindex(object):
232 """a lazy version of the index array"""
232 """a lazy version of the index array"""
233 def __init__(self, parser):
233 def __init__(self, parser):
234 self.p = parser
234 self.p = parser
235 def __len__(self):
235 def __len__(self):
236 return len(self.p.index)
236 return len(self.p.index)
237 def load(self, pos):
237 def load(self, pos):
238 if pos < 0:
238 if pos < 0:
239 pos += len(self.p.index)
239 pos += len(self.p.index)
240 self.p.loadindex(pos)
240 self.p.loadindex(pos)
241 return self.p.index[pos]
241 return self.p.index[pos]
242 def __getitem__(self, pos):
242 def __getitem__(self, pos):
243 return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
243 return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
244 def __setitem__(self, pos, item):
244 def __setitem__(self, pos, item):
245 self.p.index[pos] = _pack(indexformatng, *item)
245 self.p.index[pos] = _pack(indexformatng, *item)
246 def __delitem__(self, pos):
246 def __delitem__(self, pos):
247 del self.p.index[pos]
247 del self.p.index[pos]
248 def insert(self, pos, e):
248 def insert(self, pos, e):
249 self.p.index.insert(pos, _pack(indexformatng, *e))
249 self.p.index.insert(pos, _pack(indexformatng, *e))
250 def append(self, e):
250 def append(self, e):
251 self.p.index.append(_pack(indexformatng, *e))
251 self.p.index.append(_pack(indexformatng, *e))
252
252
253 class lazymap(object):
253 class lazymap(object):
254 """a lazy version of the node map"""
254 """a lazy version of the node map"""
255 def __init__(self, parser):
255 def __init__(self, parser):
256 self.p = parser
256 self.p = parser
257 def load(self, key):
257 def load(self, key):
258 n = self.p.findnode(key)
258 n = self.p.findnode(key)
259 if n == None:
259 if n == None:
260 raise KeyError(key)
260 raise KeyError(key)
261 def __contains__(self, key):
261 def __contains__(self, key):
262 if key in self.p.map:
262 if key in self.p.map:
263 return True
263 return True
264 self.p.loadmap()
264 self.p.loadmap()
265 return key in self.p.map
265 return key in self.p.map
266 def __iter__(self):
266 def __iter__(self):
267 yield nullid
267 yield nullid
268 for i in xrange(self.p.l):
268 for i in xrange(self.p.l):
269 ret = self.p.index[i]
269 ret = self.p.index[i]
270 if not ret:
270 if not ret:
271 self.p.loadindex(i)
271 self.p.loadindex(i)
272 ret = self.p.index[i]
272 ret = self.p.index[i]
273 if isinstance(ret, str):
273 if isinstance(ret, str):
274 ret = _unpack(indexformatng, ret)
274 ret = _unpack(indexformatng, ret)
275 yield ret[7]
275 yield ret[7]
276 def __getitem__(self, key):
276 def __getitem__(self, key):
277 try:
277 try:
278 return self.p.map[key]
278 return self.p.map[key]
279 except KeyError:
279 except KeyError:
280 try:
280 try:
281 self.load(key)
281 self.load(key)
282 return self.p.map[key]
282 return self.p.map[key]
283 except KeyError:
283 except KeyError:
284 raise KeyError("node " + hex(key))
284 raise KeyError("node " + hex(key))
285 def __setitem__(self, key, val):
285 def __setitem__(self, key, val):
286 self.p.map[key] = val
286 self.p.map[key] = val
287 def __delitem__(self, key):
287 def __delitem__(self, key):
288 del self.p.map[key]
288 del self.p.map[key]
289
289
290 indexformatv0 = ">4l20s20s20s"
290 indexformatv0 = ">4l20s20s20s"
291 v0shaoffset = 56
291 v0shaoffset = 56
292
292
293 class revlogoldio(object):
293 class revlogoldio(object):
294 def __init__(self):
294 def __init__(self):
295 self.size = struct.calcsize(indexformatv0)
295 self.size = struct.calcsize(indexformatv0)
296
296
297 def parseindex(self, fp, inline):
297 def parseindex(self, fp, inline):
298 s = self.size
298 s = self.size
299 index = []
299 index = []
300 nodemap = {nullid: nullrev}
300 nodemap = {nullid: nullrev}
301 n = off = 0
301 n = off = 0
302 data = fp.read()
302 data = fp.read()
303 l = len(data)
303 l = len(data)
304 while off + s <= l:
304 while off + s <= l:
305 cur = data[off:off + s]
305 cur = data[off:off + s]
306 off += s
306 off += s
307 e = _unpack(indexformatv0, cur)
307 e = _unpack(indexformatv0, cur)
308 # transform to revlogv1 format
308 # transform to revlogv1 format
309 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
309 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
310 nodemap[e[4]], nodemap[e[5]], e[6])
310 nodemap[e[4]], nodemap[e[5]], e[6])
311 index.append(e2)
311 index.append(e2)
312 nodemap[e[6]] = n
312 nodemap[e[6]] = n
313 n += 1
313 n += 1
314
314
315 return index, nodemap, None
315 return index, nodemap, None
316
316
317 def packentry(self, entry, node, version, rev):
317 def packentry(self, entry, node, version, rev):
318 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
318 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
319 node(entry[5]), node(entry[6]), entry[7])
319 node(entry[5]), node(entry[6]), entry[7])
320 return _pack(indexformatv0, *e2)
320 return _pack(indexformatv0, *e2)
321
321
322 # index ng:
322 # index ng:
323 # 6 bytes offset
323 # 6 bytes offset
324 # 2 bytes flags
324 # 2 bytes flags
325 # 4 bytes compressed length
325 # 4 bytes compressed length
326 # 4 bytes uncompressed length
326 # 4 bytes uncompressed length
327 # 4 bytes: base rev
327 # 4 bytes: base rev
328 # 4 bytes link rev
328 # 4 bytes link rev
329 # 4 bytes parent 1 rev
329 # 4 bytes parent 1 rev
330 # 4 bytes parent 2 rev
330 # 4 bytes parent 2 rev
331 # 32 bytes: nodeid
331 # 32 bytes: nodeid
332 indexformatng = ">Qiiiiii20s12x"
332 indexformatng = ">Qiiiiii20s12x"
333 ngshaoffset = 32
333 ngshaoffset = 32
334 versionformat = ">I"
334 versionformat = ">I"
335
335
336 class revlogio(object):
336 class revlogio(object):
337 def __init__(self):
337 def __init__(self):
338 self.size = struct.calcsize(indexformatng)
338 self.size = struct.calcsize(indexformatng)
339
339
340 def parseindex(self, fp, inline):
340 def parseindex(self, fp, inline):
341 try:
341 try:
342 size = util.fstat(fp).st_size
342 size = util.fstat(fp).st_size
343 except AttributeError:
343 except AttributeError:
344 size = 0
344 size = 0
345
345
346 if lazyparser.safe_to_use and not inline and size > 1000000:
346 if lazyparser.safe_to_use and not inline and size > 1000000:
347 # big index, let's parse it on demand
347 # big index, let's parse it on demand
348 parser = lazyparser(fp, size)
348 parser = lazyparser(fp, size)
349 index = lazyindex(parser)
349 index = lazyindex(parser)
350 nodemap = lazymap(parser)
350 nodemap = lazymap(parser)
351 e = list(index[0])
351 e = list(index[0])
352 type = gettype(e[0])
352 type = gettype(e[0])
353 e[0] = offset_type(0, type)
353 e[0] = offset_type(0, type)
354 index[0] = e
354 index[0] = e
355 return index, nodemap, None
355 return index, nodemap, None
356
356
357 s = self.size
357 s = self.size
358 cache = None
358 cache = None
359 index = []
359 index = []
360 nodemap = {nullid: nullrev}
360 nodemap = {nullid: nullrev}
361 n = off = 0
361 n = off = 0
362 # if we're not using lazymap, always read the whole index
362 # if we're not using lazymap, always read the whole index
363 data = fp.read()
363 data = fp.read()
364 l = len(data) - s
364 l = len(data) - s
365 append = index.append
365 append = index.append
366 if inline:
366 if inline:
367 cache = (0, data)
367 cache = (0, data)
368 while off <= l:
368 while off <= l:
369 e = _unpack(indexformatng, data[off:off + s])
369 e = _unpack(indexformatng, data[off:off + s])
370 nodemap[e[7]] = n
370 nodemap[e[7]] = n
371 append(e)
371 append(e)
372 n += 1
372 n += 1
373 if e[1] < 0:
373 if e[1] < 0:
374 break
374 break
375 off += e[1] + s
375 off += e[1] + s
376 else:
376 else:
377 while off <= l:
377 while off <= l:
378 e = _unpack(indexformatng, data[off:off + s])
378 e = _unpack(indexformatng, data[off:off + s])
379 nodemap[e[7]] = n
379 nodemap[e[7]] = n
380 append(e)
380 append(e)
381 n += 1
381 n += 1
382 off += s
382 off += s
383
383
384 e = list(index[0])
384 e = list(index[0])
385 type = gettype(e[0])
385 type = gettype(e[0])
386 e[0] = offset_type(0, type)
386 e[0] = offset_type(0, type)
387 index[0] = e
387 index[0] = e
388
388
389 return index, nodemap, cache
389 return index, nodemap, cache
390
390
391 def packentry(self, entry, node, version, rev):
391 def packentry(self, entry, node, version, rev):
392 p = _pack(indexformatng, *entry)
392 p = _pack(indexformatng, *entry)
393 if rev == 0:
393 if rev == 0:
394 p = _pack(versionformat, version) + p[4:]
394 p = _pack(versionformat, version) + p[4:]
395 return p
395 return p
396
396
397 class revlog(object):
397 class revlog(object):
398 """
398 """
399 the underlying revision storage object
399 the underlying revision storage object
400
400
401 A revlog consists of two parts, an index and the revision data.
401 A revlog consists of two parts, an index and the revision data.
402
402
403 The index is a file with a fixed record size containing
403 The index is a file with a fixed record size containing
404 information on each revision, includings its nodeid (hash), the
404 information on each revision, includings its nodeid (hash), the
405 nodeids of its parents, the position and offset of its data within
405 nodeids of its parents, the position and offset of its data within
406 the data file, and the revision it's based on. Finally, each entry
406 the data file, and the revision it's based on. Finally, each entry
407 contains a linkrev entry that can serve as a pointer to external
407 contains a linkrev entry that can serve as a pointer to external
408 data.
408 data.
409
409
410 The revision data itself is a linear collection of data chunks.
410 The revision data itself is a linear collection of data chunks.
411 Each chunk represents a revision and is usually represented as a
411 Each chunk represents a revision and is usually represented as a
412 delta against the previous chunk. To bound lookup time, runs of
412 delta against the previous chunk. To bound lookup time, runs of
413 deltas are limited to about 2 times the length of the original
413 deltas are limited to about 2 times the length of the original
414 version data. This makes retrieval of a version proportional to
414 version data. This makes retrieval of a version proportional to
415 its size, or O(1) relative to the number of revisions.
415 its size, or O(1) relative to the number of revisions.
416
416
417 Both pieces of the revlog are written to in an append-only
417 Both pieces of the revlog are written to in an append-only
418 fashion, which means we never need to rewrite a file to insert or
418 fashion, which means we never need to rewrite a file to insert or
419 remove data, and can use some simple techniques to avoid the need
419 remove data, and can use some simple techniques to avoid the need
420 for locking while reading.
420 for locking while reading.
421 """
421 """
422 def __init__(self, opener, indexfile):
422 def __init__(self, opener, indexfile):
423 """
423 """
424 create a revlog object
424 create a revlog object
425
425
426 opener is a function that abstracts the file opening operation
426 opener is a function that abstracts the file opening operation
427 and can be used to implement COW semantics or the like.
427 and can be used to implement COW semantics or the like.
428 """
428 """
429 self.indexfile = indexfile
429 self.indexfile = indexfile
430 self.datafile = indexfile[:-2] + ".d"
430 self.datafile = indexfile[:-2] + ".d"
431 self.opener = opener
431 self.opener = opener
432 self._cache = None
432 self._cache = None
433 self._chunkcache = None
433 self._chunkcache = None
434 self.nodemap = {nullid: nullrev}
434 self.nodemap = {nullid: nullrev}
435 self.index = []
435 self.index = []
436
436
437 v = REVLOG_DEFAULT_VERSION
437 v = REVLOG_DEFAULT_VERSION
438 if hasattr(opener, "defversion"):
438 if hasattr(opener, "defversion"):
439 v = opener.defversion
439 v = opener.defversion
440 if v & REVLOGNG:
440 if v & REVLOGNG:
441 v |= REVLOGNGINLINEDATA
441 v |= REVLOGNGINLINEDATA
442
442
443 i = ""
443 i = ""
444 try:
444 try:
445 f = self.opener(self.indexfile)
445 f = self.opener(self.indexfile)
446 i = f.read(4)
446 i = f.read(4)
447 f.seek(0)
447 f.seek(0)
448 if len(i) > 0:
448 if len(i) > 0:
449 v = struct.unpack(versionformat, i)[0]
449 v = struct.unpack(versionformat, i)[0]
450 except IOError, inst:
450 except IOError, inst:
451 if inst.errno != errno.ENOENT:
451 if inst.errno != errno.ENOENT:
452 raise
452 raise
453
453
454 self.version = v
454 self.version = v
455 self._inline = v & REVLOGNGINLINEDATA
455 self._inline = v & REVLOGNGINLINEDATA
456 flags = v & ~0xFFFF
456 flags = v & ~0xFFFF
457 fmt = v & 0xFFFF
457 fmt = v & 0xFFFF
458 if fmt == REVLOGV0 and flags:
458 if fmt == REVLOGV0 and flags:
459 raise RevlogError(_("index %s unknown flags %#04x for format v0")
459 raise RevlogError(_("index %s unknown flags %#04x for format v0")
460 % (self.indexfile, flags >> 16))
460 % (self.indexfile, flags >> 16))
461 elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
461 elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
462 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
462 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
463 % (self.indexfile, flags >> 16))
463 % (self.indexfile, flags >> 16))
464 elif fmt > REVLOGNG:
464 elif fmt > REVLOGNG:
465 raise RevlogError(_("index %s unknown format %d")
465 raise RevlogError(_("index %s unknown format %d")
466 % (self.indexfile, fmt))
466 % (self.indexfile, fmt))
467
467
468 self._io = revlogio()
468 self._io = revlogio()
469 if self.version == REVLOGV0:
469 if self.version == REVLOGV0:
470 self._io = revlogoldio()
470 self._io = revlogoldio()
471 if i:
471 if i:
472 d = self._io.parseindex(f, self._inline)
472 d = self._io.parseindex(f, self._inline)
473 self.index, self.nodemap, self._chunkcache = d
473 self.index, self.nodemap, self._chunkcache = d
474
474
475 # add the magic null revision at -1
475 # add the magic null revision at -1
476 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
476 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
477
477
478 def _loadindex(self, start, end):
478 def _loadindex(self, start, end):
479 """load a block of indexes all at once from the lazy parser"""
479 """load a block of indexes all at once from the lazy parser"""
480 if isinstance(self.index, lazyindex):
480 if isinstance(self.index, lazyindex):
481 self.index.p.loadindex(start, end)
481 self.index.p.loadindex(start, end)
482
482
483 def _loadindexmap(self):
483 def _loadindexmap(self):
484 """loads both the map and the index from the lazy parser"""
484 """loads both the map and the index from the lazy parser"""
485 if isinstance(self.index, lazyindex):
485 if isinstance(self.index, lazyindex):
486 p = self.index.p
486 p = self.index.p
487 p.loadindex()
487 p.loadindex()
488 self.nodemap = p.map
488 self.nodemap = p.map
489
489
490 def _loadmap(self):
490 def _loadmap(self):
491 """loads the map from the lazy parser"""
491 """loads the map from the lazy parser"""
492 if isinstance(self.nodemap, lazymap):
492 if isinstance(self.nodemap, lazymap):
493 self.nodemap.p.loadmap()
493 self.nodemap.p.loadmap()
494 self.nodemap = self.nodemap.p.map
494 self.nodemap = self.nodemap.p.map
495
495
496 def tip(self):
496 def tip(self):
497 return self.node(len(self.index) - 2)
497 return self.node(len(self.index) - 2)
498 def count(self):
498 def count(self):
499 return len(self.index) - 1
499 return len(self.index) - 1
500
500
501 def rev(self, node):
501 def rev(self, node):
502 try:
502 try:
503 return self.nodemap[node]
503 return self.nodemap[node]
504 except KeyError:
504 except KeyError:
505 raise LookupError(_('%s: no node %s') % (self.indexfile, hex(node)))
505 raise LookupError(_('%s: no node %s') % (self.indexfile, hex(node)))
506 def node(self, rev):
506 def node(self, rev):
507 return self.index[rev][7]
507 return self.index[rev][7]
508 def linkrev(self, node):
508 def linkrev(self, node):
509 return self.index[self.rev(node)][4]
509 return self.index[self.rev(node)][4]
510 def parents(self, node):
510 def parents(self, node):
511 d = self.index[self.rev(node)][5:7]
511 d = self.index[self.rev(node)][5:7]
512 return (self.node(d[0]), self.node(d[1]))
512 return (self.node(d[0]), self.node(d[1]))
513 def parentrevs(self, rev):
513 def parentrevs(self, rev):
514 return self.index[rev][5:7]
514 return self.index[rev][5:7]
515 def start(self, rev):
515 def start(self, rev):
516 return int(self.index[rev][0] >> 16)
516 return int(self.index[rev][0] >> 16)
517 def end(self, rev):
517 def end(self, rev):
518 return self.start(rev) + self.length(rev)
518 return self.start(rev) + self.length(rev)
519 def length(self, rev):
519 def length(self, rev):
520 return self.index[rev][1]
520 return self.index[rev][1]
521 def base(self, rev):
521 def base(self, rev):
522 return self.index[rev][3]
522 return self.index[rev][3]
523
523
524 def size(self, rev):
524 def size(self, rev):
525 """return the length of the uncompressed text for a given revision"""
525 """return the length of the uncompressed text for a given revision"""
526 l = self.index[rev][2]
526 l = self.index[rev][2]
527 if l >= 0:
527 if l >= 0:
528 return l
528 return l
529
529
530 t = self.revision(self.node(rev))
530 t = self.revision(self.node(rev))
531 return len(t)
531 return len(t)
532
532
533 # alternate implementation, The advantage to this code is it
533 # alternate implementation, The advantage to this code is it
534 # will be faster for a single revision. But, the results are not
534 # will be faster for a single revision. But, the results are not
535 # cached, so finding the size of every revision will be slower.
535 # cached, so finding the size of every revision will be slower.
536 """
536 """
537 if self.cache and self.cache[1] == rev:
537 if self.cache and self.cache[1] == rev:
538 return len(self.cache[2])
538 return len(self.cache[2])
539
539
540 base = self.base(rev)
540 base = self.base(rev)
541 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
541 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
542 base = self.cache[1]
542 base = self.cache[1]
543 text = self.cache[2]
543 text = self.cache[2]
544 else:
544 else:
545 text = self.revision(self.node(base))
545 text = self.revision(self.node(base))
546
546
547 l = len(text)
547 l = len(text)
548 for x in xrange(base + 1, rev + 1):
548 for x in xrange(base + 1, rev + 1):
549 l = mdiff.patchedsize(l, self.chunk(x))
549 l = mdiff.patchedsize(l, self.chunk(x))
550 return l
550 return l
551 """
551 """
552
552
553 def reachable(self, node, stop=None):
553 def reachable(self, node, stop=None):
554 """return a hash of all nodes ancestral to a given node, including
554 """return a hash of all nodes ancestral to a given node, including
555 the node itself, stopping when stop is matched"""
555 the node itself, stopping when stop is matched"""
556 reachable = {}
556 reachable = {}
557 visit = [node]
557 visit = [node]
558 reachable[node] = 1
558 reachable[node] = 1
559 if stop:
559 if stop:
560 stopn = self.rev(stop)
560 stopn = self.rev(stop)
561 else:
561 else:
562 stopn = 0
562 stopn = 0
563 while visit:
563 while visit:
564 n = visit.pop(0)
564 n = visit.pop(0)
565 if n == stop:
565 if n == stop:
566 continue
566 continue
567 if n == nullid:
567 if n == nullid:
568 continue
568 continue
569 for p in self.parents(n):
569 for p in self.parents(n):
570 if self.rev(p) < stopn:
570 if self.rev(p) < stopn:
571 continue
571 continue
572 if p not in reachable:
572 if p not in reachable:
573 reachable[p] = 1
573 reachable[p] = 1
574 visit.append(p)
574 visit.append(p)
575 return reachable
575 return reachable
576
576
577 def nodesbetween(self, roots=None, heads=None):
577 def nodesbetween(self, roots=None, heads=None):
578 """Return a tuple containing three elements. Elements 1 and 2 contain
578 """Return a tuple containing three elements. Elements 1 and 2 contain
579 a final list bases and heads after all the unreachable ones have been
579 a final list bases and heads after all the unreachable ones have been
580 pruned. Element 0 contains a topologically sorted list of all
580 pruned. Element 0 contains a topologically sorted list of all
581
581
582 nodes that satisfy these constraints:
582 nodes that satisfy these constraints:
583 1. All nodes must be descended from a node in roots (the nodes on
583 1. All nodes must be descended from a node in roots (the nodes on
584 roots are considered descended from themselves).
584 roots are considered descended from themselves).
585 2. All nodes must also be ancestors of a node in heads (the nodes in
585 2. All nodes must also be ancestors of a node in heads (the nodes in
586 heads are considered to be their own ancestors).
586 heads are considered to be their own ancestors).
587
587
588 If roots is unspecified, nullid is assumed as the only root.
588 If roots is unspecified, nullid is assumed as the only root.
589 If heads is unspecified, it is taken to be the output of the
589 If heads is unspecified, it is taken to be the output of the
590 heads method (i.e. a list of all nodes in the repository that
590 heads method (i.e. a list of all nodes in the repository that
591 have no children)."""
591 have no children)."""
592 nonodes = ([], [], [])
592 nonodes = ([], [], [])
593 if roots is not None:
593 if roots is not None:
594 roots = list(roots)
594 roots = list(roots)
595 if not roots:
595 if not roots:
596 return nonodes
596 return nonodes
597 lowestrev = min([self.rev(n) for n in roots])
597 lowestrev = min([self.rev(n) for n in roots])
598 else:
598 else:
599 roots = [nullid] # Everybody's a descendent of nullid
599 roots = [nullid] # Everybody's a descendent of nullid
600 lowestrev = nullrev
600 lowestrev = nullrev
601 if (lowestrev == nullrev) and (heads is None):
601 if (lowestrev == nullrev) and (heads is None):
602 # We want _all_ the nodes!
602 # We want _all_ the nodes!
603 return ([self.node(r) for r in xrange(0, self.count())],
603 return ([self.node(r) for r in xrange(0, self.count())],
604 [nullid], list(self.heads()))
604 [nullid], list(self.heads()))
605 if heads is None:
605 if heads is None:
606 # All nodes are ancestors, so the latest ancestor is the last
606 # All nodes are ancestors, so the latest ancestor is the last
607 # node.
607 # node.
608 highestrev = self.count() - 1
608 highestrev = self.count() - 1
609 # Set ancestors to None to signal that every node is an ancestor.
609 # Set ancestors to None to signal that every node is an ancestor.
610 ancestors = None
610 ancestors = None
611 # Set heads to an empty dictionary for later discovery of heads
611 # Set heads to an empty dictionary for later discovery of heads
612 heads = {}
612 heads = {}
613 else:
613 else:
614 heads = list(heads)
614 heads = list(heads)
615 if not heads:
615 if not heads:
616 return nonodes
616 return nonodes
617 ancestors = {}
617 ancestors = {}
618 # Turn heads into a dictionary so we can remove 'fake' heads.
618 # Turn heads into a dictionary so we can remove 'fake' heads.
619 # Also, later we will be using it to filter out the heads we can't
619 # Also, later we will be using it to filter out the heads we can't
620 # find from roots.
620 # find from roots.
621 heads = dict.fromkeys(heads, 0)
621 heads = dict.fromkeys(heads, 0)
622 # Start at the top and keep marking parents until we're done.
622 # Start at the top and keep marking parents until we're done.
623 nodestotag = heads.keys()
623 nodestotag = heads.keys()
624 # Remember where the top was so we can use it as a limit later.
624 # Remember where the top was so we can use it as a limit later.
625 highestrev = max([self.rev(n) for n in nodestotag])
625 highestrev = max([self.rev(n) for n in nodestotag])
626 while nodestotag:
626 while nodestotag:
627 # grab a node to tag
627 # grab a node to tag
628 n = nodestotag.pop()
628 n = nodestotag.pop()
629 # Never tag nullid
629 # Never tag nullid
630 if n == nullid:
630 if n == nullid:
631 continue
631 continue
632 # A node's revision number represents its place in a
632 # A node's revision number represents its place in a
633 # topologically sorted list of nodes.
633 # topologically sorted list of nodes.
634 r = self.rev(n)
634 r = self.rev(n)
635 if r >= lowestrev:
635 if r >= lowestrev:
636 if n not in ancestors:
636 if n not in ancestors:
637 # If we are possibly a descendent of one of the roots
637 # If we are possibly a descendent of one of the roots
638 # and we haven't already been marked as an ancestor
638 # and we haven't already been marked as an ancestor
639 ancestors[n] = 1 # Mark as ancestor
639 ancestors[n] = 1 # Mark as ancestor
640 # Add non-nullid parents to list of nodes to tag.
640 # Add non-nullid parents to list of nodes to tag.
641 nodestotag.extend([p for p in self.parents(n) if
641 nodestotag.extend([p for p in self.parents(n) if
642 p != nullid])
642 p != nullid])
643 elif n in heads: # We've seen it before, is it a fake head?
643 elif n in heads: # We've seen it before, is it a fake head?
644 # So it is, real heads should not be the ancestors of
644 # So it is, real heads should not be the ancestors of
645 # any other heads.
645 # any other heads.
646 heads.pop(n)
646 heads.pop(n)
647 if not ancestors:
647 if not ancestors:
648 return nonodes
648 return nonodes
649 # Now that we have our set of ancestors, we want to remove any
649 # Now that we have our set of ancestors, we want to remove any
650 # roots that are not ancestors.
650 # roots that are not ancestors.
651
651
652 # If one of the roots was nullid, everything is included anyway.
652 # If one of the roots was nullid, everything is included anyway.
653 if lowestrev > nullrev:
653 if lowestrev > nullrev:
654 # But, since we weren't, let's recompute the lowest rev to not
654 # But, since we weren't, let's recompute the lowest rev to not
655 # include roots that aren't ancestors.
655 # include roots that aren't ancestors.
656
656
657 # Filter out roots that aren't ancestors of heads
657 # Filter out roots that aren't ancestors of heads
658 roots = [n for n in roots if n in ancestors]
658 roots = [n for n in roots if n in ancestors]
659 # Recompute the lowest revision
659 # Recompute the lowest revision
660 if roots:
660 if roots:
661 lowestrev = min([self.rev(n) for n in roots])
661 lowestrev = min([self.rev(n) for n in roots])
662 else:
662 else:
663 # No more roots? Return empty list
663 # No more roots? Return empty list
664 return nonodes
664 return nonodes
665 else:
665 else:
666 # We are descending from nullid, and don't need to care about
666 # We are descending from nullid, and don't need to care about
667 # any other roots.
667 # any other roots.
668 lowestrev = nullrev
668 lowestrev = nullrev
669 roots = [nullid]
669 roots = [nullid]
670 # Transform our roots list into a 'set' (i.e. a dictionary where the
670 # Transform our roots list into a 'set' (i.e. a dictionary where the
671 # values don't matter.
671 # values don't matter.
672 descendents = dict.fromkeys(roots, 1)
672 descendents = dict.fromkeys(roots, 1)
673 # Also, keep the original roots so we can filter out roots that aren't
673 # Also, keep the original roots so we can filter out roots that aren't
674 # 'real' roots (i.e. are descended from other roots).
674 # 'real' roots (i.e. are descended from other roots).
675 roots = descendents.copy()
675 roots = descendents.copy()
676 # Our topologically sorted list of output nodes.
676 # Our topologically sorted list of output nodes.
677 orderedout = []
677 orderedout = []
678 # Don't start at nullid since we don't want nullid in our output list,
678 # Don't start at nullid since we don't want nullid in our output list,
679 # and if nullid shows up in descedents, empty parents will look like
679 # and if nullid shows up in descedents, empty parents will look like
680 # they're descendents.
680 # they're descendents.
681 for r in xrange(max(lowestrev, 0), highestrev + 1):
681 for r in xrange(max(lowestrev, 0), highestrev + 1):
682 n = self.node(r)
682 n = self.node(r)
683 isdescendent = False
683 isdescendent = False
684 if lowestrev == nullrev: # Everybody is a descendent of nullid
684 if lowestrev == nullrev: # Everybody is a descendent of nullid
685 isdescendent = True
685 isdescendent = True
686 elif n in descendents:
686 elif n in descendents:
687 # n is already a descendent
687 # n is already a descendent
688 isdescendent = True
688 isdescendent = True
689 # This check only needs to be done here because all the roots
689 # This check only needs to be done here because all the roots
690 # will start being marked is descendents before the loop.
690 # will start being marked is descendents before the loop.
691 if n in roots:
691 if n in roots:
692 # If n was a root, check if it's a 'real' root.
692 # If n was a root, check if it's a 'real' root.
693 p = tuple(self.parents(n))
693 p = tuple(self.parents(n))
694 # If any of its parents are descendents, it's not a root.
694 # If any of its parents are descendents, it's not a root.
695 if (p[0] in descendents) or (p[1] in descendents):
695 if (p[0] in descendents) or (p[1] in descendents):
696 roots.pop(n)
696 roots.pop(n)
697 else:
697 else:
698 p = tuple(self.parents(n))
698 p = tuple(self.parents(n))
699 # A node is a descendent if either of its parents are
699 # A node is a descendent if either of its parents are
700 # descendents. (We seeded the dependents list with the roots
700 # descendents. (We seeded the dependents list with the roots
701 # up there, remember?)
701 # up there, remember?)
702 if (p[0] in descendents) or (p[1] in descendents):
702 if (p[0] in descendents) or (p[1] in descendents):
703 descendents[n] = 1
703 descendents[n] = 1
704 isdescendent = True
704 isdescendent = True
705 if isdescendent and ((ancestors is None) or (n in ancestors)):
705 if isdescendent and ((ancestors is None) or (n in ancestors)):
706 # Only include nodes that are both descendents and ancestors.
706 # Only include nodes that are both descendents and ancestors.
707 orderedout.append(n)
707 orderedout.append(n)
708 if (ancestors is not None) and (n in heads):
708 if (ancestors is not None) and (n in heads):
709 # We're trying to figure out which heads are reachable
709 # We're trying to figure out which heads are reachable
710 # from roots.
710 # from roots.
711 # Mark this head as having been reached
711 # Mark this head as having been reached
712 heads[n] = 1
712 heads[n] = 1
713 elif ancestors is None:
713 elif ancestors is None:
714 # Otherwise, we're trying to discover the heads.
714 # Otherwise, we're trying to discover the heads.
715 # Assume this is a head because if it isn't, the next step
715 # Assume this is a head because if it isn't, the next step
716 # will eventually remove it.
716 # will eventually remove it.
717 heads[n] = 1
717 heads[n] = 1
718 # But, obviously its parents aren't.
718 # But, obviously its parents aren't.
719 for p in self.parents(n):
719 for p in self.parents(n):
720 heads.pop(p, None)
720 heads.pop(p, None)
721 heads = [n for n in heads.iterkeys() if heads[n] != 0]
721 heads = [n for n in heads.iterkeys() if heads[n] != 0]
722 roots = roots.keys()
722 roots = roots.keys()
723 assert orderedout
723 assert orderedout
724 assert roots
724 assert roots
725 assert heads
725 assert heads
726 return (orderedout, roots, heads)
726 return (orderedout, roots, heads)
727
727
728 def heads(self, start=None, stop=None):
728 def heads(self, start=None, stop=None):
729 """return the list of all nodes that have no children
729 """return the list of all nodes that have no children
730
730
731 if start is specified, only heads that are descendants of
731 if start is specified, only heads that are descendants of
732 start will be returned
732 start will be returned
733 if stop is specified, it will consider all the revs from stop
733 if stop is specified, it will consider all the revs from stop
734 as if they had no children
734 as if they had no children
735 """
735 """
736 if start is None and stop is None:
736 if start is None and stop is None:
737 count = self.count()
737 count = self.count()
738 if not count:
738 if not count:
739 return [nullid]
739 return [nullid]
740 ishead = [1] * (count + 1)
740 ishead = [1] * (count + 1)
741 index = self.index
741 index = self.index
742 for r in xrange(count):
742 for r in xrange(count):
743 e = index[r]
743 e = index[r]
744 ishead[e[5]] = ishead[e[6]] = 0
744 ishead[e[5]] = ishead[e[6]] = 0
745 return [self.node(r) for r in xrange(count) if ishead[r]]
745 return [self.node(r) for r in xrange(count) if ishead[r]]
746
746
747 if start is None:
747 if start is None:
748 start = nullid
748 start = nullid
749 if stop is None:
749 if stop is None:
750 stop = []
750 stop = []
751 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
751 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
752 startrev = self.rev(start)
752 startrev = self.rev(start)
753 reachable = {startrev: 1}
753 reachable = {startrev: 1}
754 heads = {startrev: 1}
754 heads = {startrev: 1}
755
755
756 parentrevs = self.parentrevs
756 parentrevs = self.parentrevs
757 for r in xrange(startrev + 1, self.count()):
757 for r in xrange(startrev + 1, self.count()):
758 for p in parentrevs(r):
758 for p in parentrevs(r):
759 if p in reachable:
759 if p in reachable:
760 if r not in stoprevs:
760 if r not in stoprevs:
761 reachable[r] = 1
761 reachable[r] = 1
762 heads[r] = 1
762 heads[r] = 1
763 if p in heads and p not in stoprevs:
763 if p in heads and p not in stoprevs:
764 del heads[p]
764 del heads[p]
765
765
766 return [self.node(r) for r in heads]
766 return [self.node(r) for r in heads]
767
767
768 def children(self, node):
768 def children(self, node):
769 """find the children of a given node"""
769 """find the children of a given node"""
770 c = []
770 c = []
771 p = self.rev(node)
771 p = self.rev(node)
772 for r in range(p + 1, self.count()):
772 for r in range(p + 1, self.count()):
773 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
773 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
774 if prevs:
774 if prevs:
775 for pr in prevs:
775 for pr in prevs:
776 if pr == p:
776 if pr == p:
777 c.append(self.node(r))
777 c.append(self.node(r))
778 elif p == nullrev:
778 elif p == nullrev:
779 c.append(self.node(r))
779 c.append(self.node(r))
780 return c
780 return c
781
781
782 def _match(self, id):
782 def _match(self, id):
783 if isinstance(id, (long, int)):
783 if isinstance(id, (long, int)):
784 # rev
784 # rev
785 return self.node(id)
785 return self.node(id)
786 if len(id) == 20:
786 if len(id) == 20:
787 # possibly a binary node
787 # possibly a binary node
788 # odds of a binary node being all hex in ASCII are 1 in 10**25
788 # odds of a binary node being all hex in ASCII are 1 in 10**25
789 try:
789 try:
790 node = id
790 node = id
791 r = self.rev(node) # quick search the index
791 r = self.rev(node) # quick search the index
792 return node
792 return node
793 except LookupError:
793 except LookupError:
794 pass # may be partial hex id
794 pass # may be partial hex id
795 try:
795 try:
796 # str(rev)
796 # str(rev)
797 rev = int(id)
797 rev = int(id)
798 if str(rev) != id:
798 if str(rev) != id:
799 raise ValueError
799 raise ValueError
800 if rev < 0:
800 if rev < 0:
801 rev = self.count() + rev
801 rev = self.count() + rev
802 if rev < 0 or rev >= self.count():
802 if rev < 0 or rev >= self.count():
803 raise ValueError
803 raise ValueError
804 return self.node(rev)
804 return self.node(rev)
805 except (ValueError, OverflowError):
805 except (ValueError, OverflowError):
806 pass
806 pass
807 if len(id) == 40:
807 if len(id) == 40:
808 try:
808 try:
809 # a full hex nodeid?
809 # a full hex nodeid?
810 node = bin(id)
810 node = bin(id)
811 r = self.rev(node)
811 r = self.rev(node)
812 return node
812 return node
813 except TypeError:
813 except TypeError:
814 pass
814 pass
815
815
816 def _partialmatch(self, id):
816 def _partialmatch(self, id):
817 if len(id) < 40:
817 if len(id) < 40:
818 try:
818 try:
819 # hex(node)[:...]
819 # hex(node)[:...]
820 bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
820 bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
821 node = None
821 node = None
822 for n in self.nodemap:
822 for n in self.nodemap:
823 if n.startswith(bin_id) and hex(n).startswith(id):
823 if n.startswith(bin_id) and hex(n).startswith(id):
824 if node is not None:
824 if node is not None:
825 raise LookupError(_("Ambiguous identifier"))
825 raise LookupError(_("Ambiguous identifier"))
826 node = n
826 node = n
827 if node is not None:
827 if node is not None:
828 return node
828 return node
829 except TypeError:
829 except TypeError:
830 pass
830 pass
831
831
832 def lookup(self, id):
832 def lookup(self, id):
833 """locate a node based on:
833 """locate a node based on:
834 - revision number or str(revision number)
834 - revision number or str(revision number)
835 - nodeid or subset of hex nodeid
835 - nodeid or subset of hex nodeid
836 """
836 """
837 n = self._match(id)
837 n = self._match(id)
838 if n is not None:
838 if n is not None:
839 return n
839 return n
840 n = self._partialmatch(id)
840 n = self._partialmatch(id)
841 if n:
841 if n:
842 return n
842 return n
843
843
844 raise LookupError(_("No match found"))
844 raise LookupError(_("No match found"))
845
845
846 def cmp(self, node, text):
846 def cmp(self, node, text):
847 """compare text with a given file revision"""
847 """compare text with a given file revision"""
848 p1, p2 = self.parents(node)
848 p1, p2 = self.parents(node)
849 return hash(text, p1, p2) != node
849 return hash(text, p1, p2) != node
850
850
851 def chunk(self, rev, df=None):
851 def chunk(self, rev, df=None):
852 def loadcache(df):
852 def loadcache(df):
853 if not df:
853 if not df:
854 if self._inline:
854 if self._inline:
855 df = self.opener(self.indexfile)
855 df = self.opener(self.indexfile)
856 else:
856 else:
857 df = self.opener(self.datafile)
857 df = self.opener(self.datafile)
858 df.seek(start)
858 df.seek(start)
859 self._chunkcache = (start, df.read(cache_length))
859 self._chunkcache = (start, df.read(cache_length))
860
860
861 start, length = self.start(rev), self.length(rev)
861 start, length = self.start(rev), self.length(rev)
862 if self._inline:
862 if self._inline:
863 start += (rev + 1) * self._io.size
863 start += (rev + 1) * self._io.size
864 end = start + length
864 end = start + length
865
865
866 offset = 0
866 offset = 0
867 if not self._chunkcache:
867 if not self._chunkcache:
868 cache_length = max(65536, length)
868 cache_length = max(65536, length)
869 loadcache(df)
869 loadcache(df)
870 else:
870 else:
871 cache_start = self._chunkcache[0]
871 cache_start = self._chunkcache[0]
872 cache_length = len(self._chunkcache[1])
872 cache_length = len(self._chunkcache[1])
873 cache_end = cache_start + cache_length
873 cache_end = cache_start + cache_length
874 if start >= cache_start and end <= cache_end:
874 if start >= cache_start and end <= cache_end:
875 # it is cached
875 # it is cached
876 offset = start - cache_start
876 offset = start - cache_start
877 else:
877 else:
878 cache_length = max(65536, length)
878 cache_length = max(65536, length)
879 loadcache(df)
879 loadcache(df)
880
880
881 # avoid copying large chunks
881 # avoid copying large chunks
882 c = self._chunkcache[1]
882 c = self._chunkcache[1]
883 if cache_length != length:
883 if cache_length != length:
884 c = c[offset:offset + length]
884 c = c[offset:offset + length]
885
885
886 return decompress(c)
886 return decompress(c)
887
887
888 def delta(self, node):
888 def delta(self, node):
889 """return or calculate a delta between a node and its predecessor"""
889 """return or calculate a delta between a node and its predecessor"""
890 r = self.rev(node)
890 r = self.rev(node)
891 return self.revdiff(r - 1, r)
891 return self.revdiff(r - 1, r)
892
892
893 def revdiff(self, rev1, rev2):
893 def revdiff(self, rev1, rev2):
894 """return or calculate a delta between two revisions"""
894 """return or calculate a delta between two revisions"""
895 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
895 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
896 return self.chunk(rev2)
896 return self.chunk(rev2)
897
897
898 return mdiff.textdiff(self.revision(self.node(rev1)),
898 return mdiff.textdiff(self.revision(self.node(rev1)),
899 self.revision(self.node(rev2)))
899 self.revision(self.node(rev2)))
900
900
901 def revision(self, node):
901 def revision(self, node):
902 """return an uncompressed revision of a given"""
902 """return an uncompressed revision of a given"""
903 if node == nullid:
903 if node == nullid:
904 return ""
904 return ""
905 if self._cache and self._cache[0] == node:
905 if self._cache and self._cache[0] == node:
906 return self._cache[2]
906 return self._cache[2]
907
907
908 # look up what we need to read
908 # look up what we need to read
909 text = None
909 text = None
910 rev = self.rev(node)
910 rev = self.rev(node)
911 base = self.base(rev)
911 base = self.base(rev)
912
912
913 # check rev flags
913 # check rev flags
914 if self.index[rev][0] & 0xFFFF:
914 if self.index[rev][0] & 0xFFFF:
915 raise RevlogError(_('incompatible revision flag %x') %
915 raise RevlogError(_('incompatible revision flag %x') %
916 (self.index[rev][0] & 0xFFFF))
916 (self.index[rev][0] & 0xFFFF))
917
917
918 if self._inline:
918 if self._inline:
919 # we probably have the whole chunk cached
919 # we probably have the whole chunk cached
920 df = None
920 df = None
921 else:
921 else:
922 df = self.opener(self.datafile)
922 df = self.opener(self.datafile)
923
923
924 # do we have useful data cached?
924 # do we have useful data cached?
925 if self._cache and self._cache[1] >= base and self._cache[1] < rev:
925 if self._cache and self._cache[1] >= base and self._cache[1] < rev:
926 base = self._cache[1]
926 base = self._cache[1]
927 text = self._cache[2]
927 text = self._cache[2]
928 self._loadindex(base, rev + 1)
928 self._loadindex(base, rev + 1)
929 else:
929 else:
930 self._loadindex(base, rev + 1)
930 self._loadindex(base, rev + 1)
931 text = self.chunk(base, df=df)
931 text = self.chunk(base, df=df)
932
932
933 bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
933 bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
934 text = mdiff.patches(text, bins)
934 text = mdiff.patches(text, bins)
935 p1, p2 = self.parents(node)
935 p1, p2 = self.parents(node)
936 if node != hash(text, p1, p2):
936 if node != hash(text, p1, p2):
937 raise RevlogError(_("integrity check failed on %s:%d")
937 raise RevlogError(_("integrity check failed on %s:%d")
938 % (self.datafile, rev))
938 % (self.datafile, rev))
939
939
940 self._cache = (node, rev, text)
940 self._cache = (node, rev, text)
941 return text
941 return text
942
942
943 def checkinlinesize(self, tr, fp=None):
943 def checkinlinesize(self, tr, fp=None):
944 if not self._inline:
944 if not self._inline:
945 return
945 return
946 if not fp:
946 if not fp:
947 fp = self.opener(self.indexfile, 'r')
947 fp = self.opener(self.indexfile, 'r')
948 fp.seek(0, 2)
948 fp.seek(0, 2)
949 size = fp.tell()
949 size = fp.tell()
950 if size < 131072:
950 if size < 131072:
951 return
951 return
952 trinfo = tr.find(self.indexfile)
952 trinfo = tr.find(self.indexfile)
953 if trinfo == None:
953 if trinfo == None:
954 raise RevlogError(_("%s not found in the transaction")
954 raise RevlogError(_("%s not found in the transaction")
955 % self.indexfile)
955 % self.indexfile)
956
956
957 trindex = trinfo[2]
957 trindex = trinfo[2]
958 dataoff = self.start(trindex)
958 dataoff = self.start(trindex)
959
959
960 tr.add(self.datafile, dataoff)
960 tr.add(self.datafile, dataoff)
961 df = self.opener(self.datafile, 'w')
961 df = self.opener(self.datafile, 'w')
962 calc = self._io.size
962 calc = self._io.size
963 for r in xrange(self.count()):
963 for r in xrange(self.count()):
964 start = self.start(r) + (r + 1) * calc
964 start = self.start(r) + (r + 1) * calc
965 length = self.length(r)
965 length = self.length(r)
966 fp.seek(start)
966 fp.seek(start)
967 d = fp.read(length)
967 d = fp.read(length)
968 df.write(d)
968 df.write(d)
969 fp.close()
969 fp.close()
970 df.close()
970 df.close()
971 fp = self.opener(self.indexfile, 'w', atomictemp=True)
971 fp = self.opener(self.indexfile, 'w', atomictemp=True)
972 self.version &= ~(REVLOGNGINLINEDATA)
972 self.version &= ~(REVLOGNGINLINEDATA)
973 self._inline = False
973 self._inline = False
974 for i in xrange(self.count()):
974 for i in xrange(self.count()):
975 e = self._io.packentry(self.index[i], self.node, self.version, i)
975 e = self._io.packentry(self.index[i], self.node, self.version, i)
976 fp.write(e)
976 fp.write(e)
977
977
978 # if we don't call rename, the temp file will never replace the
978 # if we don't call rename, the temp file will never replace the
979 # real index
979 # real index
980 fp.rename()
980 fp.rename()
981
981
982 tr.replace(self.indexfile, trindex * calc)
982 tr.replace(self.indexfile, trindex * calc)
983 self._chunkcache = None
983 self._chunkcache = None
984
984
985 def addrevision(self, text, transaction, link, p1, p2, d=None):
985 def addrevision(self, text, transaction, link, p1, p2, d=None):
986 """add a revision to the log
986 """add a revision to the log
987
987
988 text - the revision data to add
988 text - the revision data to add
989 transaction - the transaction object used for rollback
989 transaction - the transaction object used for rollback
990 link - the linkrev data to add
990 link - the linkrev data to add
991 p1, p2 - the parent nodeids of the revision
991 p1, p2 - the parent nodeids of the revision
992 d - an optional precomputed delta
992 d - an optional precomputed delta
993 """
993 """
994 dfh = None
994 dfh = None
995 if not self._inline:
995 if not self._inline:
996 dfh = self.opener(self.datafile, "a")
996 dfh = self.opener(self.datafile, "a")
997 ifh = self.opener(self.indexfile, "a+")
997 ifh = self.opener(self.indexfile, "a+")
998 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
998 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
999
999
1000 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
1000 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
1001 node = hash(text, p1, p2)
1001 node = hash(text, p1, p2)
1002 if node in self.nodemap:
1002 if node in self.nodemap:
1003 return node
1003 return node
1004
1004
1005 curr = self.count()
1005 curr = self.count()
1006 prev = curr - 1
1006 prev = curr - 1
1007 base = self.base(prev)
1007 base = self.base(prev)
1008 offset = self.end(prev)
1008 offset = self.end(prev)
1009
1009
1010 if curr:
1010 if curr:
1011 if not d:
1011 if not d:
1012 ptext = self.revision(self.node(prev))
1012 ptext = self.revision(self.node(prev))
1013 d = mdiff.textdiff(ptext, text)
1013 d = mdiff.textdiff(ptext, text)
1014 data = compress(d)
1014 data = compress(d)
1015 l = len(data[1]) + len(data[0])
1015 l = len(data[1]) + len(data[0])
1016 dist = l + offset - self.start(base)
1016 dist = l + offset - self.start(base)
1017
1017
1018 # full versions are inserted when the needed deltas
1018 # full versions are inserted when the needed deltas
1019 # become comparable to the uncompressed text
1019 # become comparable to the uncompressed text
1020 if not curr or dist > len(text) * 2:
1020 if not curr or dist > len(text) * 2:
1021 data = compress(text)
1021 data = compress(text)
1022 l = len(data[1]) + len(data[0])
1022 l = len(data[1]) + len(data[0])
1023 base = curr
1023 base = curr
1024
1024
1025 e = (offset_type(offset, 0), l, len(text),
1025 e = (offset_type(offset, 0), l, len(text),
1026 base, link, self.rev(p1), self.rev(p2), node)
1026 base, link, self.rev(p1), self.rev(p2), node)
1027 self.index.insert(-1, e)
1027 self.index.insert(-1, e)
1028 self.nodemap[node] = curr
1028 self.nodemap[node] = curr
1029
1029
1030 entry = self._io.packentry(e, self.node, self.version, curr)
1030 entry = self._io.packentry(e, self.node, self.version, curr)
1031 if not self._inline:
1031 if not self._inline:
1032 transaction.add(self.datafile, offset)
1032 transaction.add(self.datafile, offset)
1033 transaction.add(self.indexfile, curr * len(entry))
1033 transaction.add(self.indexfile, curr * len(entry))
1034 if data[0]:
1034 if data[0]:
1035 dfh.write(data[0])
1035 dfh.write(data[0])
1036 dfh.write(data[1])
1036 dfh.write(data[1])
1037 dfh.flush()
1037 dfh.flush()
1038 ifh.write(entry)
1038 ifh.write(entry)
1039 else:
1039 else:
1040 offset += curr * self._io.size
1040 offset += curr * self._io.size
1041 transaction.add(self.indexfile, offset, curr)
1041 transaction.add(self.indexfile, offset, curr)
1042 ifh.write(entry)
1042 ifh.write(entry)
1043 ifh.write(data[0])
1043 ifh.write(data[0])
1044 ifh.write(data[1])
1044 ifh.write(data[1])
1045 self.checkinlinesize(transaction, ifh)
1045 self.checkinlinesize(transaction, ifh)
1046
1046
1047 self._cache = (node, curr, text)
1047 self._cache = (node, curr, text)
1048 return node
1048 return node
1049
1049
1050 def ancestor(self, a, b):
1050 def ancestor(self, a, b):
1051 """calculate the least common ancestor of nodes a and b"""
1051 """calculate the least common ancestor of nodes a and b"""
1052
1052
1053 def parents(rev):
1053 def parents(rev):
1054 return [p for p in self.parentrevs(rev) if p != nullrev]
1054 return [p for p in self.parentrevs(rev) if p != nullrev]
1055
1055
1056 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1056 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1057 if c is None:
1057 if c is None:
1058 return nullid
1058 return nullid
1059
1059
1060 return self.node(c)
1060 return self.node(c)
1061
1061
1062 def group(self, nodelist, lookup, infocollect=None):
1062 def group(self, nodelist, lookup, infocollect=None):
1063 """calculate a delta group
1063 """calculate a delta group
1064
1064
1065 Given a list of changeset revs, return a set of deltas and
1065 Given a list of changeset revs, return a set of deltas and
1066 metadata corresponding to nodes. the first delta is
1066 metadata corresponding to nodes. the first delta is
1067 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
1067 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
1068 have this parent as it has all history before these
1068 have this parent as it has all history before these
1069 changesets. parent is parent[0]
1069 changesets. parent is parent[0]
1070 """
1070 """
1071 revs = [self.rev(n) for n in nodelist]
1071 revs = [self.rev(n) for n in nodelist]
1072
1072
1073 # if we don't have any revisions touched by these changesets, bail
1073 # if we don't have any revisions touched by these changesets, bail
1074 if not revs:
1074 if not revs:
1075 yield changegroup.closechunk()
1075 yield changegroup.closechunk()
1076 return
1076 return
1077
1077
1078 # add the parent of the first rev
1078 # add the parent of the first rev
1079 p = self.parents(self.node(revs[0]))[0]
1079 p = self.parents(self.node(revs[0]))[0]
1080 revs.insert(0, self.rev(p))
1080 revs.insert(0, self.rev(p))
1081
1081
1082 # build deltas
1082 # build deltas
1083 for d in xrange(0, len(revs) - 1):
1083 for d in xrange(0, len(revs) - 1):
1084 a, b = revs[d], revs[d + 1]
1084 a, b = revs[d], revs[d + 1]
1085 nb = self.node(b)
1085 nb = self.node(b)
1086
1086
1087 if infocollect is not None:
1087 if infocollect is not None:
1088 infocollect(nb)
1088 infocollect(nb)
1089
1089
1090 p = self.parents(nb)
1090 p = self.parents(nb)
1091 meta = nb + p[0] + p[1] + lookup(nb)
1091 meta = nb + p[0] + p[1] + lookup(nb)
1092 if a == -1:
1092 if a == -1:
1093 d = self.revision(nb)
1093 d = self.revision(nb)
1094 meta += mdiff.trivialdiffheader(len(d))
1094 meta += mdiff.trivialdiffheader(len(d))
1095 else:
1095 else:
1096 d = self.revdiff(a, b)
1096 d = self.revdiff(a, b)
1097 yield changegroup.chunkheader(len(meta) + len(d))
1097 yield changegroup.chunkheader(len(meta) + len(d))
1098 yield meta
1098 yield meta
1099 yield d
1099 yield d
1100
1100
1101 yield changegroup.closechunk()
1101 yield changegroup.closechunk()
1102
1102
1103 def addgroup(self, revs, linkmapper, transaction, unique=0):
1103 def addgroup(self, revs, linkmapper, transaction, unique=0):
1104 """
1104 """
1105 add a delta group
1105 add a delta group
1106
1106
1107 given a set of deltas, add them to the revision log. the
1107 given a set of deltas, add them to the revision log. the
1108 first delta is against its parent, which should be in our
1108 first delta is against its parent, which should be in our
1109 log, the rest are against the previous delta.
1109 log, the rest are against the previous delta.
1110 """
1110 """
1111
1111
1112 #track the base of the current delta log
1112 #track the base of the current delta log
1113 r = self.count()
1113 r = self.count()
1114 t = r - 1
1114 t = r - 1
1115 node = None
1115 node = None
1116
1116
1117 base = prev = nullrev
1117 base = prev = nullrev
1118 start = end = textlen = 0
1118 start = end = textlen = 0
1119 if r:
1119 if r:
1120 end = self.end(t)
1120 end = self.end(t)
1121
1121
1122 ifh = self.opener(self.indexfile, "a+")
1122 ifh = self.opener(self.indexfile, "a+")
1123 isize = r * self._io.size
1123 isize = r * self._io.size
1124 if self._inline:
1124 if self._inline:
1125 transaction.add(self.indexfile, end + isize, r)
1125 transaction.add(self.indexfile, end + isize, r)
1126 dfh = None
1126 dfh = None
1127 else:
1127 else:
1128 transaction.add(self.indexfile, isize, r)
1128 transaction.add(self.indexfile, isize, r)
1129 transaction.add(self.datafile, end)
1129 transaction.add(self.datafile, end)
1130 dfh = self.opener(self.datafile, "a")
1130 dfh = self.opener(self.datafile, "a")
1131
1131
1132 # loop through our set of deltas
1132 # loop through our set of deltas
1133 chain = None
1133 chain = None
1134 for chunk in revs:
1134 for chunk in revs:
1135 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1135 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1136 link = linkmapper(cs)
1136 link = linkmapper(cs)
1137 if node in self.nodemap:
1137 if node in self.nodemap:
1138 # this can happen if two branches make the same change
1138 # this can happen if two branches make the same change
1139 # if unique:
1139 # if unique:
1140 # raise RevlogError(_("already have %s") % hex(node[:4]))
1140 # raise RevlogError(_("already have %s") % hex(node[:4]))
1141 chain = node
1141 chain = node
1142 continue
1142 continue
1143 delta = chunk[80:]
1143 delta = buffer(chunk, 80)
1144 del chunk
1144
1145
1145 for p in (p1, p2):
1146 for p in (p1, p2):
1146 if not p in self.nodemap:
1147 if not p in self.nodemap:
1147 raise LookupError(_("unknown parent %s") % short(p))
1148 raise LookupError(_("unknown parent %s") % short(p))
1148
1149
1149 if not chain:
1150 if not chain:
1150 # retrieve the parent revision of the delta chain
1151 # retrieve the parent revision of the delta chain
1151 chain = p1
1152 chain = p1
1152 if not chain in self.nodemap:
1153 if not chain in self.nodemap:
1153 raise LookupError(_("unknown base %s") % short(chain[:4]))
1154 raise LookupError(_("unknown base %s") % short(chain[:4]))
1154
1155
1155 # full versions are inserted when the needed deltas become
1156 # full versions are inserted when the needed deltas become
1156 # comparable to the uncompressed text or when the previous
1157 # comparable to the uncompressed text or when the previous
1157 # version is not the one we have a delta against. We use
1158 # version is not the one we have a delta against. We use
1158 # the size of the previous full rev as a proxy for the
1159 # the size of the previous full rev as a proxy for the
1159 # current size.
1160 # current size.
1160
1161
1161 if chain == prev:
1162 if chain == prev:
1162 tempd = compress(delta)
1163 cdelta = compress(delta)
1163 cdelta = tempd[0] + tempd[1]
1164 cdeltalen = len(cdelta[0]) + len(cdelta[1])
1164 textlen = mdiff.patchedsize(textlen, delta)
1165 textlen = mdiff.patchedsize(textlen, delta)
1165
1166
1166 if chain != prev or (end - start + len(cdelta)) > textlen * 2:
1167 if chain != prev or (end - start + cdeltalen) > textlen * 2:
1167 # flush our writes here so we can read it in revision
1168 # flush our writes here so we can read it in revision
1168 if dfh:
1169 if dfh:
1169 dfh.flush()
1170 dfh.flush()
1170 ifh.flush()
1171 ifh.flush()
1171 text = self.revision(chain)
1172 text = self.revision(chain)
1172 text = mdiff.patches(text, [delta])
1173 if len(text) == 0:
1174 # skip over trivial delta header
1175 text = buffer(delta, 12)
1176 else:
1177 text = mdiff.patches(text, [delta])
1178 del delta
1173 chk = self._addrevision(text, transaction, link, p1, p2, None,
1179 chk = self._addrevision(text, transaction, link, p1, p2, None,
1174 ifh, dfh)
1180 ifh, dfh)
1175 if not dfh and not self._inline:
1181 if not dfh and not self._inline:
1176 # addrevision switched from inline to conventional
1182 # addrevision switched from inline to conventional
1177 # reopen the index
1183 # reopen the index
1178 dfh = self.opener(self.datafile, "a")
1184 dfh = self.opener(self.datafile, "a")
1179 ifh = self.opener(self.indexfile, "a")
1185 ifh = self.opener(self.indexfile, "a")
1180 if chk != node:
1186 if chk != node:
1181 raise RevlogError(_("consistency error adding group"))
1187 raise RevlogError(_("consistency error adding group"))
1182 textlen = len(text)
1188 textlen = len(text)
1183 else:
1189 else:
1184 e = (offset_type(end, 0), len(cdelta), textlen, base,
1190 e = (offset_type(end, 0), cdeltalen, textlen, base,
1185 link, self.rev(p1), self.rev(p2), node)
1191 link, self.rev(p1), self.rev(p2), node)
1186 self.index.insert(-1, e)
1192 self.index.insert(-1, e)
1187 self.nodemap[node] = r
1193 self.nodemap[node] = r
1188 entry = self._io.packentry(e, self.node, self.version, r)
1194 entry = self._io.packentry(e, self.node, self.version, r)
1189 if self._inline:
1195 if self._inline:
1190 ifh.write(entry)
1196 ifh.write(entry)
1191 ifh.write(cdelta)
1197 ifh.write(cdelta[0])
1198 ifh.write(cdelta[1])
1192 self.checkinlinesize(transaction, ifh)
1199 self.checkinlinesize(transaction, ifh)
1193 if not self._inline:
1200 if not self._inline:
1194 dfh = self.opener(self.datafile, "a")
1201 dfh = self.opener(self.datafile, "a")
1195 ifh = self.opener(self.indexfile, "a")
1202 ifh = self.opener(self.indexfile, "a")
1196 else:
1203 else:
1197 dfh.write(cdelta)
1204 dfh.write(cdelta[0])
1205 dfh.write(cdelta[1])
1198 ifh.write(entry)
1206 ifh.write(entry)
1199
1207
1200 t, r, chain, prev = r, r + 1, node, node
1208 t, r, chain, prev = r, r + 1, node, node
1201 base = self.base(t)
1209 base = self.base(t)
1202 start = self.start(base)
1210 start = self.start(base)
1203 end = self.end(t)
1211 end = self.end(t)
1204
1212
1205 return node
1213 return node
1206
1214
1207 def strip(self, rev, minlink):
1215 def strip(self, rev, minlink):
1208 if self.count() == 0 or rev >= self.count():
1216 if self.count() == 0 or rev >= self.count():
1209 return
1217 return
1210
1218
1211 if isinstance(self.index, lazyindex):
1219 if isinstance(self.index, lazyindex):
1212 self._loadindexmap()
1220 self._loadindexmap()
1213
1221
1214 # When stripping away a revision, we need to make sure it
1222 # When stripping away a revision, we need to make sure it
1215 # does not actually belong to an older changeset.
1223 # does not actually belong to an older changeset.
1216 # The minlink parameter defines the oldest revision
1224 # The minlink parameter defines the oldest revision
1217 # we're allowed to strip away.
1225 # we're allowed to strip away.
1218 while minlink > self.index[rev][4]:
1226 while minlink > self.index[rev][4]:
1219 rev += 1
1227 rev += 1
1220 if rev >= self.count():
1228 if rev >= self.count():
1221 return
1229 return
1222
1230
1223 # first truncate the files on disk
1231 # first truncate the files on disk
1224 end = self.start(rev)
1232 end = self.start(rev)
1225 if not self._inline:
1233 if not self._inline:
1226 df = self.opener(self.datafile, "a")
1234 df = self.opener(self.datafile, "a")
1227 df.truncate(end)
1235 df.truncate(end)
1228 end = rev * self._io.size
1236 end = rev * self._io.size
1229 else:
1237 else:
1230 end += rev * self._io.size
1238 end += rev * self._io.size
1231
1239
1232 indexf = self.opener(self.indexfile, "a")
1240 indexf = self.opener(self.indexfile, "a")
1233 indexf.truncate(end)
1241 indexf.truncate(end)
1234
1242
1235 # then reset internal state in memory to forget those revisions
1243 # then reset internal state in memory to forget those revisions
1236 self._cache = None
1244 self._cache = None
1237 self._chunkcache = None
1245 self._chunkcache = None
1238 for x in xrange(rev, self.count()):
1246 for x in xrange(rev, self.count()):
1239 del self.nodemap[self.node(x)]
1247 del self.nodemap[self.node(x)]
1240
1248
1241 del self.index[rev:-1]
1249 del self.index[rev:-1]
1242
1250
1243 def checksize(self):
1251 def checksize(self):
1244 expected = 0
1252 expected = 0
1245 if self.count():
1253 if self.count():
1246 expected = max(0, self.end(self.count() - 1))
1254 expected = max(0, self.end(self.count() - 1))
1247
1255
1248 try:
1256 try:
1249 f = self.opener(self.datafile)
1257 f = self.opener(self.datafile)
1250 f.seek(0, 2)
1258 f.seek(0, 2)
1251 actual = f.tell()
1259 actual = f.tell()
1252 dd = actual - expected
1260 dd = actual - expected
1253 except IOError, inst:
1261 except IOError, inst:
1254 if inst.errno != errno.ENOENT:
1262 if inst.errno != errno.ENOENT:
1255 raise
1263 raise
1256 dd = 0
1264 dd = 0
1257
1265
1258 try:
1266 try:
1259 f = self.opener(self.indexfile)
1267 f = self.opener(self.indexfile)
1260 f.seek(0, 2)
1268 f.seek(0, 2)
1261 actual = f.tell()
1269 actual = f.tell()
1262 s = self._io.size
1270 s = self._io.size
1263 i = max(0, actual / s)
1271 i = max(0, actual / s)
1264 di = actual - (i * s)
1272 di = actual - (i * s)
1265 if self._inline:
1273 if self._inline:
1266 databytes = 0
1274 databytes = 0
1267 for r in xrange(self.count()):
1275 for r in xrange(self.count()):
1268 databytes += max(0, self.length(r))
1276 databytes += max(0, self.length(r))
1269 dd = 0
1277 dd = 0
1270 di = actual - self.count() * s - databytes
1278 di = actual - self.count() * s - databytes
1271 except IOError, inst:
1279 except IOError, inst:
1272 if inst.errno != errno.ENOENT:
1280 if inst.errno != errno.ENOENT:
1273 raise
1281 raise
1274 di = 0
1282 di = 0
1275
1283
1276 return (dd, di)
1284 return (dd, di)
General Comments 0
You need to be logged in to leave comments. Login now