revlog: break up compression of large deltas...
Matt Mackall - r5451:0a438756 default
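Note on the change: zlib.compress() makes an internal copy of its input, so
compressing one big delta briefly doubles its memory footprint. After this
change, compress() feeds any text larger than 1,000,000 bytes to a
zlib.compressobj() in 1 MiB (2**20) pieces, so only one piece is buffered at
a time. A minimal standalone sketch of the technique, in the Python 2 style
of the file (the names chunked_compress and CHUNK are illustrative, not part
of the patch):

    import zlib

    CHUNK = 2**20  # hand zlib 1 MiB at a time, as the patch does

    def chunked_compress(text):
        # a compressobj keeps its own state between calls, so feeding it
        # fixed-size slices avoids copying the whole input at once
        z = zlib.compressobj()
        pieces = []
        pos = 0
        while pos < len(text):
            pieces.append(z.compress(text[pos:pos + CHUNK]))
            pos += CHUNK
        pieces.append(z.flush())
        return "".join(pieces)

The joined pieces form a single zlib stream, so zlib.decompress() of the
result round-trips to the original text. Note that in the hunk below, bin is
only assigned in the large-text branch when the pieces actually shrink the
input; a text over the threshold that does not compress would leave bin
unbound at the final length check.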
@@ -1,1291 +1,1306 @@
 """
 revlog.py - storage back-end for mercurial

 This provides efficient delta storage with O(1) retrieve and append
 and O(changes) merge between branches

 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>

 This software may be used and distributed according to the terms
 of the GNU General Public License, incorporated herein by reference.
 """

 from node import *
 from i18n import _
 import binascii, changegroup, errno, ancestor, mdiff, os
 import sha, struct, util, zlib

 _pack = struct.pack
 _unpack = struct.unpack
 _compress = zlib.compress
 _decompress = zlib.decompress
 _sha = sha.new

 # revlog flags
 REVLOGV0 = 0
 REVLOGNG = 1
 REVLOGNGINLINEDATA = (1 << 16)
 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
 REVLOG_DEFAULT_FORMAT = REVLOGNG
 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS

 class RevlogError(Exception):
     pass
 class LookupError(RevlogError):
     pass

 def getoffset(q):
     return int(q >> 16)

 def gettype(q):
     return int(q & 0xFFFF)

 def offset_type(offset, type):
     return long(long(offset) << 16 | type)

 def hash(text, p1, p2):
     """generate a hash from the given text and its parent hashes

     This hash combines both the current file contents and its history
     in a manner that makes it easy to distinguish nodes with the same
     content in the revision graph.
     """
     l = [p1, p2]
     l.sort()
     s = _sha(l[0])
     s.update(l[1])
     s.update(text)
     return s.digest()

 def compress(text):
     """ generate a possibly-compressed representation of text """
     if not text:
         return ("", text)
-    if len(text) < 44:
+    l = len(text)
+    if l < 44:
         if text[0] == '\0':
             return ("", text)
         return ('u', text)
-    bin = _compress(text)
-    if len(bin) > len(text):
+    elif l > 1000000:
+        # zlib makes an internal copy, thus doubling memory usage for
+        # large files, so lets do this in pieces
+        z = zlib.compressobj()
+        p = []
+        pos = 0
+        while pos < l:
+            pos2 = pos + 2**20
+            p.append(z.compress(text[pos:pos2]))
+            pos = pos2
+        p.append(z.flush())
+        if sum(map(len, p)) < l:
+            bin = "".join(p)
+    else:
+        bin = _compress(text)
+    if len(bin) > l:
         if text[0] == '\0':
             return ("", text)
         return ('u', text)
     return ("", bin)
74
89
75 def decompress(bin):
90 def decompress(bin):
76 """ decompress the given input """
91 """ decompress the given input """
77 if not bin:
92 if not bin:
78 return bin
93 return bin
79 t = bin[0]
94 t = bin[0]
80 if t == '\0':
95 if t == '\0':
81 return bin
96 return bin
82 if t == 'x':
97 if t == 'x':
83 return _decompress(bin)
98 return _decompress(bin)
84 if t == 'u':
99 if t == 'u':
85 return bin[1:]
100 return bin[1:]
86 raise RevlogError(_("unknown compression type %r") % t)
101 raise RevlogError(_("unknown compression type %r") % t)
87
102
88 class lazyparser(object):
103 class lazyparser(object):
89 """
104 """
90 this class avoids the need to parse the entirety of large indices
105 this class avoids the need to parse the entirety of large indices
91 """
106 """
92
107
93 # lazyparser is not safe to use on windows if win32 extensions not
108 # lazyparser is not safe to use on windows if win32 extensions not
94 # available. it keeps file handle open, which make it not possible
109 # available. it keeps file handle open, which make it not possible
95 # to break hardlinks on local cloned repos.
110 # to break hardlinks on local cloned repos.
96 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
111 safe_to_use = os.name != 'nt' or (not util.is_win_9x() and
97 hasattr(util, 'win32api'))
112 hasattr(util, 'win32api'))
98
113
99 def __init__(self, dataf, size):
114 def __init__(self, dataf, size):
100 self.dataf = dataf
115 self.dataf = dataf
101 self.s = struct.calcsize(indexformatng)
116 self.s = struct.calcsize(indexformatng)
102 self.datasize = size
117 self.datasize = size
103 self.l = size/self.s
118 self.l = size/self.s
104 self.index = [None] * self.l
119 self.index = [None] * self.l
105 self.map = {nullid: nullrev}
120 self.map = {nullid: nullrev}
106 self.allmap = 0
121 self.allmap = 0
107 self.all = 0
122 self.all = 0
108 self.mapfind_count = 0
123 self.mapfind_count = 0
109
124
110 def loadmap(self):
125 def loadmap(self):
111 """
126 """
112 during a commit, we need to make sure the rev being added is
127 during a commit, we need to make sure the rev being added is
113 not a duplicate. This requires loading the entire index,
128 not a duplicate. This requires loading the entire index,
114 which is fairly slow. loadmap can load up just the node map,
129 which is fairly slow. loadmap can load up just the node map,
115 which takes much less time.
130 which takes much less time.
116 """
131 """
117 if self.allmap:
132 if self.allmap:
118 return
133 return
119 end = self.datasize
134 end = self.datasize
120 self.allmap = 1
135 self.allmap = 1
121 cur = 0
136 cur = 0
122 count = 0
137 count = 0
123 blocksize = self.s * 256
138 blocksize = self.s * 256
124 self.dataf.seek(0)
139 self.dataf.seek(0)
125 while cur < end:
140 while cur < end:
126 data = self.dataf.read(blocksize)
141 data = self.dataf.read(blocksize)
127 off = 0
142 off = 0
128 for x in xrange(256):
143 for x in xrange(256):
129 n = data[off + ngshaoffset:off + ngshaoffset + 20]
144 n = data[off + ngshaoffset:off + ngshaoffset + 20]
130 self.map[n] = count
145 self.map[n] = count
131 count += 1
146 count += 1
132 if count >= self.l:
147 if count >= self.l:
133 break
148 break
134 off += self.s
149 off += self.s
135 cur += blocksize
150 cur += blocksize
136
151
137 def loadblock(self, blockstart, blocksize, data=None):
152 def loadblock(self, blockstart, blocksize, data=None):
138 if self.all:
153 if self.all:
139 return
154 return
140 if data is None:
155 if data is None:
141 self.dataf.seek(blockstart)
156 self.dataf.seek(blockstart)
142 if blockstart + blocksize > self.datasize:
157 if blockstart + blocksize > self.datasize:
143 # the revlog may have grown since we've started running,
158 # the revlog may have grown since we've started running,
144 # but we don't have space in self.index for more entries.
159 # but we don't have space in self.index for more entries.
145 # limit blocksize so that we don't get too much data.
160 # limit blocksize so that we don't get too much data.
146 blocksize = max(self.datasize - blockstart, 0)
161 blocksize = max(self.datasize - blockstart, 0)
147 data = self.dataf.read(blocksize)
162 data = self.dataf.read(blocksize)
148 lend = len(data) / self.s
163 lend = len(data) / self.s
149 i = blockstart / self.s
164 i = blockstart / self.s
150 off = 0
165 off = 0
151 # lazyindex supports __delitem__
166 # lazyindex supports __delitem__
152 if lend > len(self.index) - i:
167 if lend > len(self.index) - i:
153 lend = len(self.index) - i
168 lend = len(self.index) - i
154 for x in xrange(lend):
169 for x in xrange(lend):
155 if self.index[i + x] == None:
170 if self.index[i + x] == None:
156 b = data[off : off + self.s]
171 b = data[off : off + self.s]
157 self.index[i + x] = b
172 self.index[i + x] = b
158 n = b[ngshaoffset:ngshaoffset + 20]
173 n = b[ngshaoffset:ngshaoffset + 20]
159 self.map[n] = i + x
174 self.map[n] = i + x
160 off += self.s
175 off += self.s
161
176
162 def findnode(self, node):
177 def findnode(self, node):
163 """search backwards through the index file for a specific node"""
178 """search backwards through the index file for a specific node"""
164 if self.allmap:
179 if self.allmap:
165 return None
180 return None
166
181
167 # hg log will cause many many searches for the manifest
182 # hg log will cause many many searches for the manifest
168 # nodes. After we get called a few times, just load the whole
183 # nodes. After we get called a few times, just load the whole
169 # thing.
184 # thing.
170 if self.mapfind_count > 8:
185 if self.mapfind_count > 8:
171 self.loadmap()
186 self.loadmap()
172 if node in self.map:
187 if node in self.map:
173 return node
188 return node
174 return None
189 return None
175 self.mapfind_count += 1
190 self.mapfind_count += 1
176 last = self.l - 1
191 last = self.l - 1
177 while self.index[last] != None:
192 while self.index[last] != None:
178 if last == 0:
193 if last == 0:
179 self.all = 1
194 self.all = 1
180 self.allmap = 1
195 self.allmap = 1
181 return None
196 return None
182 last -= 1
197 last -= 1
183 end = (last + 1) * self.s
198 end = (last + 1) * self.s
184 blocksize = self.s * 256
199 blocksize = self.s * 256
185 while end >= 0:
200 while end >= 0:
186 start = max(end - blocksize, 0)
201 start = max(end - blocksize, 0)
187 self.dataf.seek(start)
202 self.dataf.seek(start)
188 data = self.dataf.read(end - start)
203 data = self.dataf.read(end - start)
189 findend = end - start
204 findend = end - start
190 while True:
205 while True:
191 # we're searching backwards, so we have to make sure
206 # we're searching backwards, so we have to make sure
192 # we don't find a changeset where this node is a parent
207 # we don't find a changeset where this node is a parent
193 off = data.find(node, 0, findend)
208 off = data.find(node, 0, findend)
194 findend = off
209 findend = off
195 if off >= 0:
210 if off >= 0:
196 i = off / self.s
211 i = off / self.s
197 off = i * self.s
212 off = i * self.s
198 n = data[off + ngshaoffset:off + ngshaoffset + 20]
213 n = data[off + ngshaoffset:off + ngshaoffset + 20]
199 if n == node:
214 if n == node:
200 self.map[n] = i + start / self.s
215 self.map[n] = i + start / self.s
201 return node
216 return node
202 else:
217 else:
203 break
218 break
204 end -= blocksize
219 end -= blocksize
205 return None
220 return None
206
221
207 def loadindex(self, i=None, end=None):
222 def loadindex(self, i=None, end=None):
208 if self.all:
223 if self.all:
209 return
224 return
210 all = False
225 all = False
211 if i == None:
226 if i == None:
212 blockstart = 0
227 blockstart = 0
213 blocksize = (65536 / self.s) * self.s
228 blocksize = (65536 / self.s) * self.s
214 end = self.datasize
229 end = self.datasize
215 all = True
230 all = True
216 else:
231 else:
217 if end:
232 if end:
218 blockstart = i * self.s
233 blockstart = i * self.s
219 end = end * self.s
234 end = end * self.s
220 blocksize = end - blockstart
235 blocksize = end - blockstart
221 else:
236 else:
222 blockstart = (i & ~1023) * self.s
237 blockstart = (i & ~1023) * self.s
223 blocksize = self.s * 1024
238 blocksize = self.s * 1024
224 end = blockstart + blocksize
239 end = blockstart + blocksize
225 while blockstart < end:
240 while blockstart < end:
226 self.loadblock(blockstart, blocksize)
241 self.loadblock(blockstart, blocksize)
227 blockstart += blocksize
242 blockstart += blocksize
228 if all:
243 if all:
229 self.all = True
244 self.all = True
230
245
231 class lazyindex(object):
246 class lazyindex(object):
232 """a lazy version of the index array"""
247 """a lazy version of the index array"""
233 def __init__(self, parser):
248 def __init__(self, parser):
234 self.p = parser
249 self.p = parser
235 def __len__(self):
250 def __len__(self):
236 return len(self.p.index)
251 return len(self.p.index)
237 def load(self, pos):
252 def load(self, pos):
238 if pos < 0:
253 if pos < 0:
239 pos += len(self.p.index)
254 pos += len(self.p.index)
240 self.p.loadindex(pos)
255 self.p.loadindex(pos)
241 return self.p.index[pos]
256 return self.p.index[pos]
242 def __getitem__(self, pos):
257 def __getitem__(self, pos):
243 return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
258 return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
244 def __setitem__(self, pos, item):
259 def __setitem__(self, pos, item):
245 self.p.index[pos] = _pack(indexformatng, *item)
260 self.p.index[pos] = _pack(indexformatng, *item)
246 def __delitem__(self, pos):
261 def __delitem__(self, pos):
247 del self.p.index[pos]
262 del self.p.index[pos]
248 def insert(self, pos, e):
263 def insert(self, pos, e):
249 self.p.index.insert(pos, _pack(indexformatng, *e))
264 self.p.index.insert(pos, _pack(indexformatng, *e))
250 def append(self, e):
265 def append(self, e):
251 self.p.index.append(_pack(indexformatng, *e))
266 self.p.index.append(_pack(indexformatng, *e))
252
267
253 class lazymap(object):
268 class lazymap(object):
254 """a lazy version of the node map"""
269 """a lazy version of the node map"""
255 def __init__(self, parser):
270 def __init__(self, parser):
256 self.p = parser
271 self.p = parser
257 def load(self, key):
272 def load(self, key):
258 n = self.p.findnode(key)
273 n = self.p.findnode(key)
259 if n == None:
274 if n == None:
260 raise KeyError(key)
275 raise KeyError(key)
261 def __contains__(self, key):
276 def __contains__(self, key):
262 if key in self.p.map:
277 if key in self.p.map:
263 return True
278 return True
264 self.p.loadmap()
279 self.p.loadmap()
265 return key in self.p.map
280 return key in self.p.map
266 def __iter__(self):
281 def __iter__(self):
267 yield nullid
282 yield nullid
268 for i in xrange(self.p.l):
283 for i in xrange(self.p.l):
269 ret = self.p.index[i]
284 ret = self.p.index[i]
270 if not ret:
285 if not ret:
271 self.p.loadindex(i)
286 self.p.loadindex(i)
272 ret = self.p.index[i]
287 ret = self.p.index[i]
273 if isinstance(ret, str):
288 if isinstance(ret, str):
274 ret = _unpack(indexformatng, ret)
289 ret = _unpack(indexformatng, ret)
275 yield ret[7]
290 yield ret[7]
276 def __getitem__(self, key):
291 def __getitem__(self, key):
277 try:
292 try:
278 return self.p.map[key]
293 return self.p.map[key]
279 except KeyError:
294 except KeyError:
280 try:
295 try:
281 self.load(key)
296 self.load(key)
282 return self.p.map[key]
297 return self.p.map[key]
283 except KeyError:
298 except KeyError:
284 raise KeyError("node " + hex(key))
299 raise KeyError("node " + hex(key))
285 def __setitem__(self, key, val):
300 def __setitem__(self, key, val):
286 self.p.map[key] = val
301 self.p.map[key] = val
287 def __delitem__(self, key):
302 def __delitem__(self, key):
288 del self.p.map[key]
303 del self.p.map[key]
289
304
290 indexformatv0 = ">4l20s20s20s"
305 indexformatv0 = ">4l20s20s20s"
291 v0shaoffset = 56
306 v0shaoffset = 56
292
307
293 class revlogoldio(object):
308 class revlogoldio(object):
294 def __init__(self):
309 def __init__(self):
295 self.size = struct.calcsize(indexformatv0)
310 self.size = struct.calcsize(indexformatv0)
296
311
297 def parseindex(self, fp, inline):
312 def parseindex(self, fp, inline):
298 s = self.size
313 s = self.size
299 index = []
314 index = []
300 nodemap = {nullid: nullrev}
315 nodemap = {nullid: nullrev}
301 n = off = 0
316 n = off = 0
302 data = fp.read()
317 data = fp.read()
303 l = len(data)
318 l = len(data)
304 while off + s <= l:
319 while off + s <= l:
305 cur = data[off:off + s]
320 cur = data[off:off + s]
306 off += s
321 off += s
307 e = _unpack(indexformatv0, cur)
322 e = _unpack(indexformatv0, cur)
308 # transform to revlogv1 format
323 # transform to revlogv1 format
309 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
324 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
310 nodemap[e[4]], nodemap[e[5]], e[6])
325 nodemap[e[4]], nodemap[e[5]], e[6])
311 index.append(e2)
326 index.append(e2)
312 nodemap[e[6]] = n
327 nodemap[e[6]] = n
313 n += 1
328 n += 1
314
329
315 return index, nodemap, None
330 return index, nodemap, None
316
331
317 def packentry(self, entry, node, version, rev):
332 def packentry(self, entry, node, version, rev):
318 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
333 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
319 node(entry[5]), node(entry[6]), entry[7])
334 node(entry[5]), node(entry[6]), entry[7])
320 return _pack(indexformatv0, *e2)
335 return _pack(indexformatv0, *e2)
321
336
322 # index ng:
337 # index ng:
323 # 6 bytes offset
338 # 6 bytes offset
324 # 2 bytes flags
339 # 2 bytes flags
325 # 4 bytes compressed length
340 # 4 bytes compressed length
326 # 4 bytes uncompressed length
341 # 4 bytes uncompressed length
327 # 4 bytes: base rev
342 # 4 bytes: base rev
328 # 4 bytes link rev
343 # 4 bytes link rev
329 # 4 bytes parent 1 rev
344 # 4 bytes parent 1 rev
330 # 4 bytes parent 2 rev
345 # 4 bytes parent 2 rev
331 # 32 bytes: nodeid
346 # 32 bytes: nodeid
332 indexformatng = ">Qiiiiii20s12x"
347 indexformatng = ">Qiiiiii20s12x"
333 ngshaoffset = 32
348 ngshaoffset = 32
334 versionformat = ">I"
349 versionformat = ">I"
335
350
336 class revlogio(object):
351 class revlogio(object):
337 def __init__(self):
352 def __init__(self):
338 self.size = struct.calcsize(indexformatng)
353 self.size = struct.calcsize(indexformatng)
339
354
340 def parseindex(self, fp, inline):
355 def parseindex(self, fp, inline):
341 try:
356 try:
342 size = util.fstat(fp).st_size
357 size = util.fstat(fp).st_size
343 except AttributeError:
358 except AttributeError:
344 size = 0
359 size = 0
345
360
346 if lazyparser.safe_to_use and not inline and size > 1000000:
361 if lazyparser.safe_to_use and not inline and size > 1000000:
347 # big index, let's parse it on demand
362 # big index, let's parse it on demand
348 parser = lazyparser(fp, size)
363 parser = lazyparser(fp, size)
349 index = lazyindex(parser)
364 index = lazyindex(parser)
350 nodemap = lazymap(parser)
365 nodemap = lazymap(parser)
351 e = list(index[0])
366 e = list(index[0])
352 type = gettype(e[0])
367 type = gettype(e[0])
353 e[0] = offset_type(0, type)
368 e[0] = offset_type(0, type)
354 index[0] = e
369 index[0] = e
355 return index, nodemap, None
370 return index, nodemap, None
356
371
357 s = self.size
372 s = self.size
358 cache = None
373 cache = None
359 index = []
374 index = []
360 nodemap = {nullid: nullrev}
375 nodemap = {nullid: nullrev}
361 n = off = 0
376 n = off = 0
362 # if we're not using lazymap, always read the whole index
377 # if we're not using lazymap, always read the whole index
363 data = fp.read()
378 data = fp.read()
364 l = len(data) - s
379 l = len(data) - s
365 append = index.append
380 append = index.append
366 if inline:
381 if inline:
367 cache = (0, data)
382 cache = (0, data)
368 while off <= l:
383 while off <= l:
369 e = _unpack(indexformatng, data[off:off + s])
384 e = _unpack(indexformatng, data[off:off + s])
370 nodemap[e[7]] = n
385 nodemap[e[7]] = n
371 append(e)
386 append(e)
372 n += 1
387 n += 1
373 if e[1] < 0:
388 if e[1] < 0:
374 break
389 break
375 off += e[1] + s
390 off += e[1] + s
376 else:
391 else:
377 while off <= l:
392 while off <= l:
378 e = _unpack(indexformatng, data[off:off + s])
393 e = _unpack(indexformatng, data[off:off + s])
379 nodemap[e[7]] = n
394 nodemap[e[7]] = n
380 append(e)
395 append(e)
381 n += 1
396 n += 1
382 off += s
397 off += s
383
398
384 e = list(index[0])
399 e = list(index[0])
385 type = gettype(e[0])
400 type = gettype(e[0])
386 e[0] = offset_type(0, type)
401 e[0] = offset_type(0, type)
387 index[0] = e
402 index[0] = e
388
403
389 return index, nodemap, cache
404 return index, nodemap, cache
390
405
391 def packentry(self, entry, node, version, rev):
406 def packentry(self, entry, node, version, rev):
392 p = _pack(indexformatng, *entry)
407 p = _pack(indexformatng, *entry)
393 if rev == 0:
408 if rev == 0:
394 p = _pack(versionformat, version) + p[4:]
409 p = _pack(versionformat, version) + p[4:]
395 return p
410 return p
396
411
397 class revlog(object):
412 class revlog(object):
398 """
413 """
399 the underlying revision storage object
414 the underlying revision storage object
400
415
401 A revlog consists of two parts, an index and the revision data.
416 A revlog consists of two parts, an index and the revision data.
402
417
403 The index is a file with a fixed record size containing
418 The index is a file with a fixed record size containing
404 information on each revision, includings its nodeid (hash), the
419 information on each revision, includings its nodeid (hash), the
405 nodeids of its parents, the position and offset of its data within
420 nodeids of its parents, the position and offset of its data within
406 the data file, and the revision it's based on. Finally, each entry
421 the data file, and the revision it's based on. Finally, each entry
407 contains a linkrev entry that can serve as a pointer to external
422 contains a linkrev entry that can serve as a pointer to external
408 data.
423 data.
409
424
410 The revision data itself is a linear collection of data chunks.
425 The revision data itself is a linear collection of data chunks.
411 Each chunk represents a revision and is usually represented as a
426 Each chunk represents a revision and is usually represented as a
412 delta against the previous chunk. To bound lookup time, runs of
427 delta against the previous chunk. To bound lookup time, runs of
413 deltas are limited to about 2 times the length of the original
428 deltas are limited to about 2 times the length of the original
414 version data. This makes retrieval of a version proportional to
429 version data. This makes retrieval of a version proportional to
415 its size, or O(1) relative to the number of revisions.
430 its size, or O(1) relative to the number of revisions.
416
431
417 Both pieces of the revlog are written to in an append-only
432 Both pieces of the revlog are written to in an append-only
418 fashion, which means we never need to rewrite a file to insert or
433 fashion, which means we never need to rewrite a file to insert or
419 remove data, and can use some simple techniques to avoid the need
434 remove data, and can use some simple techniques to avoid the need
420 for locking while reading.
435 for locking while reading.
421 """
436 """
422 def __init__(self, opener, indexfile):
437 def __init__(self, opener, indexfile):
423 """
438 """
424 create a revlog object
439 create a revlog object
425
440
426 opener is a function that abstracts the file opening operation
441 opener is a function that abstracts the file opening operation
427 and can be used to implement COW semantics or the like.
442 and can be used to implement COW semantics or the like.
428 """
443 """
429 self.indexfile = indexfile
444 self.indexfile = indexfile
430 self.datafile = indexfile[:-2] + ".d"
445 self.datafile = indexfile[:-2] + ".d"
431 self.opener = opener
446 self.opener = opener
432 self._cache = None
447 self._cache = None
433 self._chunkcache = None
448 self._chunkcache = None
434 self.nodemap = {nullid: nullrev}
449 self.nodemap = {nullid: nullrev}
435 self.index = []
450 self.index = []
436
451
437 v = REVLOG_DEFAULT_VERSION
452 v = REVLOG_DEFAULT_VERSION
438 if hasattr(opener, "defversion"):
453 if hasattr(opener, "defversion"):
439 v = opener.defversion
454 v = opener.defversion
440 if v & REVLOGNG:
455 if v & REVLOGNG:
441 v |= REVLOGNGINLINEDATA
456 v |= REVLOGNGINLINEDATA
442
457
443 i = ""
458 i = ""
444 try:
459 try:
445 f = self.opener(self.indexfile)
460 f = self.opener(self.indexfile)
446 i = f.read(4)
461 i = f.read(4)
447 f.seek(0)
462 f.seek(0)
448 if len(i) > 0:
463 if len(i) > 0:
449 v = struct.unpack(versionformat, i)[0]
464 v = struct.unpack(versionformat, i)[0]
450 except IOError, inst:
465 except IOError, inst:
451 if inst.errno != errno.ENOENT:
466 if inst.errno != errno.ENOENT:
452 raise
467 raise
453
468
454 self.version = v
469 self.version = v
455 self._inline = v & REVLOGNGINLINEDATA
470 self._inline = v & REVLOGNGINLINEDATA
456 flags = v & ~0xFFFF
471 flags = v & ~0xFFFF
457 fmt = v & 0xFFFF
472 fmt = v & 0xFFFF
458 if fmt == REVLOGV0 and flags:
473 if fmt == REVLOGV0 and flags:
459 raise RevlogError(_("index %s unknown flags %#04x for format v0")
474 raise RevlogError(_("index %s unknown flags %#04x for format v0")
460 % (self.indexfile, flags >> 16))
475 % (self.indexfile, flags >> 16))
461 elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
476 elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
462 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
477 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
463 % (self.indexfile, flags >> 16))
478 % (self.indexfile, flags >> 16))
464 elif fmt > REVLOGNG:
479 elif fmt > REVLOGNG:
465 raise RevlogError(_("index %s unknown format %d")
480 raise RevlogError(_("index %s unknown format %d")
466 % (self.indexfile, fmt))
481 % (self.indexfile, fmt))
467
482
468 self._io = revlogio()
483 self._io = revlogio()
469 if self.version == REVLOGV0:
484 if self.version == REVLOGV0:
470 self._io = revlogoldio()
485 self._io = revlogoldio()
471 if i:
486 if i:
472 d = self._io.parseindex(f, self._inline)
487 d = self._io.parseindex(f, self._inline)
473 self.index, self.nodemap, self._chunkcache = d
488 self.index, self.nodemap, self._chunkcache = d
474
489
475 # add the magic null revision at -1
490 # add the magic null revision at -1
476 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
491 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
477
492
478 def _loadindex(self, start, end):
493 def _loadindex(self, start, end):
479 """load a block of indexes all at once from the lazy parser"""
494 """load a block of indexes all at once from the lazy parser"""
480 if isinstance(self.index, lazyindex):
495 if isinstance(self.index, lazyindex):
481 self.index.p.loadindex(start, end)
496 self.index.p.loadindex(start, end)
482
497
483 def _loadindexmap(self):
498 def _loadindexmap(self):
484 """loads both the map and the index from the lazy parser"""
499 """loads both the map and the index from the lazy parser"""
485 if isinstance(self.index, lazyindex):
500 if isinstance(self.index, lazyindex):
486 p = self.index.p
501 p = self.index.p
487 p.loadindex()
502 p.loadindex()
488 self.nodemap = p.map
503 self.nodemap = p.map
489
504
490 def _loadmap(self):
505 def _loadmap(self):
491 """loads the map from the lazy parser"""
506 """loads the map from the lazy parser"""
492 if isinstance(self.nodemap, lazymap):
507 if isinstance(self.nodemap, lazymap):
493 self.nodemap.p.loadmap()
508 self.nodemap.p.loadmap()
494 self.nodemap = self.nodemap.p.map
509 self.nodemap = self.nodemap.p.map
495
510
496 def tip(self):
511 def tip(self):
497 return self.node(len(self.index) - 2)
512 return self.node(len(self.index) - 2)
498 def count(self):
513 def count(self):
499 return len(self.index) - 1
514 return len(self.index) - 1
500
515
501 def rev(self, node):
516 def rev(self, node):
502 try:
517 try:
503 return self.nodemap[node]
518 return self.nodemap[node]
504 except KeyError:
519 except KeyError:
505 raise LookupError(_('%s: no node %s') % (self.indexfile, hex(node)))
520 raise LookupError(_('%s: no node %s') % (self.indexfile, hex(node)))
506 def node(self, rev):
521 def node(self, rev):
507 return self.index[rev][7]
522 return self.index[rev][7]
508 def linkrev(self, node):
523 def linkrev(self, node):
509 return self.index[self.rev(node)][4]
524 return self.index[self.rev(node)][4]
510 def parents(self, node):
525 def parents(self, node):
511 d = self.index[self.rev(node)][5:7]
526 d = self.index[self.rev(node)][5:7]
512 return (self.node(d[0]), self.node(d[1]))
527 return (self.node(d[0]), self.node(d[1]))
513 def parentrevs(self, rev):
528 def parentrevs(self, rev):
514 return self.index[rev][5:7]
529 return self.index[rev][5:7]
515 def start(self, rev):
530 def start(self, rev):
516 return int(self.index[rev][0] >> 16)
531 return int(self.index[rev][0] >> 16)
517 def end(self, rev):
532 def end(self, rev):
518 return self.start(rev) + self.length(rev)
533 return self.start(rev) + self.length(rev)
519 def length(self, rev):
534 def length(self, rev):
520 return self.index[rev][1]
535 return self.index[rev][1]
521 def base(self, rev):
536 def base(self, rev):
522 return self.index[rev][3]
537 return self.index[rev][3]
523
538
524 def size(self, rev):
539 def size(self, rev):
525 """return the length of the uncompressed text for a given revision"""
540 """return the length of the uncompressed text for a given revision"""
526 l = self.index[rev][2]
541 l = self.index[rev][2]
527 if l >= 0:
542 if l >= 0:
528 return l
543 return l
529
544
530 t = self.revision(self.node(rev))
545 t = self.revision(self.node(rev))
531 return len(t)
546 return len(t)
532
547
533 # alternate implementation, The advantage to this code is it
548 # alternate implementation, The advantage to this code is it
534 # will be faster for a single revision. But, the results are not
549 # will be faster for a single revision. But, the results are not
535 # cached, so finding the size of every revision will be slower.
550 # cached, so finding the size of every revision will be slower.
536 """
551 """
537 if self.cache and self.cache[1] == rev:
552 if self.cache and self.cache[1] == rev:
538 return len(self.cache[2])
553 return len(self.cache[2])
539
554
540 base = self.base(rev)
555 base = self.base(rev)
541 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
556 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
542 base = self.cache[1]
557 base = self.cache[1]
543 text = self.cache[2]
558 text = self.cache[2]
544 else:
559 else:
545 text = self.revision(self.node(base))
560 text = self.revision(self.node(base))
546
561
547 l = len(text)
562 l = len(text)
548 for x in xrange(base + 1, rev + 1):
563 for x in xrange(base + 1, rev + 1):
549 l = mdiff.patchedsize(l, self.chunk(x))
564 l = mdiff.patchedsize(l, self.chunk(x))
550 return l
565 return l
551 """
566 """
552
567
553 def reachable(self, node, stop=None):
568 def reachable(self, node, stop=None):
554 """return a hash of all nodes ancestral to a given node, including
569 """return a hash of all nodes ancestral to a given node, including
555 the node itself, stopping when stop is matched"""
570 the node itself, stopping when stop is matched"""
556 reachable = {}
571 reachable = {}
557 visit = [node]
572 visit = [node]
558 reachable[node] = 1
573 reachable[node] = 1
559 if stop:
574 if stop:
560 stopn = self.rev(stop)
575 stopn = self.rev(stop)
561 else:
576 else:
562 stopn = 0
577 stopn = 0
563 while visit:
578 while visit:
564 n = visit.pop(0)
579 n = visit.pop(0)
565 if n == stop:
580 if n == stop:
566 continue
581 continue
567 if n == nullid:
582 if n == nullid:
568 continue
583 continue
569 for p in self.parents(n):
584 for p in self.parents(n):
570 if self.rev(p) < stopn:
585 if self.rev(p) < stopn:
571 continue
586 continue
572 if p not in reachable:
587 if p not in reachable:
573 reachable[p] = 1
588 reachable[p] = 1
574 visit.append(p)
589 visit.append(p)
575 return reachable
590 return reachable
576
591
577 def nodesbetween(self, roots=None, heads=None):
592 def nodesbetween(self, roots=None, heads=None):
578 """Return a tuple containing three elements. Elements 1 and 2 contain
593 """Return a tuple containing three elements. Elements 1 and 2 contain
579 a final list bases and heads after all the unreachable ones have been
594 a final list bases and heads after all the unreachable ones have been
580 pruned. Element 0 contains a topologically sorted list of all
595 pruned. Element 0 contains a topologically sorted list of all
581
596
582 nodes that satisfy these constraints:
597 nodes that satisfy these constraints:
583 1. All nodes must be descended from a node in roots (the nodes on
598 1. All nodes must be descended from a node in roots (the nodes on
584 roots are considered descended from themselves).
599 roots are considered descended from themselves).
585 2. All nodes must also be ancestors of a node in heads (the nodes in
600 2. All nodes must also be ancestors of a node in heads (the nodes in
586 heads are considered to be their own ancestors).
601 heads are considered to be their own ancestors).
587
602
588 If roots is unspecified, nullid is assumed as the only root.
603 If roots is unspecified, nullid is assumed as the only root.
589 If heads is unspecified, it is taken to be the output of the
604 If heads is unspecified, it is taken to be the output of the
590 heads method (i.e. a list of all nodes in the repository that
605 heads method (i.e. a list of all nodes in the repository that
591 have no children)."""
606 have no children)."""
592 nonodes = ([], [], [])
607 nonodes = ([], [], [])
593 if roots is not None:
608 if roots is not None:
594 roots = list(roots)
609 roots = list(roots)
595 if not roots:
610 if not roots:
596 return nonodes
611 return nonodes
597 lowestrev = min([self.rev(n) for n in roots])
612 lowestrev = min([self.rev(n) for n in roots])
598 else:
613 else:
599 roots = [nullid] # Everybody's a descendent of nullid
614 roots = [nullid] # Everybody's a descendent of nullid
600 lowestrev = nullrev
615 lowestrev = nullrev
601 if (lowestrev == nullrev) and (heads is None):
616 if (lowestrev == nullrev) and (heads is None):
602 # We want _all_ the nodes!
617 # We want _all_ the nodes!
603 return ([self.node(r) for r in xrange(0, self.count())],
618 return ([self.node(r) for r in xrange(0, self.count())],
604 [nullid], list(self.heads()))
619 [nullid], list(self.heads()))
605 if heads is None:
620 if heads is None:
606 # All nodes are ancestors, so the latest ancestor is the last
621 # All nodes are ancestors, so the latest ancestor is the last
607 # node.
622 # node.
608 highestrev = self.count() - 1
623 highestrev = self.count() - 1
609 # Set ancestors to None to signal that every node is an ancestor.
624 # Set ancestors to None to signal that every node is an ancestor.
610 ancestors = None
625 ancestors = None
611 # Set heads to an empty dictionary for later discovery of heads
626 # Set heads to an empty dictionary for later discovery of heads
612 heads = {}
627 heads = {}
613 else:
628 else:
614 heads = list(heads)
629 heads = list(heads)
615 if not heads:
630 if not heads:
616 return nonodes
631 return nonodes
617 ancestors = {}
632 ancestors = {}
618 # Turn heads into a dictionary so we can remove 'fake' heads.
633 # Turn heads into a dictionary so we can remove 'fake' heads.
619 # Also, later we will be using it to filter out the heads we can't
634 # Also, later we will be using it to filter out the heads we can't
620 # find from roots.
635 # find from roots.
621 heads = dict.fromkeys(heads, 0)
636 heads = dict.fromkeys(heads, 0)
622 # Start at the top and keep marking parents until we're done.
637 # Start at the top and keep marking parents until we're done.
623 nodestotag = heads.keys()
638 nodestotag = heads.keys()
624 # Remember where the top was so we can use it as a limit later.
639 # Remember where the top was so we can use it as a limit later.
625 highestrev = max([self.rev(n) for n in nodestotag])
640 highestrev = max([self.rev(n) for n in nodestotag])
626 while nodestotag:
641 while nodestotag:
627 # grab a node to tag
642 # grab a node to tag
628 n = nodestotag.pop()
643 n = nodestotag.pop()
629 # Never tag nullid
644 # Never tag nullid
630 if n == nullid:
645 if n == nullid:
631 continue
646 continue
632 # A node's revision number represents its place in a
647 # A node's revision number represents its place in a
633 # topologically sorted list of nodes.
648 # topologically sorted list of nodes.
634 r = self.rev(n)
649 r = self.rev(n)
635 if r >= lowestrev:
650 if r >= lowestrev:
636 if n not in ancestors:
651 if n not in ancestors:
637 # If we are possibly a descendent of one of the roots
652 # If we are possibly a descendent of one of the roots
638 # and we haven't already been marked as an ancestor
653 # and we haven't already been marked as an ancestor
639 ancestors[n] = 1 # Mark as ancestor
654 ancestors[n] = 1 # Mark as ancestor
640 # Add non-nullid parents to list of nodes to tag.
655 # Add non-nullid parents to list of nodes to tag.
641 nodestotag.extend([p for p in self.parents(n) if
656 nodestotag.extend([p for p in self.parents(n) if
642 p != nullid])
657 p != nullid])
643 elif n in heads: # We've seen it before, is it a fake head?
658 elif n in heads: # We've seen it before, is it a fake head?
644 # So it is, real heads should not be the ancestors of
659 # So it is, real heads should not be the ancestors of
645 # any other heads.
660 # any other heads.
646 heads.pop(n)
661 heads.pop(n)
647 if not ancestors:
662 if not ancestors:
648 return nonodes
663 return nonodes
649 # Now that we have our set of ancestors, we want to remove any
664 # Now that we have our set of ancestors, we want to remove any
650 # roots that are not ancestors.
665 # roots that are not ancestors.
651
666
652 # If one of the roots was nullid, everything is included anyway.
667 # If one of the roots was nullid, everything is included anyway.
653 if lowestrev > nullrev:
668 if lowestrev > nullrev:
654 # But, since we weren't, let's recompute the lowest rev to not
669 # But, since we weren't, let's recompute the lowest rev to not
655 # include roots that aren't ancestors.
670 # include roots that aren't ancestors.
656
671
657 # Filter out roots that aren't ancestors of heads
672 # Filter out roots that aren't ancestors of heads
658 roots = [n for n in roots if n in ancestors]
673 roots = [n for n in roots if n in ancestors]
659 # Recompute the lowest revision
674 # Recompute the lowest revision
660 if roots:
675 if roots:
661 lowestrev = min([self.rev(n) for n in roots])
676 lowestrev = min([self.rev(n) for n in roots])
662 else:
677 else:
663 # No more roots? Return empty list
678 # No more roots? Return empty list
664 return nonodes
679 return nonodes
665 else:
680 else:
666 # We are descending from nullid, and don't need to care about
681 # We are descending from nullid, and don't need to care about
667 # any other roots.
682 # any other roots.
668 lowestrev = nullrev
683 lowestrev = nullrev
669 roots = [nullid]
684 roots = [nullid]
670 # Transform our roots list into a 'set' (i.e. a dictionary where the
685 # Transform our roots list into a 'set' (i.e. a dictionary where the
671 # values don't matter.
686 # values don't matter.
672 descendents = dict.fromkeys(roots, 1)
687 descendents = dict.fromkeys(roots, 1)
673 # Also, keep the original roots so we can filter out roots that aren't
688 # Also, keep the original roots so we can filter out roots that aren't
674 # 'real' roots (i.e. are descended from other roots).
689 # 'real' roots (i.e. are descended from other roots).
675 roots = descendents.copy()
690 roots = descendents.copy()
676 # Our topologically sorted list of output nodes.
691 # Our topologically sorted list of output nodes.
677 orderedout = []
692 orderedout = []
678 # Don't start at nullid since we don't want nullid in our output list,
693 # Don't start at nullid since we don't want nullid in our output list,
679 # and if nullid shows up in descedents, empty parents will look like
694 # and if nullid shows up in descedents, empty parents will look like
680 # they're descendents.
695 # they're descendents.
681 for r in xrange(max(lowestrev, 0), highestrev + 1):
696 for r in xrange(max(lowestrev, 0), highestrev + 1):
682 n = self.node(r)
697 n = self.node(r)
683 isdescendent = False
698 isdescendent = False
684 if lowestrev == nullrev: # Everybody is a descendent of nullid
699 if lowestrev == nullrev: # Everybody is a descendent of nullid
685 isdescendent = True
700 isdescendent = True
686 elif n in descendents:
701 elif n in descendents:
687 # n is already a descendent
702 # n is already a descendent
688 isdescendent = True
703 isdescendent = True
689 # This check only needs to be done here because all the roots
704 # This check only needs to be done here because all the roots
690 # will start being marked is descendents before the loop.
705 # will start being marked is descendents before the loop.
691 if n in roots:
706 if n in roots:
692 # If n was a root, check if it's a 'real' root.
707 # If n was a root, check if it's a 'real' root.
693 p = tuple(self.parents(n))
708 p = tuple(self.parents(n))
694 # If any of its parents are descendents, it's not a root.
709 # If any of its parents are descendents, it's not a root.
695 if (p[0] in descendents) or (p[1] in descendents):
710 if (p[0] in descendents) or (p[1] in descendents):
696 roots.pop(n)
711 roots.pop(n)
697 else:
712 else:
698 p = tuple(self.parents(n))
713 p = tuple(self.parents(n))
699 # A node is a descendent if either of its parents are
714 # A node is a descendent if either of its parents are
700 # descendents. (We seeded the dependents list with the roots
715 # descendents. (We seeded the dependents list with the roots
701 # up there, remember?)
716 # up there, remember?)
702 if (p[0] in descendents) or (p[1] in descendents):
717 if (p[0] in descendents) or (p[1] in descendents):
703 descendents[n] = 1
718 descendents[n] = 1
704 isdescendent = True
719 isdescendent = True
705 if isdescendent and ((ancestors is None) or (n in ancestors)):
720 if isdescendent and ((ancestors is None) or (n in ancestors)):
706 # Only include nodes that are both descendents and ancestors.
721 # Only include nodes that are both descendents and ancestors.
707 orderedout.append(n)
722 orderedout.append(n)
708 if (ancestors is not None) and (n in heads):
723 if (ancestors is not None) and (n in heads):
709 # We're trying to figure out which heads are reachable
724 # We're trying to figure out which heads are reachable
710 # from roots.
725 # from roots.
711 # Mark this head as having been reached
726 # Mark this head as having been reached
712 heads[n] = 1
727 heads[n] = 1
713 elif ancestors is None:
728 elif ancestors is None:
714 # Otherwise, we're trying to discover the heads.
729 # Otherwise, we're trying to discover the heads.
715 # Assume this is a head because if it isn't, the next step
730 # Assume this is a head because if it isn't, the next step
716 # will eventually remove it.
731 # will eventually remove it.
717 heads[n] = 1
732 heads[n] = 1
718 # But, obviously its parents aren't.
733 # But, obviously its parents aren't.
719 for p in self.parents(n):
734 for p in self.parents(n):
720 heads.pop(p, None)
735 heads.pop(p, None)
721 heads = [n for n in heads.iterkeys() if heads[n] != 0]
736 heads = [n for n in heads.iterkeys() if heads[n] != 0]
722 roots = roots.keys()
737 roots = roots.keys()
723 assert orderedout
738 assert orderedout
724 assert roots
739 assert roots
725 assert heads
740 assert heads
726 return (orderedout, roots, heads)
741 return (orderedout, roots, heads)
727
742
728 def heads(self, start=None, stop=None):
743 def heads(self, start=None, stop=None):
729 """return the list of all nodes that have no children
744 """return the list of all nodes that have no children
730
745
731 if start is specified, only heads that are descendants of
746 if start is specified, only heads that are descendants of
732 start will be returned
747 start will be returned
733 if stop is specified, it will consider all the revs from stop
748 if stop is specified, it will consider all the revs from stop
734 as if they had no children
749 as if they had no children
735 """
750 """
736 if start is None and stop is None:
751 if start is None and stop is None:
737 count = self.count()
752 count = self.count()
738 if not count:
753 if not count:
739 return [nullid]
754 return [nullid]
740 ishead = [1] * (count + 1)
755 ishead = [1] * (count + 1)
741 index = self.index
756 index = self.index
742 for r in xrange(count):
757 for r in xrange(count):
743 e = index[r]
758 e = index[r]
744 ishead[e[5]] = ishead[e[6]] = 0
759 ishead[e[5]] = ishead[e[6]] = 0
745 return [self.node(r) for r in xrange(count) if ishead[r]]
760 return [self.node(r) for r in xrange(count) if ishead[r]]
746
761
747 if start is None:
762 if start is None:
748 start = nullid
763 start = nullid
749 if stop is None:
764 if stop is None:
750 stop = []
765 stop = []
751 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
766 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
752 startrev = self.rev(start)
767 startrev = self.rev(start)
753 reachable = {startrev: 1}
768 reachable = {startrev: 1}
754 heads = {startrev: 1}
769 heads = {startrev: 1}
755
770
756 parentrevs = self.parentrevs
771 parentrevs = self.parentrevs
757 for r in xrange(startrev + 1, self.count()):
772 for r in xrange(startrev + 1, self.count()):
758 for p in parentrevs(r):
773 for p in parentrevs(r):
759 if p in reachable:
774 if p in reachable:
760 if r not in stoprevs:
775 if r not in stoprevs:
761 reachable[r] = 1
776 reachable[r] = 1
762 heads[r] = 1
777 heads[r] = 1
763 if p in heads and p not in stoprevs:
778 if p in heads and p not in stoprevs:
764 del heads[p]
779 del heads[p]
765
780
766 return [self.node(r) for r in heads]
781 return [self.node(r) for r in heads]
767
782
768 def children(self, node):
783 def children(self, node):
769 """find the children of a given node"""
784 """find the children of a given node"""
770 c = []
785 c = []
771 p = self.rev(node)
786 p = self.rev(node)
772 for r in range(p + 1, self.count()):
787 for r in range(p + 1, self.count()):
773 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
788 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
774 if prevs:
789 if prevs:
775 for pr in prevs:
790 for pr in prevs:
776 if pr == p:
791 if pr == p:
777 c.append(self.node(r))
792 c.append(self.node(r))
778 elif p == nullrev:
793 elif p == nullrev:
779 c.append(self.node(r))
794 c.append(self.node(r))
780 return c
795 return c
781
796
782 def _match(self, id):
797 def _match(self, id):
783 if isinstance(id, (long, int)):
798 if isinstance(id, (long, int)):
784 # rev
799 # rev
785 return self.node(id)
800 return self.node(id)
786 if len(id) == 20:
801 if len(id) == 20:
787 # possibly a binary node
802 # possibly a binary node
788 # odds of a binary node being all hex in ASCII are 1 in 10**25
803 # odds of a binary node being all hex in ASCII are 1 in 10**25
789 try:
804 try:
790 node = id
805 node = id
791 r = self.rev(node) # quick search the index
806 r = self.rev(node) # quick search the index
792 return node
807 return node
793 except LookupError:
808 except LookupError:
794 pass # may be partial hex id
809 pass # may be partial hex id
795 try:
810 try:
796 # str(rev)
811 # str(rev)
797 rev = int(id)
812 rev = int(id)
798 if str(rev) != id:
813 if str(rev) != id:
799 raise ValueError
814 raise ValueError
800 if rev < 0:
815 if rev < 0:
801 rev = self.count() + rev
816 rev = self.count() + rev
802 if rev < 0 or rev >= self.count():
817 if rev < 0 or rev >= self.count():
803 raise ValueError
818 raise ValueError
804 return self.node(rev)
819 return self.node(rev)
805 except (ValueError, OverflowError):
820 except (ValueError, OverflowError):
806 pass
821 pass
807 if len(id) == 40:
822 if len(id) == 40:
808 try:
823 try:
809 # a full hex nodeid?
824 # a full hex nodeid?
810 node = bin(id)
825 node = bin(id)
811 r = self.rev(node)
826 r = self.rev(node)
812 return node
827 return node
813 except TypeError:
828 except TypeError:
814 pass
829 pass
815
830
816 def _partialmatch(self, id):
831 def _partialmatch(self, id):
817 if len(id) < 40:
832 if len(id) < 40:
818 try:
833 try:
819 # hex(node)[:...]
834 # hex(node)[:...]
820 bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
835 bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
821 node = None
836 node = None
822 for n in self.nodemap:
837 for n in self.nodemap:
823 if n.startswith(bin_id) and hex(n).startswith(id):
838 if n.startswith(bin_id) and hex(n).startswith(id):
824 if node is not None:
839 if node is not None:
825 raise LookupError(_("Ambiguous identifier"))
840 raise LookupError(_("Ambiguous identifier"))
826 node = n
841 node = n
827 if node is not None:
842 if node is not None:
828 return node
843 return node
829 except TypeError:
844 except TypeError:
830 pass
845 pass
831
846
832 def lookup(self, id):
847 def lookup(self, id):
833 """locate a node based on:
848 """locate a node based on:
834 - revision number or str(revision number)
849 - revision number or str(revision number)
835 - nodeid or subset of hex nodeid
850 - nodeid or subset of hex nodeid
836 """
851 """
837 n = self._match(id)
852 n = self._match(id)
838 if n is not None:
853 if n is not None:
839 return n
854 return n
840 n = self._partialmatch(id)
855 n = self._partialmatch(id)
841 if n:
856 if n:
842 return n
857 return n
843
858
844 raise LookupError(_("No match found"))
859 raise LookupError(_("No match found"))
845
860
846 def cmp(self, node, text):
861 def cmp(self, node, text):
847 """compare text with a given file revision"""
862 """compare text with a given file revision"""
848 p1, p2 = self.parents(node)
863 p1, p2 = self.parents(node)
849 return hash(text, p1, p2) != node
864 return hash(text, p1, p2) != node
850
865
851 def chunk(self, rev, df=None):
866 def chunk(self, rev, df=None):
852 def loadcache(df):
867 def loadcache(df):
853 if not df:
868 if not df:
854 if self._inline:
869 if self._inline:
855 df = self.opener(self.indexfile)
870 df = self.opener(self.indexfile)
856 else:
871 else:
857 df = self.opener(self.datafile)
872 df = self.opener(self.datafile)
858 df.seek(start)
873 df.seek(start)
859 self._chunkcache = (start, df.read(cache_length))
874 self._chunkcache = (start, df.read(cache_length))
860
875
861 start, length = self.start(rev), self.length(rev)
876 start, length = self.start(rev), self.length(rev)
862 if self._inline:
877 if self._inline:
863 start += (rev + 1) * self._io.size
878 start += (rev + 1) * self._io.size
864 end = start + length
879 end = start + length
865
880
866 offset = 0
881 offset = 0
867 if not self._chunkcache:
882 if not self._chunkcache:
868 cache_length = max(65536, length)
883 cache_length = max(65536, length)
869 loadcache(df)
884 loadcache(df)
870 else:
885 else:
871 cache_start = self._chunkcache[0]
886 cache_start = self._chunkcache[0]
872 cache_length = len(self._chunkcache[1])
887 cache_length = len(self._chunkcache[1])
873 cache_end = cache_start + cache_length
888 cache_end = cache_start + cache_length
874 if start >= cache_start and end <= cache_end:
889 if start >= cache_start and end <= cache_end:
875 # it is cached
890 # it is cached
876 offset = start - cache_start
891 offset = start - cache_start
877 else:
892 else:
878 cache_length = max(65536, length)
893 cache_length = max(65536, length)
879 loadcache(df)
894 loadcache(df)
880
895
881 # avoid copying large chunks
896 # avoid copying large chunks
882 c = self._chunkcache[1]
897 c = self._chunkcache[1]
883 if cache_length != length:
898 if cache_length != length:
884 c = c[offset:offset + length]
899 c = c[offset:offset + length]
885
900
886 return decompress(c)
901 return decompress(c)
887
902
888 def delta(self, node):
903 def delta(self, node):
889 """return or calculate a delta between a node and its predecessor"""
904 """return or calculate a delta between a node and its predecessor"""
890 r = self.rev(node)
905 r = self.rev(node)
891 return self.revdiff(r - 1, r)
906 return self.revdiff(r - 1, r)
892
907
893 def revdiff(self, rev1, rev2):
908 def revdiff(self, rev1, rev2):
894 """return or calculate a delta between two revisions"""
909 """return or calculate a delta between two revisions"""
895 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
910 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
896 return self.chunk(rev2)
911 return self.chunk(rev2)
897
912
898 return mdiff.textdiff(self.revision(self.node(rev1)),
913 return mdiff.textdiff(self.revision(self.node(rev1)),
899 self.revision(self.node(rev2)))
914 self.revision(self.node(rev2)))
900
915
901 def revision(self, node):
916 def revision(self, node):
902 """return an uncompressed revision of a given"""
917 """return an uncompressed revision of a given"""
903 if node == nullid:
918 if node == nullid:
904 return ""
919 return ""
905 if self._cache and self._cache[0] == node:
920 if self._cache and self._cache[0] == node:
906 return str(self._cache[2])
921 return str(self._cache[2])
907
922
908 # look up what we need to read
923 # look up what we need to read
909 text = None
924 text = None
910 rev = self.rev(node)
925 rev = self.rev(node)
911 base = self.base(rev)
926 base = self.base(rev)
912
927
913 # check rev flags
928 # check rev flags
914 if self.index[rev][0] & 0xFFFF:
929 if self.index[rev][0] & 0xFFFF:
915 raise RevlogError(_('incompatible revision flag %x') %
930 raise RevlogError(_('incompatible revision flag %x') %
916 (self.index[rev][0] & 0xFFFF))
931 (self.index[rev][0] & 0xFFFF))
917
932
918 if self._inline:
933 if self._inline:
919 # we probably have the whole chunk cached
934 # we probably have the whole chunk cached
920 df = None
935 df = None
921 else:
936 else:
922 df = self.opener(self.datafile)
937 df = self.opener(self.datafile)
923
938
924 # do we have useful data cached?
939 # do we have useful data cached?
925 if self._cache and self._cache[1] >= base and self._cache[1] < rev:
940 if self._cache and self._cache[1] >= base and self._cache[1] < rev:
926 base = self._cache[1]
941 base = self._cache[1]
927 text = str(self._cache[2])
942 text = str(self._cache[2])
928 self._loadindex(base, rev + 1)
943 self._loadindex(base, rev + 1)
929 else:
944 else:
930 self._loadindex(base, rev + 1)
945 self._loadindex(base, rev + 1)
931 text = self.chunk(base, df=df)
946 text = self.chunk(base, df=df)
932
947
933 bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
948 bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
934 text = mdiff.patches(text, bins)
949 text = mdiff.patches(text, bins)
935 p1, p2 = self.parents(node)
950 p1, p2 = self.parents(node)
936 if node != hash(text, p1, p2):
951 if node != hash(text, p1, p2):
937 raise RevlogError(_("integrity check failed on %s:%d")
952 raise RevlogError(_("integrity check failed on %s:%d")
938 % (self.datafile, rev))
953 % (self.datafile, rev))
939
954
940 self._cache = (node, rev, text)
955 self._cache = (node, rev, text)
941 return text
956 return text
942
957
    def checkinlinesize(self, tr, fp=None):
        if not self._inline:
            return
        if not fp:
            fp = self.opener(self.indexfile, 'r')
        fp.seek(0, 2)
        size = fp.tell()
        if size < 131072:
            return
        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)
        df = self.opener(self.datafile, 'w')
        calc = self._io.size
        for r in xrange(self.count()):
            start = self.start(r) + (r + 1) * calc
            length = self.length(r)
            fp.seek(start)
            d = fp.read(length)
            df.write(d)
        fp.close()
        df.close()
        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in xrange(self.count()):
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call rename, the temp file will never replace the
        # real index
        fp.rename()

        tr.replace(self.indexfile, trindex * calc)
        self._chunkcache = None

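    # Editor's sketch: in an inline revlog the index entries and data chunks
    # are interleaved in the .i file, so the data for rev r lives after r + 1
    # index entries plus all earlier data (self.start(r)).  The copy loop
    # above seeks to exactly this offset.
    def _inline_offset_sketch(start_r, r, entrysize):
        # start_r: total length of all data chunks before rev r
        return start_r + (r + 1) * entrysize
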
    def addrevision(self, text, transaction, link, p1, p2, d=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        d - an optional precomputed delta
        """
        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)

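    # Editor's note: the precomputed delta d, if given, must be a diff
    # against the current tip, since _addrevision below applies it on top of
    # the previous revision.  A typical call omits it:
    #
    #   node = log.addrevision(text, transaction, linkrev, p1, p2)
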
    def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
        node = hash(text, p1, p2)
        if node in self.nodemap:
            return node

        curr = self.count()
        prev = curr - 1
        base = self.base(prev)
        offset = self.end(prev)

        if curr:
            if not d:
                ptext = self.revision(self.node(prev))
                d = mdiff.textdiff(ptext, text)
            data = compress(d)
            l = len(data[1]) + len(data[0])
            dist = l + offset - self.start(base)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if not curr or dist > len(text) * 2:
            data = compress(text)
            l = len(data[1]) + len(data[0])
            base = curr

        e = (offset_type(offset, 0), l, len(text),
             base, link, self.rev(p1), self.rev(p2), node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

        self._cache = (node, curr, text)
        return node

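    # Editor's sketch: the snapshot heuristic above, isolated.  A revision
    # is stored as a full version once reconstructing it would mean reading
    # more than twice its plain-text size (dist counts the compressed bytes
    # from the chain base through the new delta).
    def _use_full_version_sketch(dist, textlen):
        return dist > textlen * 2
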
    def ancestor(self, a, b):
        """calculate the least common ancestor of nodes a and b"""

        def parents(rev):
            return [p for p in self.parentrevs(rev) if p != nullrev]

        c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
        if c is None:
            return nullid

        return self.node(c)

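    # Editor's note: parentrevs() reports missing parents as nullrev, so the
    # local parents() helper filters them out and the generic ancestor
    # algorithm never walks past a root revision.
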
    def group(self, nodelist, lookup, infocollect=None):
        """calculate a delta group

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to those nodes.  The first delta is
        parent(nodelist[0]) -> nodelist[0]; the receiver is guaranteed
        to have this parent, as it has all history before these
        changesets.  For merges, the delta is taken against the first
        parent.
        """
        revs = [self.rev(n) for n in nodelist]

        # if we don't have any revisions touched by these changesets, bail
        if not revs:
            yield changegroup.closechunk()
            return

        # add the parent of the first rev
        p = self.parents(self.node(revs[0]))[0]
        revs.insert(0, self.rev(p))

        # build deltas
        for d in xrange(0, len(revs) - 1):
            a, b = revs[d], revs[d + 1]
            nb = self.node(b)

            if infocollect is not None:
                infocollect(nb)

            p = self.parents(nb)
            meta = nb + p[0] + p[1] + lookup(nb)
            if a == -1:
                d = self.revision(nb)
                meta += mdiff.trivialdiffheader(len(d))
            else:
                d = self.revdiff(a, b)
            yield changegroup.chunkheader(len(meta) + len(d))
            yield meta
            if len(d) > 2**20:
                pos = 0
                while pos < len(d):
                    pos2 = pos + 2**18
                    yield d[pos:pos2]
                    pos = pos2
            else:
                yield d

        yield changegroup.closechunk()

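    # Editor's sketch: the slicing above, isolated.  Deltas over 1MB are
    # yielded in 256k pieces so no single string handed downstream is huge;
    # smaller deltas are passed through whole.
    def _slice_delta_sketch(d):
        if len(d) <= 2**20:
            yield d
            return
        pos = 0
        while pos < len(d):
            yield d[pos:pos + 2**18]
            pos += 2**18
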
    def addgroup(self, revs, linkmapper, transaction, unique=0):
        """
        add a delta group

        Given a set of deltas, add them to the revision log.  The
        first delta is against its parent, which should already be in
        our log; the rest are against the previous delta.
        """

        # track the base of the current delta log
        r = self.count()
        t = r - 1
        node = None

        base = prev = nullrev
        start = end = textlen = 0
        if r:
            end = self.end(t)

        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        # loop through our set of deltas
        chain = None
        for chunk in revs:
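            # each chunk starts with an 80-byte header: four 20-byte sha1
            # nodes -- this revision, its two parents, and the changeset
            # it is linked to -- followed by the delta data itself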
            node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
            link = linkmapper(cs)
            if node in self.nodemap:
                # this can happen if two branches make the same change
                # if unique:
                #    raise RevlogError(_("already have %s") % hex(node[:4]))
                chain = node
                continue
            delta = buffer(chunk, 80)
            del chunk

            for p in (p1, p2):
                if p not in self.nodemap:
                    raise LookupError(_("unknown parent %s") % short(p))

            if not chain:
                # retrieve the parent revision of the delta chain
                chain = p1
                if chain not in self.nodemap:
                    raise LookupError(_("unknown base %s") % short(chain))

            # full versions are inserted when the needed deltas become
            # comparable to the uncompressed text or when the previous
            # version is not the one we have a delta against. We use
            # the size of the previous full rev as a proxy for the
            # current size.

            if chain == prev:
                cdelta = compress(delta)
                cdeltalen = len(cdelta[0]) + len(cdelta[1])
                textlen = mdiff.patchedsize(textlen, delta)

            if chain != prev or (end - start + cdeltalen) > textlen * 2:
                # flush our writes here so we can read it in revision
                if dfh:
                    dfh.flush()
                ifh.flush()
                text = self.revision(chain)
                if len(text) == 0:
                    # skip over trivial delta header
                    text = buffer(delta, 12)
                else:
                    text = mdiff.patches(text, [delta])
                del delta
                chk = self._addrevision(text, transaction, link, p1, p2, None,
                                        ifh, dfh)
                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index and data files
                    dfh = self.opener(self.datafile, "a")
                    ifh = self.opener(self.indexfile, "a")
                if chk != node:
                    raise RevlogError(_("consistency error adding group"))
                textlen = len(text)
            else:
                e = (offset_type(end, 0), cdeltalen, textlen, base,
                     link, self.rev(p1), self.rev(p2), node)
                self.index.insert(-1, e)
                self.nodemap[node] = r
                entry = self._io.packentry(e, self.node, self.version, r)
                if self._inline:
                    ifh.write(entry)
                    ifh.write(cdelta[0])
                    ifh.write(cdelta[1])
                    self.checkinlinesize(transaction, ifh)
                    if not self._inline:
                        dfh = self.opener(self.datafile, "a")
                        ifh = self.opener(self.indexfile, "a")
                else:
                    dfh.write(cdelta[0])
                    dfh.write(cdelta[1])
                    ifh.write(entry)

            t, r, chain, prev = r, r + 1, node, node
            base = self.base(t)
            start = self.start(base)
            end = self.end(t)

        return node

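    # Editor's note: within addgroup, "chain" is the node the next delta
    # applies to.  The first delta in a group is based on a parent already in
    # the log; each later one is based on the revision added just before it,
    # so a chunk whose base is not the previous node takes the slower
    # re-encoding path (reconstruct the full text, then _addrevision).
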
    def strip(self, rev, minlink):
        if self.count() == 0 or rev >= self.count():
            return

        if isinstance(self.index, lazyindex):
            self._loadindexmap()

        # When stripping away a revision, we need to make sure it
        # does not actually belong to an older changeset.
        # The minlink parameter defines the oldest revision
        # we're allowed to strip away.
        while minlink > self.index[rev][4]:
            rev += 1
            if rev >= self.count():
                return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            df = self.opener(self.datafile, "a")
            df.truncate(end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        indexf = self.opener(self.indexfile, "a")
        indexf.truncate(end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chunkcache = None
        for x in xrange(rev, self.count()):
            del self.nodemap[self.node(x)]

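        # the in-memory index keeps a trailing sentinel entry for nullid
        # (which is why new entries go in with insert(-1, ...)), so slice up
        # to, but not including, the last entry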
        del self.index[rev:-1]

    def checksize(self):
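        # editor's note: returns (dd, di), the number of bytes by which the
        # data file and the index file differ from their expected sizes;
        # verify uses nonzero values to report damaged revlogs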
        expected = 0
        if self.count():
            expected = max(0, self.end(self.count() - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            s = self._io.size
            i = max(0, actual / s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in xrange(self.count()):
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - self.count() * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)