use the new C parseindex implementation in parsers
Bernhard Leiner
r7109:528b7fc1 default
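This changeset replaces the pure-Python unpacking loop in revlogio.parseindex
with a single call into the C extension module. Each revlogng index record is
a fixed 64 bytes (struct.calcsize(">Qiiiiii20s12x") = 8 + 6*4 + 20 + 12), so
the whole index can be walked in one C pass. A minimal sketch of the new fast
path, assuming the extension has been built and is importable as
mercurial.parsers (the call and its (index, nodemap, cache) return triple are
taken from the diff below):

    # parse a revlog index with the C extension instead of the old
    # Python struct-unpacking loop
    from mercurial import parsers

    def parseindex_c(fp, inline):
        data = fp.read()
        # parse_index builds the index list, the {node: rev} node map
        # and the chunk cache in a single pass over the raw data
        index, nodemap, cache = parsers.parse_index(data, inline)
        return index, nodemap, cache

Callers such as revlog.__init__ keep working unchanged; the only follow-up is
that the magic null revision is now appended to the index only if it is not
already present (see the last hunk below).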
@@ -1,1361 +1,1334 @@
1 """
1 """
2 revlog.py - storage back-end for mercurial
2 revlog.py - storage back-end for mercurial
3
3
4 This provides efficient delta storage with O(1) retrieve and append
4 This provides efficient delta storage with O(1) retrieve and append
5 and O(changes) merge between branches
5 and O(changes) merge between branches
6
6
7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
8
8
9 This software may be used and distributed according to the terms
9 This software may be used and distributed according to the terms
10 of the GNU General Public License, incorporated herein by reference.
10 of the GNU General Public License, incorporated herein by reference.
11 """
11 """
12
12
13 from node import bin, hex, nullid, nullrev, short
13 from node import bin, hex, nullid, nullrev, short
14 from i18n import _
14 from i18n import _
-import changegroup, errno, ancestor, mdiff
+import changegroup, errno, ancestor, mdiff, parsers
 import struct, util, zlib

 _pack = struct.pack
 _unpack = struct.unpack
 _compress = zlib.compress
 _decompress = zlib.decompress
 _sha = util.sha1

 # revlog flags
 REVLOGV0 = 0
 REVLOGNG = 1
 REVLOGNGINLINEDATA = (1 << 16)
 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
 REVLOG_DEFAULT_FORMAT = REVLOGNG
 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS

 class RevlogError(Exception):
     pass

 class LookupError(RevlogError, KeyError):
     def __init__(self, name, index, message):
         self.name = name
         if isinstance(name, str) and len(name) == 20:
             name = short(name)
         RevlogError.__init__(self, _('%s@%s: %s') % (index, name, message))

     def __str__(self):
         return RevlogError.__str__(self)

 def getoffset(q):
     return int(q >> 16)

 def gettype(q):
     return int(q & 0xFFFF)

 def offset_type(offset, type):
     return long(long(offset) << 16 | type)

 def hash(text, p1, p2):
     """generate a hash from the given text and its parent hashes

     This hash combines both the current file contents and its history
     in a manner that makes it easy to distinguish nodes with the same
     content in the revision graph.
     """
     l = [p1, p2]
     l.sort()
     s = _sha(l[0])
     s.update(l[1])
     s.update(text)
     return s.digest()

 def compress(text):
     """ generate a possibly-compressed representation of text """
     if not text:
         return ("", text)
     l = len(text)
     bin = None
     if l < 44:
         pass
     elif l > 1000000:
         # zlib makes an internal copy, thus doubling memory usage for
         # large files, so lets do this in pieces
         z = zlib.compressobj()
         p = []
         pos = 0
         while pos < l:
             pos2 = pos + 2**20
             p.append(z.compress(text[pos:pos2]))
             pos = pos2
         p.append(z.flush())
         if sum(map(len, p)) < l:
             bin = "".join(p)
     else:
         bin = _compress(text)
     if bin is None or len(bin) > l:
         if text[0] == '\0':
             return ("", text)
         return ('u', text)
     return ("", bin)

 def decompress(bin):
     """ decompress the given input """
     if not bin:
         return bin
     t = bin[0]
     if t == '\0':
         return bin
     if t == 'x':
         return _decompress(bin)
     if t == 'u':
         return bin[1:]
     raise RevlogError(_("unknown compression type %r") % t)

 class lazyparser(object):
     """
     this class avoids the need to parse the entirety of large indices
     """

     # lazyparser is not safe to use on windows if win32 extensions not
     # available. it keeps file handle open, which make it not possible
     # to break hardlinks on local cloned repos.

     def __init__(self, dataf, size):
         self.dataf = dataf
         self.s = struct.calcsize(indexformatng)
         self.datasize = size
         self.l = size/self.s
         self.index = [None] * self.l
         self.map = {nullid: nullrev}
         self.allmap = 0
         self.all = 0
         self.mapfind_count = 0

     def loadmap(self):
         """
         during a commit, we need to make sure the rev being added is
         not a duplicate. This requires loading the entire index,
         which is fairly slow. loadmap can load up just the node map,
         which takes much less time.
         """
         if self.allmap:
             return
         end = self.datasize
         self.allmap = 1
         cur = 0
         count = 0
         blocksize = self.s * 256
         self.dataf.seek(0)
         while cur < end:
             data = self.dataf.read(blocksize)
             off = 0
             for x in xrange(256):
                 n = data[off + ngshaoffset:off + ngshaoffset + 20]
                 self.map[n] = count
                 count += 1
                 if count >= self.l:
                     break
                 off += self.s
             cur += blocksize

     def loadblock(self, blockstart, blocksize, data=None):
         if self.all:
             return
         if data is None:
             self.dataf.seek(blockstart)
             if blockstart + blocksize > self.datasize:
                 # the revlog may have grown since we've started running,
                 # but we don't have space in self.index for more entries.
                 # limit blocksize so that we don't get too much data.
                 blocksize = max(self.datasize - blockstart, 0)
             data = self.dataf.read(blocksize)
         lend = len(data) / self.s
         i = blockstart / self.s
         off = 0
         # lazyindex supports __delitem__
         if lend > len(self.index) - i:
             lend = len(self.index) - i
         for x in xrange(lend):
             if self.index[i + x] == None:
                 b = data[off : off + self.s]
                 self.index[i + x] = b
                 n = b[ngshaoffset:ngshaoffset + 20]
                 self.map[n] = i + x
             off += self.s

     def findnode(self, node):
         """search backwards through the index file for a specific node"""
         if self.allmap:
             return None

         # hg log will cause many many searches for the manifest
         # nodes. After we get called a few times, just load the whole
         # thing.
         if self.mapfind_count > 8:
             self.loadmap()
             if node in self.map:
                 return node
             return None
         self.mapfind_count += 1
         last = self.l - 1
         while self.index[last] != None:
             if last == 0:
                 self.all = 1
                 self.allmap = 1
                 return None
             last -= 1
         end = (last + 1) * self.s
         blocksize = self.s * 256
         while end >= 0:
             start = max(end - blocksize, 0)
             self.dataf.seek(start)
             data = self.dataf.read(end - start)
             findend = end - start
             while True:
                 # we're searching backwards, so we have to make sure
                 # we don't find a changeset where this node is a parent
                 off = data.find(node, 0, findend)
                 findend = off
                 if off >= 0:
                     i = off / self.s
                     off = i * self.s
                     n = data[off + ngshaoffset:off + ngshaoffset + 20]
                     if n == node:
                         self.map[n] = i + start / self.s
                         return node
                 else:
                     break
             end -= blocksize
         return None

     def loadindex(self, i=None, end=None):
         if self.all:
             return
         all = False
         if i == None:
             blockstart = 0
             blocksize = (65536 / self.s) * self.s
             end = self.datasize
             all = True
         else:
             if end:
                 blockstart = i * self.s
                 end = end * self.s
                 blocksize = end - blockstart
             else:
                 blockstart = (i & ~1023) * self.s
                 blocksize = self.s * 1024
                 end = blockstart + blocksize
         while blockstart < end:
             self.loadblock(blockstart, blocksize)
             blockstart += blocksize
         if all:
             self.all = True

 class lazyindex(object):
     """a lazy version of the index array"""
     def __init__(self, parser):
         self.p = parser
     def __len__(self):
         return len(self.p.index)
     def load(self, pos):
         if pos < 0:
             pos += len(self.p.index)
         self.p.loadindex(pos)
         return self.p.index[pos]
     def __getitem__(self, pos):
         return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
     def __setitem__(self, pos, item):
         self.p.index[pos] = _pack(indexformatng, *item)
     def __delitem__(self, pos):
         del self.p.index[pos]
     def insert(self, pos, e):
         self.p.index.insert(pos, _pack(indexformatng, *e))
     def append(self, e):
         self.p.index.append(_pack(indexformatng, *e))

 class lazymap(object):
     """a lazy version of the node map"""
     def __init__(self, parser):
         self.p = parser
     def load(self, key):
         n = self.p.findnode(key)
         if n == None:
             raise KeyError(key)
     def __contains__(self, key):
         if key in self.p.map:
             return True
         self.p.loadmap()
         return key in self.p.map
     def __iter__(self):
         yield nullid
         for i in xrange(self.p.l):
             ret = self.p.index[i]
             if not ret:
                 self.p.loadindex(i)
                 ret = self.p.index[i]
             if isinstance(ret, str):
                 ret = _unpack(indexformatng, ret)
             yield ret[7]
     def __getitem__(self, key):
         try:
             return self.p.map[key]
         except KeyError:
             try:
                 self.load(key)
                 return self.p.map[key]
             except KeyError:
                 raise KeyError("node " + hex(key))
     def __setitem__(self, key, val):
         self.p.map[key] = val
     def __delitem__(self, key):
         del self.p.map[key]

 indexformatv0 = ">4l20s20s20s"
 v0shaoffset = 56

 class revlogoldio(object):
     def __init__(self):
         self.size = struct.calcsize(indexformatv0)

     def parseindex(self, fp, inline):
         s = self.size
         index = []
         nodemap = {nullid: nullrev}
         n = off = 0
         data = fp.read()
         l = len(data)
         while off + s <= l:
             cur = data[off:off + s]
             off += s
             e = _unpack(indexformatv0, cur)
             # transform to revlogv1 format
             e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                   nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
             index.append(e2)
             nodemap[e[6]] = n
             n += 1

         return index, nodemap, None

     def packentry(self, entry, node, version, rev):
         e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
               node(entry[5]), node(entry[6]), entry[7])
         return _pack(indexformatv0, *e2)

 # index ng:
 # 6 bytes offset
 # 2 bytes flags
 # 4 bytes compressed length
 # 4 bytes uncompressed length
 # 4 bytes: base rev
 # 4 bytes link rev
 # 4 bytes parent 1 rev
 # 4 bytes parent 2 rev
 # 32 bytes: nodeid
 indexformatng = ">Qiiiiii20s12x"
 ngshaoffset = 32
 versionformat = ">I"

 class revlogio(object):
     def __init__(self):
         self.size = struct.calcsize(indexformatng)

     def parseindex(self, fp, inline):
         try:
             size = util.fstat(fp).st_size
         except AttributeError:
             size = 0

         if util.openhardlinks() and not inline and size > 1000000:
             # big index, let's parse it on demand
             parser = lazyparser(fp, size)
             index = lazyindex(parser)
             nodemap = lazymap(parser)
             e = list(index[0])
             type = gettype(e[0])
             e[0] = offset_type(0, type)
             index[0] = e
             return index, nodemap, None

-        s = self.size
-        cache = None
-        index = []
-        nodemap = {nullid: nullrev}
-        n = off = 0
-        # if we're not using lazymap, always read the whole index
-        data = fp.read()
-        l = len(data) - s
-        append = index.append
-        if inline:
-            cache = (0, data)
-            while off <= l:
-                e = _unpack(indexformatng, data[off:off + s])
-                nodemap[e[7]] = n
-                append(e)
-                n += 1
-                if e[1] < 0:
-                    break
-                off += e[1] + s
-        else:
-            while off <= l:
-                e = _unpack(indexformatng, data[off:off + s])
-                nodemap[e[7]] = n
-                append(e)
-                n += 1
-                off += s
-
-        e = list(index[0])
-        type = gettype(e[0])
-        e[0] = offset_type(0, type)
-        index[0] = e
-
-        return index, nodemap, cache
+        data = fp.read()
+        # call the C implementation to parse the index data
+        index, nodemap, cache = parsers.parse_index(data, inline)
+        return index, nodemap, cache

     def packentry(self, entry, node, version, rev):
         p = _pack(indexformatng, *entry)
         if rev == 0:
             p = _pack(versionformat, version) + p[4:]
         return p

 class revlog(object):
     """
     the underlying revision storage object

     A revlog consists of two parts, an index and the revision data.

     The index is a file with a fixed record size containing
     information on each revision, including its nodeid (hash), the
     nodeids of its parents, the position and offset of its data within
     the data file, and the revision it's based on. Finally, each entry
     contains a linkrev entry that can serve as a pointer to external
     data.

     The revision data itself is a linear collection of data chunks.
     Each chunk represents a revision and is usually represented as a
     delta against the previous chunk. To bound lookup time, runs of
     deltas are limited to about 2 times the length of the original
     version data. This makes retrieval of a version proportional to
     its size, or O(1) relative to the number of revisions.

     Both pieces of the revlog are written to in an append-only
     fashion, which means we never need to rewrite a file to insert or
     remove data, and can use some simple techniques to avoid the need
     for locking while reading.
     """
     def __init__(self, opener, indexfile):
         """
         create a revlog object

         opener is a function that abstracts the file opening operation
         and can be used to implement COW semantics or the like.
         """
         self.indexfile = indexfile
         self.datafile = indexfile[:-2] + ".d"
         self.opener = opener
         self._cache = None
         self._chunkcache = None
         self.nodemap = {nullid: nullrev}
         self.index = []

         v = REVLOG_DEFAULT_VERSION
         if hasattr(opener, "defversion"):
             v = opener.defversion
         if v & REVLOGNG:
             v |= REVLOGNGINLINEDATA

         i = ""
         try:
             f = self.opener(self.indexfile)
             i = f.read(4)
             f.seek(0)
             if len(i) > 0:
                 v = struct.unpack(versionformat, i)[0]
         except IOError, inst:
             if inst.errno != errno.ENOENT:
                 raise

         self.version = v
         self._inline = v & REVLOGNGINLINEDATA
         flags = v & ~0xFFFF
         fmt = v & 0xFFFF
         if fmt == REVLOGV0 and flags:
             raise RevlogError(_("index %s unknown flags %#04x for format v0")
                               % (self.indexfile, flags >> 16))
         elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
             raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                               % (self.indexfile, flags >> 16))
         elif fmt > REVLOGNG:
             raise RevlogError(_("index %s unknown format %d")
                               % (self.indexfile, fmt))

         self._io = revlogio()
         if self.version == REVLOGV0:
             self._io = revlogoldio()
         if i:
             d = self._io.parseindex(f, self._inline)
             self.index, self.nodemap, self._chunkcache = d

-        # add the magic null revision at -1
-        self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
+        # add the magic null revision at -1 (if it hasn't been done already)
+        if (self.index == [] or isinstance(self.index, lazyindex) or
+            self.index[-1][7] != nullid) :
+            self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
497
470
498 def _loadindex(self, start, end):
471 def _loadindex(self, start, end):
499 """load a block of indexes all at once from the lazy parser"""
472 """load a block of indexes all at once from the lazy parser"""
500 if isinstance(self.index, lazyindex):
473 if isinstance(self.index, lazyindex):
501 self.index.p.loadindex(start, end)
474 self.index.p.loadindex(start, end)
502
475
503 def _loadindexmap(self):
476 def _loadindexmap(self):
504 """loads both the map and the index from the lazy parser"""
477 """loads both the map and the index from the lazy parser"""
505 if isinstance(self.index, lazyindex):
478 if isinstance(self.index, lazyindex):
506 p = self.index.p
479 p = self.index.p
507 p.loadindex()
480 p.loadindex()
508 self.nodemap = p.map
481 self.nodemap = p.map
509
482
510 def _loadmap(self):
483 def _loadmap(self):
511 """loads the map from the lazy parser"""
484 """loads the map from the lazy parser"""
512 if isinstance(self.nodemap, lazymap):
485 if isinstance(self.nodemap, lazymap):
513 self.nodemap.p.loadmap()
486 self.nodemap.p.loadmap()
514 self.nodemap = self.nodemap.p.map
487 self.nodemap = self.nodemap.p.map
515
488
516 def tip(self):
489 def tip(self):
517 return self.node(len(self.index) - 2)
490 return self.node(len(self.index) - 2)
518 def __len__(self):
491 def __len__(self):
519 return len(self.index) - 1
492 return len(self.index) - 1
520 def __iter__(self):
493 def __iter__(self):
521 for i in xrange(len(self)):
494 for i in xrange(len(self)):
522 yield i
495 yield i
523 def rev(self, node):
496 def rev(self, node):
524 try:
497 try:
525 return self.nodemap[node]
498 return self.nodemap[node]
526 except KeyError:
499 except KeyError:
527 raise LookupError(node, self.indexfile, _('no node'))
500 raise LookupError(node, self.indexfile, _('no node'))
528 def node(self, rev):
501 def node(self, rev):
529 return self.index[rev][7]
502 return self.index[rev][7]
530 def linkrev(self, node):
503 def linkrev(self, node):
531 return self.index[self.rev(node)][4]
504 return self.index[self.rev(node)][4]
532 def parents(self, node):
505 def parents(self, node):
533 d = self.index[self.rev(node)][5:7]
506 d = self.index[self.rev(node)][5:7]
534 return (self.node(d[0]), self.node(d[1]))
507 return (self.node(d[0]), self.node(d[1]))
535 def parentrevs(self, rev):
508 def parentrevs(self, rev):
536 return self.index[rev][5:7]
509 return self.index[rev][5:7]
537 def start(self, rev):
510 def start(self, rev):
538 return int(self.index[rev][0] >> 16)
511 return int(self.index[rev][0] >> 16)
539 def end(self, rev):
512 def end(self, rev):
540 return self.start(rev) + self.length(rev)
513 return self.start(rev) + self.length(rev)
541 def length(self, rev):
514 def length(self, rev):
542 return self.index[rev][1]
515 return self.index[rev][1]
543 def base(self, rev):
516 def base(self, rev):
544 return self.index[rev][3]
517 return self.index[rev][3]
545
518
546 def size(self, rev):
519 def size(self, rev):
547 """return the length of the uncompressed text for a given revision"""
520 """return the length of the uncompressed text for a given revision"""
548 l = self.index[rev][2]
521 l = self.index[rev][2]
549 if l >= 0:
522 if l >= 0:
550 return l
523 return l
551
524
552 t = self.revision(self.node(rev))
525 t = self.revision(self.node(rev))
553 return len(t)
526 return len(t)
554
527
555 # alternate implementation, The advantage to this code is it
528 # alternate implementation, The advantage to this code is it
556 # will be faster for a single revision. But, the results are not
529 # will be faster for a single revision. But, the results are not
557 # cached, so finding the size of every revision will be slower.
530 # cached, so finding the size of every revision will be slower.
558 """
531 """
559 if self.cache and self.cache[1] == rev:
532 if self.cache and self.cache[1] == rev:
560 return len(self.cache[2])
533 return len(self.cache[2])
561
534
562 base = self.base(rev)
535 base = self.base(rev)
563 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
536 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
564 base = self.cache[1]
537 base = self.cache[1]
565 text = self.cache[2]
538 text = self.cache[2]
566 else:
539 else:
567 text = self.revision(self.node(base))
540 text = self.revision(self.node(base))
568
541
569 l = len(text)
542 l = len(text)
570 for x in xrange(base + 1, rev + 1):
543 for x in xrange(base + 1, rev + 1):
571 l = mdiff.patchedsize(l, self.chunk(x))
544 l = mdiff.patchedsize(l, self.chunk(x))
572 return l
545 return l
573 """
546 """
574
547
575 def reachable(self, node, stop=None):
548 def reachable(self, node, stop=None):
576 """return a hash of all nodes ancestral to a given node, including
549 """return a hash of all nodes ancestral to a given node, including
577 the node itself, stopping when stop is matched"""
550 the node itself, stopping when stop is matched"""
578 reachable = {}
551 reachable = {}
579 visit = [node]
552 visit = [node]
580 reachable[node] = 1
553 reachable[node] = 1
581 if stop:
554 if stop:
582 stopn = self.rev(stop)
555 stopn = self.rev(stop)
583 else:
556 else:
584 stopn = 0
557 stopn = 0
585 while visit:
558 while visit:
586 n = visit.pop(0)
559 n = visit.pop(0)
587 if n == stop:
560 if n == stop:
588 continue
561 continue
589 if n == nullid:
562 if n == nullid:
590 continue
563 continue
591 for p in self.parents(n):
564 for p in self.parents(n):
592 if self.rev(p) < stopn:
565 if self.rev(p) < stopn:
593 continue
566 continue
594 if p not in reachable:
567 if p not in reachable:
595 reachable[p] = 1
568 reachable[p] = 1
596 visit.append(p)
569 visit.append(p)
597 return reachable
570 return reachable
598
571
599 def ancestors(self, *revs):
572 def ancestors(self, *revs):
600 'Generate the ancestors of revs using a breadth-first visit'
573 'Generate the ancestors of revs using a breadth-first visit'
601 visit = list(revs)
574 visit = list(revs)
602 seen = util.set([nullrev])
575 seen = util.set([nullrev])
603 while visit:
576 while visit:
604 for parent in self.parentrevs(visit.pop(0)):
577 for parent in self.parentrevs(visit.pop(0)):
605 if parent not in seen:
578 if parent not in seen:
606 visit.append(parent)
579 visit.append(parent)
607 seen.add(parent)
580 seen.add(parent)
608 yield parent
581 yield parent
609
582
610 def descendants(self, *revs):
583 def descendants(self, *revs):
611 'Generate the descendants of revs in topological order'
584 'Generate the descendants of revs in topological order'
612 seen = util.set(revs)
585 seen = util.set(revs)
613 for i in xrange(min(revs) + 1, len(self)):
586 for i in xrange(min(revs) + 1, len(self)):
614 for x in self.parentrevs(i):
587 for x in self.parentrevs(i):
615 if x != nullrev and x in seen:
588 if x != nullrev and x in seen:
616 seen.add(i)
589 seen.add(i)
617 yield i
590 yield i
618 break
591 break
619
592
620 def nodesbetween(self, roots=None, heads=None):
593 def nodesbetween(self, roots=None, heads=None):
621 """Return a tuple containing three elements. Elements 1 and 2 contain
594 """Return a tuple containing three elements. Elements 1 and 2 contain
622 a final list bases and heads after all the unreachable ones have been
595 a final list bases and heads after all the unreachable ones have been
623 pruned. Element 0 contains a topologically sorted list of all
596 pruned. Element 0 contains a topologically sorted list of all
624
597
625 nodes that satisfy these constraints:
598 nodes that satisfy these constraints:
626 1. All nodes must be descended from a node in roots (the nodes on
599 1. All nodes must be descended from a node in roots (the nodes on
627 roots are considered descended from themselves).
600 roots are considered descended from themselves).
628 2. All nodes must also be ancestors of a node in heads (the nodes in
601 2. All nodes must also be ancestors of a node in heads (the nodes in
629 heads are considered to be their own ancestors).
602 heads are considered to be their own ancestors).
630
603
631 If roots is unspecified, nullid is assumed as the only root.
604 If roots is unspecified, nullid is assumed as the only root.
632 If heads is unspecified, it is taken to be the output of the
605 If heads is unspecified, it is taken to be the output of the
633 heads method (i.e. a list of all nodes in the repository that
606 heads method (i.e. a list of all nodes in the repository that
634 have no children)."""
607 have no children)."""
635 nonodes = ([], [], [])
608 nonodes = ([], [], [])
636 if roots is not None:
609 if roots is not None:
637 roots = list(roots)
610 roots = list(roots)
638 if not roots:
611 if not roots:
639 return nonodes
612 return nonodes
640 lowestrev = min([self.rev(n) for n in roots])
613 lowestrev = min([self.rev(n) for n in roots])
641 else:
614 else:
642 roots = [nullid] # Everybody's a descendent of nullid
615 roots = [nullid] # Everybody's a descendent of nullid
643 lowestrev = nullrev
616 lowestrev = nullrev
644 if (lowestrev == nullrev) and (heads is None):
617 if (lowestrev == nullrev) and (heads is None):
645 # We want _all_ the nodes!
618 # We want _all_ the nodes!
646 return ([self.node(r) for r in self], [nullid], list(self.heads()))
619 return ([self.node(r) for r in self], [nullid], list(self.heads()))
647 if heads is None:
620 if heads is None:
648 # All nodes are ancestors, so the latest ancestor is the last
621 # All nodes are ancestors, so the latest ancestor is the last
649 # node.
622 # node.
650 highestrev = len(self) - 1
623 highestrev = len(self) - 1
651 # Set ancestors to None to signal that every node is an ancestor.
624 # Set ancestors to None to signal that every node is an ancestor.
652 ancestors = None
625 ancestors = None
653 # Set heads to an empty dictionary for later discovery of heads
626 # Set heads to an empty dictionary for later discovery of heads
654 heads = {}
627 heads = {}
655 else:
628 else:
656 heads = list(heads)
629 heads = list(heads)
657 if not heads:
630 if not heads:
658 return nonodes
631 return nonodes
659 ancestors = {}
632 ancestors = {}
660 # Turn heads into a dictionary so we can remove 'fake' heads.
633 # Turn heads into a dictionary so we can remove 'fake' heads.
661 # Also, later we will be using it to filter out the heads we can't
634 # Also, later we will be using it to filter out the heads we can't
662 # find from roots.
635 # find from roots.
663 heads = dict.fromkeys(heads, 0)
636 heads = dict.fromkeys(heads, 0)
664 # Start at the top and keep marking parents until we're done.
637 # Start at the top and keep marking parents until we're done.
665 nodestotag = heads.keys()
638 nodestotag = heads.keys()
666 # Remember where the top was so we can use it as a limit later.
639 # Remember where the top was so we can use it as a limit later.
667 highestrev = max([self.rev(n) for n in nodestotag])
640 highestrev = max([self.rev(n) for n in nodestotag])
668 while nodestotag:
641 while nodestotag:
669 # grab a node to tag
642 # grab a node to tag
670 n = nodestotag.pop()
643 n = nodestotag.pop()
671 # Never tag nullid
644 # Never tag nullid
672 if n == nullid:
645 if n == nullid:
673 continue
646 continue
674 # A node's revision number represents its place in a
647 # A node's revision number represents its place in a
675 # topologically sorted list of nodes.
648 # topologically sorted list of nodes.
676 r = self.rev(n)
649 r = self.rev(n)
677 if r >= lowestrev:
650 if r >= lowestrev:
678 if n not in ancestors:
651 if n not in ancestors:
679 # If we are possibly a descendent of one of the roots
652 # If we are possibly a descendent of one of the roots
680 # and we haven't already been marked as an ancestor
653 # and we haven't already been marked as an ancestor
681 ancestors[n] = 1 # Mark as ancestor
654 ancestors[n] = 1 # Mark as ancestor
682 # Add non-nullid parents to list of nodes to tag.
655 # Add non-nullid parents to list of nodes to tag.
683 nodestotag.extend([p for p in self.parents(n) if
656 nodestotag.extend([p for p in self.parents(n) if
684 p != nullid])
657 p != nullid])
685 elif n in heads: # We've seen it before, is it a fake head?
658 elif n in heads: # We've seen it before, is it a fake head?
686 # So it is, real heads should not be the ancestors of
659 # So it is, real heads should not be the ancestors of
687 # any other heads.
660 # any other heads.
688 heads.pop(n)
661 heads.pop(n)
689 if not ancestors:
662 if not ancestors:
690 return nonodes
663 return nonodes
691 # Now that we have our set of ancestors, we want to remove any
664 # Now that we have our set of ancestors, we want to remove any
692 # roots that are not ancestors.
665 # roots that are not ancestors.
693
666
694 # If one of the roots was nullid, everything is included anyway.
667 # If one of the roots was nullid, everything is included anyway.
695 if lowestrev > nullrev:
668 if lowestrev > nullrev:
696 # But, since we weren't, let's recompute the lowest rev to not
669 # But, since we weren't, let's recompute the lowest rev to not
697 # include roots that aren't ancestors.
670 # include roots that aren't ancestors.
698
671
699 # Filter out roots that aren't ancestors of heads
672 # Filter out roots that aren't ancestors of heads
700 roots = [n for n in roots if n in ancestors]
673 roots = [n for n in roots if n in ancestors]
701 # Recompute the lowest revision
674 # Recompute the lowest revision
702 if roots:
675 if roots:
703 lowestrev = min([self.rev(n) for n in roots])
676 lowestrev = min([self.rev(n) for n in roots])
704 else:
677 else:
705 # No more roots? Return empty list
678 # No more roots? Return empty list
706 return nonodes
679 return nonodes
707 else:
680 else:
708 # We are descending from nullid, and don't need to care about
681 # We are descending from nullid, and don't need to care about
709 # any other roots.
682 # any other roots.
710 lowestrev = nullrev
683 lowestrev = nullrev
711 roots = [nullid]
684 roots = [nullid]
712 # Transform our roots list into a 'set' (i.e. a dictionary where the
685 # Transform our roots list into a 'set' (i.e. a dictionary where the
713 # values don't matter.
686 # values don't matter.
714 descendents = dict.fromkeys(roots, 1)
687 descendents = dict.fromkeys(roots, 1)
715 # Also, keep the original roots so we can filter out roots that aren't
688 # Also, keep the original roots so we can filter out roots that aren't
716 # 'real' roots (i.e. are descended from other roots).
689 # 'real' roots (i.e. are descended from other roots).
717 roots = descendents.copy()
690 roots = descendents.copy()
718 # Our topologically sorted list of output nodes.
691 # Our topologically sorted list of output nodes.
719 orderedout = []
692 orderedout = []
720 # Don't start at nullid since we don't want nullid in our output list,
693 # Don't start at nullid since we don't want nullid in our output list,
721 # and if nullid shows up in descedents, empty parents will look like
694 # and if nullid shows up in descedents, empty parents will look like
722 # they're descendents.
695 # they're descendents.
723 for r in xrange(max(lowestrev, 0), highestrev + 1):
696 for r in xrange(max(lowestrev, 0), highestrev + 1):
724 n = self.node(r)
697 n = self.node(r)
725 isdescendent = False
698 isdescendent = False
726 if lowestrev == nullrev: # Everybody is a descendent of nullid
699 if lowestrev == nullrev: # Everybody is a descendent of nullid
727 isdescendent = True
700 isdescendent = True
728 elif n in descendents:
701 elif n in descendents:
729 # n is already a descendent
702 # n is already a descendent
730 isdescendent = True
703 isdescendent = True
731 # This check only needs to be done here because all the roots
704 # This check only needs to be done here because all the roots
732 # will start being marked is descendents before the loop.
705 # will start being marked is descendents before the loop.
733 if n in roots:
706 if n in roots:
734 # If n was a root, check if it's a 'real' root.
707 # If n was a root, check if it's a 'real' root.
735 p = tuple(self.parents(n))
708 p = tuple(self.parents(n))
736 # If any of its parents are descendents, it's not a root.
709 # If any of its parents are descendents, it's not a root.
737 if (p[0] in descendents) or (p[1] in descendents):
710 if (p[0] in descendents) or (p[1] in descendents):
738 roots.pop(n)
711 roots.pop(n)
739 else:
712 else:
740 p = tuple(self.parents(n))
713 p = tuple(self.parents(n))
741 # A node is a descendent if either of its parents are
714 # A node is a descendent if either of its parents are
742 # descendents. (We seeded the dependents list with the roots
715 # descendents. (We seeded the dependents list with the roots
743 # up there, remember?)
716 # up there, remember?)
744 if (p[0] in descendents) or (p[1] in descendents):
717 if (p[0] in descendents) or (p[1] in descendents):
745 descendents[n] = 1
718 descendents[n] = 1
746 isdescendent = True
719 isdescendent = True
747 if isdescendent and ((ancestors is None) or (n in ancestors)):
720 if isdescendent and ((ancestors is None) or (n in ancestors)):
748 # Only include nodes that are both descendents and ancestors.
721 # Only include nodes that are both descendents and ancestors.
749 orderedout.append(n)
722 orderedout.append(n)
750 if (ancestors is not None) and (n in heads):
723 if (ancestors is not None) and (n in heads):
751 # We're trying to figure out which heads are reachable
724 # We're trying to figure out which heads are reachable
752 # from roots.
725 # from roots.
753 # Mark this head as having been reached
726 # Mark this head as having been reached
754 heads[n] = 1
727 heads[n] = 1
755 elif ancestors is None:
728 elif ancestors is None:
756 # Otherwise, we're trying to discover the heads.
729 # Otherwise, we're trying to discover the heads.
757 # Assume this is a head because if it isn't, the next step
730 # Assume this is a head because if it isn't, the next step
758 # will eventually remove it.
731 # will eventually remove it.
759 heads[n] = 1
732 heads[n] = 1
760 # But, obviously its parents aren't.
733 # But, obviously its parents aren't.
761 for p in self.parents(n):
734 for p in self.parents(n):
762 heads.pop(p, None)
735 heads.pop(p, None)
763 heads = [n for n in heads.iterkeys() if heads[n] != 0]
736 heads = [n for n in heads.iterkeys() if heads[n] != 0]
764 roots = roots.keys()
737 roots = roots.keys()
765 assert orderedout
738 assert orderedout
766 assert roots
739 assert roots
767 assert heads
740 assert heads
768 return (orderedout, roots, heads)
741 return (orderedout, roots, heads)
769
742
770 def heads(self, start=None, stop=None):
743 def heads(self, start=None, stop=None):
771 """return the list of all nodes that have no children
744 """return the list of all nodes that have no children
772
745
773 if start is specified, only heads that are descendants of
746 if start is specified, only heads that are descendants of
774 start will be returned
747 start will be returned
775 if stop is specified, it will consider all the revs from stop
748 if stop is specified, it will consider all the revs from stop
776 as if they had no children
749 as if they had no children
777 """
750 """
778 if start is None and stop is None:
751 if start is None and stop is None:
779 count = len(self)
752 count = len(self)
780 if not count:
753 if not count:
781 return [nullid]
754 return [nullid]
782 ishead = [1] * (count + 1)
755 ishead = [1] * (count + 1)
783 index = self.index
756 index = self.index
784 for r in xrange(count):
757 for r in xrange(count):
785 e = index[r]
758 e = index[r]
786 ishead[e[5]] = ishead[e[6]] = 0
759 ishead[e[5]] = ishead[e[6]] = 0
787 return [self.node(r) for r in xrange(count) if ishead[r]]
760 return [self.node(r) for r in xrange(count) if ishead[r]]
788
761
789 if start is None:
762 if start is None:
790 start = nullid
763 start = nullid
791 if stop is None:
764 if stop is None:
792 stop = []
765 stop = []
793 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
766 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
794 startrev = self.rev(start)
767 startrev = self.rev(start)
795 reachable = {startrev: 1}
768 reachable = {startrev: 1}
796 heads = {startrev: 1}
769 heads = {startrev: 1}
797
770
798 parentrevs = self.parentrevs
771 parentrevs = self.parentrevs
799 for r in xrange(startrev + 1, len(self)):
772 for r in xrange(startrev + 1, len(self)):
800 for p in parentrevs(r):
773 for p in parentrevs(r):
801 if p in reachable:
774 if p in reachable:
802 if r not in stoprevs:
775 if r not in stoprevs:
803 reachable[r] = 1
776 reachable[r] = 1
804 heads[r] = 1
777 heads[r] = 1
805 if p in heads and p not in stoprevs:
778 if p in heads and p not in stoprevs:
806 del heads[p]
779 del heads[p]
807
780
808 return [self.node(r) for r in heads]
781 return [self.node(r) for r in heads]
809
782
810 def children(self, node):
783 def children(self, node):
811 """find the children of a given node"""
784 """find the children of a given node"""
812 c = []
785 c = []
813 p = self.rev(node)
786 p = self.rev(node)
814 for r in range(p + 1, len(self)):
787 for r in range(p + 1, len(self)):
815 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
788 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
816 if prevs:
789 if prevs:
817 for pr in prevs:
790 for pr in prevs:
818 if pr == p:
791 if pr == p:
819 c.append(self.node(r))
792 c.append(self.node(r))
820 elif p == nullrev:
793 elif p == nullrev:
821 c.append(self.node(r))
794 c.append(self.node(r))
822 return c
795 return c
823
796
824 def _match(self, id):
797 def _match(self, id):
825 if isinstance(id, (long, int)):
798 if isinstance(id, (long, int)):
826 # rev
799 # rev
827 return self.node(id)
800 return self.node(id)
828 if len(id) == 20:
801 if len(id) == 20:
829 # possibly a binary node
802 # possibly a binary node
830 # odds of a binary node being all hex in ASCII are 1 in 10**25
803 # odds of a binary node being all hex in ASCII are 1 in 10**25
831 try:
804 try:
832 node = id
805 node = id
833 r = self.rev(node) # quick search the index
806 r = self.rev(node) # quick search the index
834 return node
807 return node
835 except LookupError:
808 except LookupError:
836 pass # may be partial hex id
809 pass # may be partial hex id
837 try:
810 try:
838 # str(rev)
811 # str(rev)
839 rev = int(id)
812 rev = int(id)
840 if str(rev) != id:
813 if str(rev) != id:
841 raise ValueError
814 raise ValueError
842 if rev < 0:
815 if rev < 0:
843 rev = len(self) + rev
816 rev = len(self) + rev
844 if rev < 0 or rev >= len(self):
817 if rev < 0 or rev >= len(self):
845 raise ValueError
818 raise ValueError
846 return self.node(rev)
819 return self.node(rev)
847 except (ValueError, OverflowError):
820 except (ValueError, OverflowError):
848 pass
821 pass
849 if len(id) == 40:
822 if len(id) == 40:
850 try:
823 try:
851 # a full hex nodeid?
824 # a full hex nodeid?
852 node = bin(id)
825 node = bin(id)
853 r = self.rev(node)
826 r = self.rev(node)
854 return node
827 return node
855 except (TypeError, LookupError):
828 except (TypeError, LookupError):
856 pass
829 pass
857
830
858 def _partialmatch(self, id):
831 def _partialmatch(self, id):
859 if len(id) < 40:
832 if len(id) < 40:
860 try:
833 try:
861 # hex(node)[:...]
834 # hex(node)[:...]
862 bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
835 bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
863 node = None
836 node = None
864 for n in self.nodemap:
837 for n in self.nodemap:
865 if n.startswith(bin_id) and hex(n).startswith(id):
838 if n.startswith(bin_id) and hex(n).startswith(id):
866 if node is not None:
839 if node is not None:
867 raise LookupError(id, self.indexfile,
840 raise LookupError(id, self.indexfile,
868 _('ambiguous identifier'))
841 _('ambiguous identifier'))
869 node = n
842 node = n
870 if node is not None:
843 if node is not None:
871 return node
844 return node
872 except TypeError:
845 except TypeError:
873 pass
846 pass
874
847
875 def lookup(self, id):
848 def lookup(self, id):
876 """locate a node based on:
849 """locate a node based on:
877 - revision number or str(revision number)
850 - revision number or str(revision number)
878 - nodeid or subset of hex nodeid
851 - nodeid or subset of hex nodeid
879 """
852 """
880 n = self._match(id)
853 n = self._match(id)
881 if n is not None:
854 if n is not None:
882 return n
855 return n
883 n = self._partialmatch(id)
856 n = self._partialmatch(id)
884 if n:
857 if n:
885 return n
858 return n
886
859
887 raise LookupError(id, self.indexfile, _('no match found'))
860 raise LookupError(id, self.indexfile, _('no match found'))
888
861
889 def cmp(self, node, text):
862 def cmp(self, node, text):
890 """compare text with a given file revision"""
863 """compare text with a given file revision"""
891 p1, p2 = self.parents(node)
864 p1, p2 = self.parents(node)
892 return hash(text, p1, p2) != node
865 return hash(text, p1, p2) != node
893
866
894 def chunk(self, rev, df=None):
867 def chunk(self, rev, df=None):
895 def loadcache(df):
868 def loadcache(df):
896 if not df:
869 if not df:
897 if self._inline:
870 if self._inline:
898 df = self.opener(self.indexfile)
871 df = self.opener(self.indexfile)
899 else:
872 else:
900 df = self.opener(self.datafile)
873 df = self.opener(self.datafile)
901 df.seek(start)
874 df.seek(start)
902 self._chunkcache = (start, df.read(cache_length))
875 self._chunkcache = (start, df.read(cache_length))
903
876
904 start, length = self.start(rev), self.length(rev)
877 start, length = self.start(rev), self.length(rev)
905 if self._inline:
878 if self._inline:
906 start += (rev + 1) * self._io.size
879 start += (rev + 1) * self._io.size
907 end = start + length
880 end = start + length
908
881
909 offset = 0
882 offset = 0
910 if not self._chunkcache:
883 if not self._chunkcache:
911 cache_length = max(65536, length)
884 cache_length = max(65536, length)
912 loadcache(df)
885 loadcache(df)
913 else:
886 else:
914 cache_start = self._chunkcache[0]
887 cache_start = self._chunkcache[0]
915 cache_length = len(self._chunkcache[1])
888 cache_length = len(self._chunkcache[1])
916 cache_end = cache_start + cache_length
889 cache_end = cache_start + cache_length
917 if start >= cache_start and end <= cache_end:
890 if start >= cache_start and end <= cache_end:
918 # it is cached
891 # it is cached
919 offset = start - cache_start
892 offset = start - cache_start
920 else:
893 else:
921 cache_length = max(65536, length)
894 cache_length = max(65536, length)
922 loadcache(df)
895 loadcache(df)
923
896
924 # avoid copying large chunks
897 # avoid copying large chunks
925 c = self._chunkcache[1]
898 c = self._chunkcache[1]
926 if cache_length != length:
899 if cache_length != length:
927 c = c[offset:offset + length]
900 c = c[offset:offset + length]
928
901
929 return decompress(c)
902 return decompress(c)
930
903
931 def delta(self, node):
904 def delta(self, node):
932 """return or calculate a delta between a node and its predecessor"""
905 """return or calculate a delta between a node and its predecessor"""
933 r = self.rev(node)
906 r = self.rev(node)
934 return self.revdiff(r - 1, r)
907 return self.revdiff(r - 1, r)
935
908
936 def revdiff(self, rev1, rev2):
909 def revdiff(self, rev1, rev2):
937 """return or calculate a delta between two revisions"""
910 """return or calculate a delta between two revisions"""
938 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
911 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
939 return self.chunk(rev2)
912 return self.chunk(rev2)
940
913
941 return mdiff.textdiff(self.revision(self.node(rev1)),
914 return mdiff.textdiff(self.revision(self.node(rev1)),
942 self.revision(self.node(rev2)))
915 self.revision(self.node(rev2)))

    def revision(self, node):
        """return an uncompressed revision of a given node"""
        if node == nullid:
            return ""
        if self._cache and self._cache[0] == node:
            return str(self._cache[2])

        # look up what we need to read
        text = None
        rev = self.rev(node)
        base = self.base(rev)

        # check rev flags
        if self.index[rev][0] & 0xFFFF:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.index[rev][0] & 0xFFFF))

        df = None

        # do we have useful data cached?
        if self._cache and self._cache[1] >= base and self._cache[1] < rev:
            base = self._cache[1]
            text = str(self._cache[2])
            self._loadindex(base, rev + 1)
            if not self._inline and rev > base + 1:
                df = self.opener(self.datafile)
        else:
            self._loadindex(base, rev + 1)
            if not self._inline and rev > base:
                df = self.opener(self.datafile)
            text = self.chunk(base, df=df)

        bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
        text = mdiff.patches(text, bins)
        p1, p2 = self.parents(node)
        if node != hash(text, p1, p2):
            raise RevlogError(_("integrity check failed on %s:%d")
                              % (self.datafile, rev))

        self._cache = (node, rev, text)
        return text
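
# Standalone sketch of the reconstruction loop above: a full text at the
# chain base plus one patch per following revision, applied in order, as
# mdiff.patches does. The (start, end, data) patch format mirrors bdiff
# ops; apply_patch here is a simplified stand-in, not the real mdiff.
def apply_patch(text, ops):
    out, last = [], 0
    for start, end, data in ops:          # ops sorted by start offset
        out.append(text[last:start])
        out.append(data)
        last = end
    out.append(text[last:])
    return "".join(out)

base_text = "the quick brown fox"
bins = [[(4, 9, "slow")], [(0, 3, "a")]]  # one patch per revision
text = base_text
for ops in bins:
    text = apply_patch(text, ops)
assert text == "a slow brown fox"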

    def checkinlinesize(self, tr, fp=None):
        if not self._inline:
            return
        if not fp:
            fp = self.opener(self.indexfile, 'r')
        fp.seek(0, 2)
        size = fp.tell()
        if size < 131072:
            return
        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)
        df = self.opener(self.datafile, 'w')
        try:
            calc = self._io.size
            for r in self:
                start = self.start(r) + (r + 1) * calc
                length = self.length(r)
                fp.seek(start)
                d = fp.read(length)
                df.write(d)
        finally:
            df.close()

        fp.close()
        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call rename, the temp file will never replace the
        # real index
        fp.rename()

        tr.replace(self.indexfile, trindex * calc)
        self._chunkcache = None
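
# Standalone sketch of the offset arithmetic above: in an inline revlog
# the index entries and data chunks are interleaved in one file, so the
# physical offset of revision r's data is its logical data offset plus
# (r + 1) index entries. Entry size 64 here is an assumption standing in
# for self._io.size.
esize = 64
logical_start = [0, 120, 300]             # hypothetical self.start(r)
physical = [off + (r + 1) * esize
            for r, off in enumerate(logical_start)]
assert physical[0] == 64                  # data follows the first entry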

    def addrevision(self, text, transaction, link, p1, p2, d=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        d - an optional precomputed delta
        """
        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        try:
            return self._addrevision(text, transaction, link, p1, p2,
                                     d, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
        node = hash(text, p1, p2)
        if node in self.nodemap:
            return node

        curr = len(self)
        prev = curr - 1
        base = self.base(prev)
        offset = self.end(prev)

        if curr:
            if not d:
                ptext = self.revision(self.node(prev))
                d = mdiff.textdiff(ptext, text)
            data = compress(d)
            l = len(data[1]) + len(data[0])
            dist = l + offset - self.start(base)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if not curr or dist > len(text) * 2:
            data = compress(text)
            l = len(data[1]) + len(data[0])
            base = curr

        e = (offset_type(offset, 0), l, len(text),
             base, link, self.rev(p1), self.rev(p2), node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

        self._cache = (node, curr, text)
        return node
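
# Standalone sketch of the "full version" heuristic above: when the span
# of bytes needed to rebuild a revision from its chain base grows past
# twice the uncompressed text size, a fresh full snapshot is stored
# instead of another delta. The numbers below are made up.
textlen = 1000
dist = 2500            # compressed bytes from chain base through this rev
store_snapshot = dist > textlen * 2
assert store_snapshot  # this revision would restart the delta chain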

    def ancestor(self, a, b):
        """calculate the least common ancestor of nodes a and b"""

        def parents(rev):
            return [p for p in self.parentrevs(rev) if p != nullrev]

        c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
        if c is None:
            return nullid

        return self.node(c)
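
# Standalone sketch of the parents-callback contract used above, with a
# hand-built DAG and a simplified common-ancestor walk (the real
# ancestor.ancestor weighs candidates by depth to pick the least one).
dag = {0: [], 1: [0], 2: [0], 3: [1, 2]}  # rev -> parent revs

def ancestors(rev, parents):
    seen, stack = set(), [rev]
    while stack:
        r = stack.pop()
        if r not in seen:
            seen.add(r)
            stack.extend(parents(r))
    return seen

common = ancestors(1, dag.get) & ancestors(2, dag.get)
assert common == set([0])                 # rev 0 is the common ancestor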

    def group(self, nodelist, lookup, infocollect=None):
        """calculate a delta group

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
        have this parent as it has all history before these
        changesets. The parent is parent[0].
        """
        revs = [self.rev(n) for n in nodelist]

        # if we don't have any revisions touched by these changesets, bail
        if not revs:
            yield changegroup.closechunk()
            return

        # add the parent of the first rev
        p = self.parents(self.node(revs[0]))[0]
        revs.insert(0, self.rev(p))

        # build deltas
        for d in xrange(0, len(revs) - 1):
            a, b = revs[d], revs[d + 1]
            nb = self.node(b)

            if infocollect is not None:
                infocollect(nb)

            p = self.parents(nb)
            meta = nb + p[0] + p[1] + lookup(nb)
            if a == -1:
                d = self.revision(nb)
                meta += mdiff.trivialdiffheader(len(d))
            else:
                d = self.revdiff(a, b)
            yield changegroup.chunkheader(len(meta) + len(d))
            yield meta
            if len(d) > 2**20:
                pos = 0
                while pos < len(d):
                    pos2 = pos + 2 ** 18
                    yield d[pos:pos2]
                    pos = pos2
            else:
                yield d

        yield changegroup.closechunk()
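
# Standalone sketch of the wire framing produced above, mirroring
# changegroup's conventions in this codebase: each chunk is preceded by a
# 4-byte big-endian length that counts the payload plus the length field
# itself, and a zero length closes the group.
import struct

def chunkheader(payloadlen):
    return struct.pack(">l", payloadlen + 4)

def closechunk():
    return struct.pack(">l", 0)

frame = chunkheader(10) + "x" * 10 + closechunk()
assert struct.unpack(">l", frame[:4])[0] == 14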

    def addgroup(self, revs, linkmapper, transaction):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The
        first delta is against its parent, which should be in our
        log; the rest are against the previous delta.
        """

        # track the base of the current delta log
        r = len(self)
        t = r - 1
        node = None

        base = prev = nullrev
        start = end = textlen = 0
        if r:
            end = self.end(t)

        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        try:
            # loop through our set of deltas
            chain = None
            for chunk in revs:
                node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue
                delta = buffer(chunk, 80)
                del chunk

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile, _('unknown parent'))

                if not chain:
                    # retrieve the parent revision of the delta chain
                    chain = p1
                    if chain not in self.nodemap:
                        raise LookupError(chain, self.indexfile, _('unknown base'))

                # full versions are inserted when the needed deltas become
                # comparable to the uncompressed text or when the previous
                # version is not the one we have a delta against. We use
                # the size of the previous full rev as a proxy for the
                # current size.

                if chain == prev:
                    cdelta = compress(delta)
                    cdeltalen = len(cdelta[0]) + len(cdelta[1])
                    textlen = mdiff.patchedsize(textlen, delta)

                if chain != prev or (end - start + cdeltalen) > textlen * 2:
                    # flush our writes here so revision() can read them back
                    if dfh:
                        dfh.flush()
                    ifh.flush()
                    text = self.revision(chain)
                    if len(text) == 0:
                        # skip over trivial delta header
                        text = buffer(delta, 12)
                    else:
                        text = mdiff.patches(text, [delta])
                    del delta
                    chk = self._addrevision(text, transaction, link, p1,
                                            p2, None, ifh, dfh)
                    if not dfh and not self._inline:
                        # addrevision switched from inline to conventional;
                        # reopen the file handles
                        dfh = self.opener(self.datafile, "a")
                        ifh = self.opener(self.indexfile, "a")
                    if chk != node:
                        raise RevlogError(_("consistency error adding group"))
                    textlen = len(text)
                else:
                    e = (offset_type(end, 0), cdeltalen, textlen, base,
                         link, self.rev(p1), self.rev(p2), node)
                    self.index.insert(-1, e)
                    self.nodemap[node] = r
                    entry = self._io.packentry(e, self.node, self.version, r)
                    if self._inline:
                        ifh.write(entry)
                        ifh.write(cdelta[0])
                        ifh.write(cdelta[1])
                        self.checkinlinesize(transaction, ifh)
                        if not self._inline:
                            dfh = self.opener(self.datafile, "a")
                            ifh = self.opener(self.indexfile, "a")
                    else:
                        dfh.write(cdelta[0])
                        dfh.write(cdelta[1])
                        ifh.write(entry)

                t, r, chain, prev = r, r + 1, node, node
                base = self.base(t)
                start = self.start(base)
                end = self.end(t)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return node
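
# Standalone sketch of the 80-byte chunk header unpacked above: four
# fixed 20-byte binary sha1 nodes (the revision, its two parents, and
# the changeset it links to), followed by the delta payload.
import struct

node, p1, p2, cs = ("\x11" * 20, "\x22" * 20, "\x33" * 20, "\x44" * 20)
header = struct.pack("20s20s20s20s", node, p1, p2, cs)
assert len(header) == 80
assert struct.unpack("20s20s20s20s", header) == (node, p1, p2, cs)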

    def strip(self, minlink):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        if isinstance(self.index, lazyindex):
            self._loadindexmap()

        for rev in self:
            if self.index[rev][4] >= minlink:
                break
        else:
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            df = self.opener(self.datafile, "a")
            df.truncate(end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        indexf = self.opener(self.indexfile, "a")
        indexf.truncate(end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chunkcache = None
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
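
# Standalone sketch of the truncation offsets above: a split revlog cuts
# the data file at the first dead revision's data start and the index at
# rev * entrysize; an inline revlog adds the two, since entries and data
# share one file. Entry size 64 is again a stand-in for self._io.size.
esize, rev, datastart = 64, 5, 9000
split_index_end = rev * esize              # separate .i file
inline_end = datastart + rev * esize       # interleaved single file
assert (split_index_end, inline_end) == (320, 9320)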

    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)
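
# Standalone sketch of the (dd, di) result above: dd is how many bytes
# the data file deviates from its expected size, di the partial-entry
# remainder of the index file; (0, 0) means both files are consistent.
entrysize = 64                       # stand-in for self._io.size
actual = 5 * entrysize + 13          # a truncated sixth entry
di = actual - (actual // entrysize) * entrysize
assert di == 13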

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res