revlog: fix reading of larger revlog indices on Windows
Matt Mackall
r8558:5726bb29 default
@@ -1,1389 +1,1385 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2, incorporated herein by reference.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

# import stuff from node for others to import from revlog
from node import bin, hex, nullid, nullrev, short #@UnusedImport
from i18n import _
import changegroup, ancestor, mdiff, parsers, error, util
import struct, zlib, errno

_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress
_sha = util.sha1

# revlog flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
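
# Note (added for clarity, not in the original file): the version word keeps
# the format number in its low 16 bits and the flag bits above them, so
# REVLOG_DEFAULT_VERSION == REVLOGNG | REVLOGNGINLINEDATA == 0x10001.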

_prereadsize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    return long(long(offset) << 16 | type)

nullhash = _sha(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        l = [p1, p2]
        l.sort()
        s = _sha(l[0])
        s.update(l[1])
    s.update(text)
    return s.digest()
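
# Example (added note, not in the original file): the parents are sorted
# before hashing, so hash(text, p1, p2) == hash(text, p2, p1); the
# nullhash.copy() branch just extends a precomputed sha1(nullid) with p1
# and text instead of rehashing nullid from scratch.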

def compress(text):
    """ generate a possibly-compressed representation of text """
    if not text:
        return ("", text)
    l = len(text)
    bin = None
    if l < 44:
        pass
    elif l > 1000000:
        # zlib makes an internal copy, thus doubling memory usage for
        # large files, so let's do this in pieces
        z = zlib.compressobj()
        p = []
        pos = 0
        while pos < l:
            pos2 = pos + 2**20
            p.append(z.compress(text[pos:pos2]))
            pos = pos2
        p.append(z.flush())
        if sum(map(len, p)) < l:
            bin = "".join(p)
    else:
        bin = _compress(text)
    if bin is None or len(bin) > l:
        if text[0] == '\0':
            return ("", text)
        return ('u', text)
    return ("", bin)
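
# Note (added for clarity, not in the original file): compress() returns a
# (prefix, payload) pair that callers store concatenated.  An empty prefix
# means the payload is self-describing: zlib output starts with 'x', and raw
# text starting with '\0' is passed through unchanged.  A 'u' prefix marks
# text kept uncompressed, which decompress() strips off again.  Texts
# shorter than 44 bytes skip compression entirely.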

def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    t = bin[0]
    if t == '\0':
        return bin
    if t == 'x':
        return _decompress(bin)
    if t == 'u':
        return bin[1:]
    raise RevlogError(_("unknown compression type %r") % t)

class lazyparser(object):
    """
    this class avoids the need to parse the entirety of large indices
    """

    # lazyparser is not safe to use on windows if the win32 extensions are
    # not available. it keeps the file handle open, which makes it
    # impossible to break hardlinks on local cloned repos.

    def __init__(self, dataf, size):
        self.dataf = dataf
        self.s = struct.calcsize(indexformatng)
        self.datasize = size
        self.l = size/self.s
        self.index = [None] * self.l
        self.map = {nullid: nullrev}
        self.allmap = 0
        self.all = 0
        self.mapfind_count = 0

    def loadmap(self):
        """
        during a commit, we need to make sure the rev being added is
        not a duplicate. This requires loading the entire index,
        which is fairly slow. loadmap can load up just the node map,
        which takes much less time.
        """
        if self.allmap:
            return
        end = self.datasize
        self.allmap = 1
        cur = 0
        count = 0
        blocksize = self.s * 256
        self.dataf.seek(0)
        while cur < end:
            data = self.dataf.read(blocksize)
            off = 0
            for x in xrange(256):
                n = data[off + ngshaoffset:off + ngshaoffset + 20]
                self.map[n] = count
                count += 1
                if count >= self.l:
                    break
                off += self.s
            cur += blocksize

    def loadblock(self, blockstart, blocksize, data=None):
        if self.all:
            return
        if data is None:
            self.dataf.seek(blockstart)
            if blockstart + blocksize > self.datasize:
                # the revlog may have grown since we've started running,
                # but we don't have space in self.index for more entries.
                # limit blocksize so that we don't get too much data.
                blocksize = max(self.datasize - blockstart, 0)
            data = self.dataf.read(blocksize)
        lend = len(data) / self.s
        i = blockstart / self.s
        off = 0
        # lazyindex supports __delitem__
        if lend > len(self.index) - i:
            lend = len(self.index) - i
        for x in xrange(lend):
            if self.index[i + x] is None:
                b = data[off : off + self.s]
                self.index[i + x] = b
                n = b[ngshaoffset:ngshaoffset + 20]
                self.map[n] = i + x
            off += self.s

    def findnode(self, node):
        """search backwards through the index file for a specific node"""
        if self.allmap:
            return None

        # hg log will cause many, many searches for the manifest
        # nodes. After we get called a few times, just load the whole
        # thing.
        if self.mapfind_count > 8:
            self.loadmap()
            if node in self.map:
                return node
            return None
        self.mapfind_count += 1
        last = self.l - 1
        while self.index[last] != None:
            if last == 0:
                self.all = 1
                self.allmap = 1
                return None
            last -= 1
        end = (last + 1) * self.s
        blocksize = self.s * 256
        while end >= 0:
            start = max(end - blocksize, 0)
            self.dataf.seek(start)
            data = self.dataf.read(end - start)
            findend = end - start
            while True:
                # we're searching backwards, so we have to make sure
                # we don't find a changeset where this node is a parent
                off = data.find(node, 0, findend)
                findend = off
                if off >= 0:
                    i = off / self.s
                    off = i * self.s
                    n = data[off + ngshaoffset:off + ngshaoffset + 20]
                    if n == node:
                        self.map[n] = i + start / self.s
                        return node
                else:
                    break
            end -= blocksize
        return None

    def loadindex(self, i=None, end=None):
        if self.all:
            return
        all = False
        if i is None:
            blockstart = 0
            blocksize = (65536 / self.s) * self.s
            end = self.datasize
            all = True
        else:
            if end:
                blockstart = i * self.s
                end = end * self.s
                blocksize = end - blockstart
            else:
                blockstart = (i & ~1023) * self.s
                blocksize = self.s * 1024
                end = blockstart + blocksize
        while blockstart < end:
            self.loadblock(blockstart, blocksize)
            blockstart += blocksize
        if all:
            self.all = True

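# Note (added for clarity, not in the original file): each on-disk revlogng
# index entry is struct.calcsize(indexformatng) == 64 bytes, so lazyparser
# can treat the file as an array of fixed 64-byte records and unpack only
# the ones actually touched; self.l = size/self.s is the expected number of
# entries.
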
class lazyindex(object):
    """a lazy version of the index array"""
    def __init__(self, parser):
        self.p = parser
    def __len__(self):
        return len(self.p.index)
    def load(self, pos):
        if pos < 0:
            pos += len(self.p.index)
        self.p.loadindex(pos)
        return self.p.index[pos]
    def __getitem__(self, pos):
        return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
    def __setitem__(self, pos, item):
        self.p.index[pos] = _pack(indexformatng, *item)
    def __delitem__(self, pos):
        del self.p.index[pos]
    def insert(self, pos, e):
        self.p.index.insert(pos, _pack(indexformatng, *e))
    def append(self, e):
        self.p.index.append(_pack(indexformatng, *e))

class lazymap(object):
    """a lazy version of the node map"""
    def __init__(self, parser):
        self.p = parser
    def load(self, key):
        n = self.p.findnode(key)
        if n is None:
            raise KeyError(key)
    def __contains__(self, key):
        if key in self.p.map:
            return True
        self.p.loadmap()
        return key in self.p.map
    def __iter__(self):
        yield nullid
        for i in xrange(self.p.l):
            ret = self.p.index[i]
            if not ret:
                self.p.loadindex(i)
                ret = self.p.index[i]
            if isinstance(ret, str):
                ret = _unpack(indexformatng, ret)
            yield ret[7]
    def __getitem__(self, key):
        try:
            return self.p.map[key]
        except KeyError:
            try:
                self.load(key)
                return self.p.map[key]
            except KeyError:
                raise KeyError("node " + hex(key))
    def __setitem__(self, key, val):
        self.p.map[key] = val
    def __delitem__(self, key):
        del self.p.map[key]

indexformatv0 = ">4l20s20s20s"
v0shaoffset = 56
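
# Note (added for clarity, not in the original file): a v0 index record is
# four 4-byte longs (offset, compressed length, base rev, link rev) followed
# by three 20-byte hashes (p1, p2, nodeid), 76 bytes in all; v0shaoffset ==
# 56 is where the nodeid starts (16 + 20 + 20).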

class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, fp, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
-        if len(data) < _prereadsize:
+        if len(data) == _prereadsize:
            data += fp.read() # read the rest
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return index, nodemap, None

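    # Note on the change above (added commentary, not in the original file):
    # `data` holds at most the first _prereadsize bytes of the index file.
    # The old `<` test re-read the file only when the preread was already
    # complete (a no-op) and skipped the read exactly when a larger index
    # had been truncated at _prereadsize; `==` reads the remainder precisely
    # when more data may exist.
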
    def packentry(self, entry, node, version, rev):
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
#  6 bytes offset
#  2 bytes flags
#  4 bytes compressed length
#  4 bytes uncompressed length
#  4 bytes: base rev
#  4 bytes link rev
#  4 bytes parent 1 rev
#  4 bytes parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
ngshaoffset = 32
versionformat = ">I"
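
# Note (added for clarity, not in the original file): ">Qiiiiii20s12x" packs
# the 6-byte offset and 2-byte flags together as one big-endian 8-byte Q
# (hence the 16-bit shifts in getoffset/gettype/offset_type), then six
# 4-byte ints, then the 20-byte nodeid padded with 12 null bytes to 32.
# That is 64 bytes per entry, with the nodeid starting at byte 32
# (ngshaoffset).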

class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, fp, data, inline):
-        try:
-            size = len(data)
-            if size == _prereadsize:
-                size = util.fstat(fp).st_size
-        except AttributeError:
-            size = 0
-
-        if util.openhardlinks() and not inline and size > _prereadsize:
-            # big index, let's parse it on demand
-            parser = lazyparser(fp, size)
-            index = lazyindex(parser)
-            nodemap = lazymap(parser)
-            e = list(index[0])
-            type = gettype(e[0])
-            e[0] = offset_type(0, type)
-            index[0] = e
-            return index, nodemap, None
+        if len(data) == _prereadsize:
+            if util.openhardlinks() and not inline:
+                # big index, let's parse it on demand
+                parser = lazyparser(fp, size)
+                index = lazyindex(parser)
+                nodemap = lazymap(parser)
+                e = list(index[0])
+                type = gettype(e[0])
+                e[0] = offset_type(0, type)
+                index[0] = e
+                return index, nodemap, None
+            else:
+                data += fp.read()

        # call the C implementation to parse the index data
        index, nodemap, cache = parsers.parse_index(data, inline)
        return index, nodemap, cache

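    # Note on the change above (added commentary, not in the original file):
    # previously a large index was parsed lazily only when
    # util.openhardlinks() was true; on Windows without the win32 extensions
    # it is false (lazyparser keeps the file handle open, see its class
    # comment), so control fell through to parsers.parse_index() with only
    # the first _prereadsize bytes of the file.  The new else branch reads
    # the remainder before handing the data to the C parser.  As shown, the
    # rewritten lazy branch still references `size`, which is no longer
    # computed in this method.
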
    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p

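    # Note (added for clarity, not in the original file): for revision 0 the
    # first 4 bytes of the entry are overwritten with the version word.
    # Since the first entry always begins at offset 0, no information is
    # lost, and readers undo this by forcing the offset back to 0 (see the
    # offset_type(0, type) fixup in parseindex above).
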
class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.
    """
    def __init__(self, opener, indexfile):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        self._cache = None
        self._chunkcache = (0, '')
        self.nodemap = {nullid: nullrev}
        self.index = []

        v = REVLOG_DEFAULT_VERSION
        if hasattr(opener, "defversion"):
            v = opener.defversion
            if v & REVLOGNG:
                v |= REVLOGNGINLINEDATA

        i = ''
        try:
            f = self.opener(self.indexfile)
            i = f.read(_prereadsize)
            if len(i) > 0:
                v = struct.unpack(versionformat, i[:4])[0]
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        if i:
            try:
                d = self._io.parseindex(f, i, self._inline)
            except (ValueError, IndexError), e:
                raise RevlogError(_("index %s is corrupted") % (self.indexfile))
            self.index, self.nodemap, self._chunkcache = d
            if not self._chunkcache:
                self._chunkcache = (0, '')

        # add the magic null revision at -1 (if it hasn't been done already)
        if (self.index == [] or isinstance(self.index, lazyindex) or
            self.index[-1][7] != nullid):
            self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))

    def _loadindex(self, start, end):
        """load a block of indexes all at once from the lazy parser"""
        if isinstance(self.index, lazyindex):
            self.index.p.loadindex(start, end)

    def _loadindexmap(self):
        """loads both the map and the index from the lazy parser"""
        if isinstance(self.index, lazyindex):
            p = self.index.p
            p.loadindex()
            self.nodemap = p.map

    def _loadmap(self):
        """loads the map from the lazy parser"""
        if isinstance(self.nodemap, lazymap):
            self.nodemap.p.loadmap()
            self.nodemap = self.nodemap.p.map

    def tip(self):
        return self.node(len(self.index) - 2)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        for i in xrange(len(self)):
            yield i
    def rev(self, node):
        try:
            return self.nodemap[node]
        except KeyError:
            raise LookupError(node, self.indexfile, _('no node'))
    def node(self, rev):
        return self.index[rev][7]
    def linkrev(self, rev):
        return self.index[rev][4]
    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
    def parentrevs(self, rev):
        return self.index[rev][5:7]
    def start(self, rev):
        return int(self.index[rev][0] >> 16)
    def end(self, rev):
        return self.start(rev) + self.length(rev)
    def length(self, rev):
        return self.index[rev][1]
    def base(self, rev):
        return self.index[rev][3]

    def size(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(self.node(rev))
        return len(t)

        # alternate implementation. The advantage to this code is it
        # will be faster for a single revision. But, the results are not
        # cached, so finding the size of every revision will be slower.
        """
        if self.cache and self.cache[1] == rev:
            return len(self.cache[2])

        base = self.base(rev)
        if self.cache and self.cache[1] >= base and self.cache[1] < rev:
            base = self.cache[1]
            text = self.cache[2]
        else:
            text = self.revision(self.node(base))

        l = len(text)
        for x in xrange(base + 1, rev + 1):
            l = mdiff.patchedsize(l, self.chunk(x))
        return l
        """

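    # Note (added for clarity, not in the original file): the quoted block
    # above is a bare string literal placed after a return statement; it is
    # never executed and survives only as documentation of the uncached
    # alternative.
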
    def reachable(self, node, stop=None):
        """return the set of all nodes ancestral to a given node, including
        the node itself, stopping when stop is matched"""
        reachable = set((node,))
        visit = [node]
        if stop:
            stopn = self.rev(stop)
        else:
            stopn = 0
        while visit:
            n = visit.pop(0)
            if n == stop:
                continue
            if n == nullid:
                continue
            for p in self.parents(n):
                if self.rev(p) < stopn:
                    continue
                if p not in reachable:
                    reachable.add(p)
                    visit.append(p)
        return reachable

    def ancestors(self, *revs):
        'Generate the ancestors of revs using a breadth-first visit'
        visit = list(revs)
        seen = set([nullrev])
        while visit:
            for parent in self.parentrevs(visit.pop(0)):
                if parent not in seen:
                    visit.append(parent)
                    seen.add(parent)
                    yield parent

    def descendants(self, *revs):
        'Generate the descendants of revs in topological order'
        seen = set(revs)
        for i in xrange(min(revs) + 1, len(self)):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findmissing(self, common=None, heads=None):
        '''
        returns the topologically sorted list of nodes from the set:
        missing = (ancestors(heads) \ ancestors(common))

        where ancestors() is the set of ancestors from heads, heads included

        if heads is None, the heads of the revlog are used
        if common is None, nullid is assumed to be a common node
        '''
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        has = set(self.ancestors(*common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = [r for r in heads if r not in has]
        while visit:
            r = visit.pop(0)
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return [self.node(r) for r in missing]

    def nodesbetween(self, roots=None, heads=None):
        """Return a tuple containing three elements. Elements 1 and 2 contain
        a final list of bases and heads after all the unreachable ones have
        been pruned. Element 0 contains a topologically sorted list of all
        nodes that satisfy these constraints:
        1. All nodes must be descended from a node in roots (the nodes on
           roots are considered descended from themselves).
        2. All nodes must also be ancestors of a node in heads (the nodes in
           heads are considered to be their own ancestors).

        If roots is unspecified, nullid is assumed as the only root.
        If heads is unspecified, it is taken to be the output of the
        heads method (i.e. a list of all nodes in the repository that
        have no children)."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendent of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, 0)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendent of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [n for n in roots if n in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(n) for n in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendents = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendents.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendents, empty parents will look like
        # they're descendents.
        for r in xrange(max(lowestrev, 0), highestrev + 1):
            n = self.node(r)
            isdescendent = False
            if lowestrev == nullrev: # Everybody is a descendent of nullid
                isdescendent = True
            elif n in descendents:
                # n is already a descendent
                isdescendent = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendents before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendents, it's not a root.
                    if (p[0] in descendents) or (p[1] in descendents):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendent if either of its parents are
                # descendents. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendents) or (p[1] in descendents):
                    descendents.add(n)
                    isdescendent = True
            if isdescendent and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendents and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = 1
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = 1
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [n for n in heads.iterkeys() if heads[n] != 0]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            count = len(self)
            if not count:
                return [nullid]
            ishead = [1] * (count + 1)
            index = self.index
            for r in xrange(count):
                e = index[r]
                ishead[e[5]] = ishead[e[6]] = 0
            return [self.node(r) for r in xrange(count) if ishead[r]]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = set((startrev,))
        heads = set((startrev,))

        parentrevs = self.parentrevs
        for r in xrange(startrev + 1, len(self)):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                    heads.add(r)
                if p in heads and p not in stoprevs:
                    heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in range(p + 1, len(self)):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def _match(self, id):
        if isinstance(id, (long, int)):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) / 2 # grab an even number of digits
                bin_id = bin(id[:l*2])
                nl = [n for n in self.nodemap if n[:l] == bin_id]
                nl = [n for n in nl if hex(n).startswith(id)]
                if len(nl) > 0:
                    if len(nl) == 1:
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

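    # Example (added note, not in the original file): lookup("10") first
    # resolves to revision 10 via _match() when that revision exists; only
    # if the numeric interpretation fails does _partialmatch() treat "10"
    # as a hex nodeid prefix, raising LookupError('ambiguous identifier')
    # when more than one node matches it.
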
909 def cmp(self, node, text):
905 def cmp(self, node, text):
910 """compare text with a given file revision"""
906 """compare text with a given file revision"""
911 p1, p2 = self.parents(node)
907 p1, p2 = self.parents(node)
912 return hash(text, p1, p2) != node
908 return hash(text, p1, p2) != node
913
909
914 def _addchunk(self, offset, data):
910 def _addchunk(self, offset, data):
915 o, d = self._chunkcache
911 o, d = self._chunkcache
916 # try to add to existing cache
912 # try to add to existing cache
917 if o + len(d) == offset and len(d) + len(data) < _prereadsize:
913 if o + len(d) == offset and len(d) + len(data) < _prereadsize:
918 self._chunkcache = o, d + data
914 self._chunkcache = o, d + data
919 else:
915 else:
920 self._chunkcache = offset, data
916 self._chunkcache = offset, data
921
917
922 def _loadchunk(self, offset, length, df=None):
918 def _loadchunk(self, offset, length, df=None):
923 if not df:
919 if not df:
924 if self._inline:
920 if self._inline:
925 df = self.opener(self.indexfile)
921 df = self.opener(self.indexfile)
926 else:
922 else:
927 df = self.opener(self.datafile)
923 df = self.opener(self.datafile)
928
924
929 readahead = max(65536, length)
925 readahead = max(65536, length)
930 df.seek(offset)
926 df.seek(offset)
931 d = df.read(readahead)
927 d = df.read(readahead)
932 self._addchunk(offset, d)
928 self._addchunk(offset, d)
933 if readahead > length:
929 if readahead > length:
934 return d[:length]
930 return d[:length]
935 return d
931 return d
936
932
937 def _getchunk(self, offset, length, df=None):
933 def _getchunk(self, offset, length, df=None):
938 o, d = self._chunkcache
934 o, d = self._chunkcache
939 l = len(d)
935 l = len(d)
940
936
941 # is it in the cache?
937 # is it in the cache?
942 cachestart = offset - o
938 cachestart = offset - o
943 cacheend = cachestart + length
939 cacheend = cachestart + length
944 if cachestart >= 0 and cacheend <= l:
940 if cachestart >= 0 and cacheend <= l:
945 if cachestart == 0 and cacheend == l:
941 if cachestart == 0 and cacheend == l:
946 return d # avoid a copy
942 return d # avoid a copy
947 return d[cachestart:cacheend]
943 return d[cachestart:cacheend]
948
944
949 return self._loadchunk(offset, length, df)
945 return self._loadchunk(offset, length, df)
950
946
    def _prime(self, startrev, endrev, df):
        # preload the cache with the data for revs startrev..endrev
        start = self.start(startrev)
        end = self.end(endrev)
        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (startrev + 1) * self._io.size
        self._loadchunk(start, end - start, df)

    def chunk(self, rev, df=None):
        start, length = self.start(rev), self.length(rev)
        if self._inline:
            start += (rev + 1) * self._io.size
        return decompress(self._getchunk(start, length, df))

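    # Offset arithmetic: in an inline revlog the index entries and the
    # compressed data hunks are interleaved in the .i file, so the physical
    # position of rev's data is its logical start plus (rev + 1) index
    # entries.  For example, assuming 64-byte RevlogNG entries, rev 2 with a
    # logical start of 300 lives at 300 + 3 * 64 = 492 in the file.
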
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
            return self.chunk(rev2)

        return mdiff.textdiff(self.revision(self.node(rev1)),
                              self.revision(self.node(rev2)))

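    # The first branch is the cheap path: when rev2 is stored as a delta
    # directly against rev1 (adjacent revs in one delta chain), the stored
    # chunk already is the requested delta.  Either way the result patches
    # rev1's text into rev2's; an illustrative invariant:
    #
    #   d = rl.revdiff(4, 5)
    #   assert mdiff.patches(rl.revision(rl.node(4)), [d]) == \
    #          rl.revision(rl.node(5))
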
    def revision(self, node):
        """return an uncompressed revision of a given node"""
        if node == nullid:
            return ""
        if self._cache and self._cache[0] == node:
            return str(self._cache[2])

        # look up what we need to read
        text = None
        rev = self.rev(node)
        base = self.base(rev)

        # check rev flags
        if self.index[rev][0] & 0xFFFF:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.index[rev][0] & 0xFFFF))

        df = None

        # do we have useful data cached?
        if self._cache and self._cache[1] >= base and self._cache[1] < rev:
            base = self._cache[1]
            text = str(self._cache[2])
            self._loadindex(base, rev + 1)
            if not self._inline and rev > base + 1:
                df = self.opener(self.datafile)
                self._prime(base, rev, df)
        else:
            self._loadindex(base, rev + 1)
            if not self._inline and rev > base:
                df = self.opener(self.datafile)
                self._prime(base, rev, df)
            text = self.chunk(base, df=df)

        bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
        text = mdiff.patches(text, bins)
        p1, p2 = self.parents(node)
        if node != hash(text, p1, p2):
            raise RevlogError(_("integrity check failed on %s:%d")
                              % (self.datafile, rev))

        self._cache = (node, rev, text)
        return text

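    # Reconstruction in a nutshell: take the full snapshot at the chain base
    # and fold every stored delta up to rev on top of it.  A sketch of the
    # equivalent steps for base=7, rev=10:
    #
    #   text = rl.chunk(7)                      # full text at the base
    #   text = mdiff.patches(text, [rl.chunk(8), rl.chunk(9), rl.chunk(10)])
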
    def checkinlinesize(self, tr, fp=None):
        if not self._inline or (self.start(-2) + self.length(-2)) < 131072:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            calc = self._io.size
            for r in self:
                start = self.start(r) + (r + 1) * calc
                length = self.length(r)
                d = self._getchunk(start, length)
                df.write(d)
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call rename, the temp file will never replace the
        # real index
        fp.rename()

        tr.replace(self.indexfile, trindex * calc)
        self._chunkcache = (0, '')

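    # Migration summary: an inline revlog keeps its data interleaved in the
    # .i file until the stored data passes 128KB (131072 bytes).  This method
    # then converts it to the conventional layout: data hunks are copied out
    # to a .d file, a clean index is written to a temp file, and rename()
    # swaps it in atomically so readers never see a half-converted revlog.
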
    def addrevision(self, text, transaction, link, p1, p2, d=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        d - an optional precomputed delta
        """
        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        try:
            return self._addrevision(text, transaction, link, p1, p2, d,
                                     ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
        node = hash(text, p1, p2)
        if node in self.nodemap:
            return node

        curr = len(self)
        prev = curr - 1
        base = self.base(prev)
        offset = self.end(prev)

        if curr:
            if not d:
                ptext = self.revision(self.node(prev))
                d = mdiff.textdiff(ptext, text)
            data = compress(d)
            l = len(data[1]) + len(data[0])
            dist = l + offset - self.start(base)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if not curr or dist > len(text) * 2:
            data = compress(text)
            l = len(data[1]) + len(data[0])
            base = curr

        e = (offset_type(offset, 0), l, len(text),
             base, link, self.rev(p1), self.rev(p2), node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

        self._cache = (node, curr, text)
        return node

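    # The snapshot heuristic above, as a worked example: say the chain base
    # starts at offset 0, the new compressed delta would end at offset 9500
    # (so dist = 9500) and the new full text is 4000 bytes.  Since
    # 9500 > 2 * 4000, reconstructing via the chain would read more than
    # twice the text itself, so a fresh full version is stored instead and
    # this rev becomes its own base.
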
    def ancestor(self, a, b):
        """calculate the least common ancestor of nodes a and b"""

        def parents(rev):
            return [p for p in self.parentrevs(rev) if p != nullrev]

        c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
        if c is None:
            return nullid

        return self.node(c)

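    # Illustrative use (assuming a repository with a single root, so rev 0
    # is an ancestor of the tip):
    #
    #   tip = cl.node(len(cl) - 1)
    #   root = cl.node(0)
    #   assert cl.ancestor(tip, root) == root
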
    def group(self, nodelist, lookup, infocollect=None):
        """calculate a delta group

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes.  The first delta is
        parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
        have this parent, as it has all history before these
        changesets.  The parent is parent[0].
        """

        # if we don't have any revisions touched by these changesets, bail
        if not nodelist:
            yield changegroup.closechunk()
            return

        revs = [self.rev(n) for n in nodelist]

        # add the parent of the first rev
        p = self.parentrevs(revs[0])[0]
        revs.insert(0, p)

        # build deltas
        for d in xrange(0, len(revs) - 1):
            a, b = revs[d], revs[d + 1]
            nb = self.node(b)

            if infocollect is not None:
                infocollect(nb)

            p = self.parents(nb)
            meta = nb + p[0] + p[1] + lookup(nb)
            if a == -1:
                d = self.revision(nb)
                meta += mdiff.trivialdiffheader(len(d))
            else:
                d = self.revdiff(a, b)
            yield changegroup.chunkheader(len(meta) + len(d))
            yield meta
            if len(d) > 2**20:
                # yield large deltas in 256KB pieces to bound memory use
                pos = 0
                while pos < len(d):
                    pos2 = pos + 2 ** 18
                    yield d[pos:pos2]
                    pos = pos2
            else:
                yield d

        yield changegroup.closechunk()

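    # Chunk wire format, as consumed by addgroup() below: after the length
    # prefix from changegroup.chunkheader(), the first 80 bytes of a chunk
    # are four 20-byte binary fields -- node, p1, p2 and the linked
    # changeset node (cs) -- followed by the delta itself:
    #
    #   node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
    #   delta = buffer(chunk, 80)
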
    def addgroup(self, revs, linkmapper, transaction):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.
        """

        # track the base of the current delta log
        r = len(self)
        t = r - 1
        node = None

        base = prev = nullrev
        start = end = textlen = 0
        if r:
            end = self.end(t)

        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        try:
            # loop through our set of deltas
            chain = None
            for chunk in revs:
                node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue
                delta = buffer(chunk, 80)
                del chunk

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile,
                                          _('unknown parent'))

                if not chain:
                    # retrieve the parent revision of the delta chain
                    chain = p1
                    if chain not in self.nodemap:
                        raise LookupError(chain, self.indexfile,
                                          _('unknown base'))

                # full versions are inserted when the needed deltas become
                # comparable to the uncompressed text or when the previous
                # version is not the one we have a delta against. We use
                # the size of the previous full rev as a proxy for the
                # current size.

                if chain == prev:
                    cdelta = compress(delta)
                    cdeltalen = len(cdelta[0]) + len(cdelta[1])
                    textlen = mdiff.patchedsize(textlen, delta)

                if chain != prev or (end - start + cdeltalen) > textlen * 2:
                    # flush our writes here so revision() can read them back
                    if dfh:
                        dfh.flush()
                    ifh.flush()
                    text = self.revision(chain)
                    if len(text) == 0:
                        # skip over trivial delta header
                        text = buffer(delta, 12)
                    else:
                        text = mdiff.patches(text, [delta])
                    del delta
                    chk = self._addrevision(text, transaction, link, p1, p2,
                                            None, ifh, dfh)
                    if not dfh and not self._inline:
                        # addrevision switched from inline to conventional
                        # reopen the index and data files
                        dfh = self.opener(self.datafile, "a")
                        ifh = self.opener(self.indexfile, "a")
                    if chk != node:
                        raise RevlogError(_("consistency error adding group"))
                    textlen = len(text)
                else:
                    e = (offset_type(end, 0), cdeltalen, textlen, base,
                         link, self.rev(p1), self.rev(p2), node)
                    self.index.insert(-1, e)
                    self.nodemap[node] = r
                    entry = self._io.packentry(e, self.node, self.version, r)
                    if self._inline:
                        ifh.write(entry)
                        ifh.write(cdelta[0])
                        ifh.write(cdelta[1])
                        self.checkinlinesize(transaction, ifh)
                        if not self._inline:
                            dfh = self.opener(self.datafile, "a")
                            ifh = self.opener(self.indexfile, "a")
                    else:
                        dfh.write(cdelta[0])
                        dfh.write(cdelta[1])
                        ifh.write(entry)

                t, r, chain, prev = r, r + 1, node, node
                base = self.base(t)
                start = self.start(base)
                end = self.end(t)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return node

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        if isinstance(self.index, lazyindex):
            self._loadindexmap()

        for rev in self:
            if self.index[rev][4] >= minlink:
                break
        else:
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chunkcache = (0, '')
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

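    # Truncation arithmetic: stripping at rev keeps exactly rev entries.  For
    # a separate .i/.d pair the index is cut at rev * entrysize and the data
    # file at start(rev); for an inline revlog both live in the .i file, so
    # the single cut point is start(rev) + rev * entrysize.
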
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            s = self._io.size
            i = max(0, actual / s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

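    # Interpreting the result: dd is the number of surplus (or, if negative,
    # missing) bytes in the data file relative to what the index implies, and
    # di is the same for the index file; (0, 0) means the sizes check out.
    # A hypothetical integrity probe:
    #
    #   dd, di = rl.checksize()
    #   if dd or di:
    #       print "revlog %s looks damaged" % rl.indexfile
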
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res