revlog: clean up the chunk caching code
Matt Mackall
r8316:d593922c default
@@ -1,1372 +1,1380 @@
 # revlog.py - storage back-end for mercurial
 #
 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2, incorporated herein by reference.
 
 """Storage back-end for Mercurial.
 
 This provides efficient delta storage with O(1) retrieve and append
 and O(changes) merge between branches.
 """
 
 # import stuff from node for others to import from revlog
 from node import bin, hex, nullid, nullrev, short #@UnusedImport
 from i18n import _
 import changegroup, ancestor, mdiff, parsers, error, util
 import struct, zlib, errno
 
 _pack = struct.pack
 _unpack = struct.unpack
 _compress = zlib.compress
 _decompress = zlib.decompress
 _sha = util.sha1
 
 # revlog flags
 REVLOGV0 = 0
 REVLOGNG = 1
 REVLOGNGINLINEDATA = (1 << 16)
 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
 REVLOG_DEFAULT_FORMAT = REVLOGNG
 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
 
 _prereadsize = 1048576
 
 RevlogError = error.RevlogError
 LookupError = error.LookupError
 
 def getoffset(q):
     return int(q >> 16)
 
 def gettype(q):
     return int(q & 0xFFFF)
 
 def offset_type(offset, type):
     return long(long(offset) << 16 | type)
 
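The three helpers above pack a 48-bit file offset and a 16-bit type field into a single index value and take it apart again. A minimal round-trip sketch of the same bit arithmetic, standalone and purely for illustration:

# pack: offset in the high 48 bits, type flags in the low 16 bits
offset, type = 1048576, 0
q = long(offset) << 16 | type    # what offset_type(offset, type) returns
assert int(q >> 16) == offset    # getoffset(q)
assert int(q & 0xFFFF) == type   # gettype(q)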
 nullhash = _sha(nullid)
 
 def hash(text, p1, p2):
     """generate a hash from the given text and its parent hashes
 
     This hash combines both the current file contents and its history
     in a manner that makes it easy to distinguish nodes with the same
     content in the revision graph.
     """
     # As of now, if one of the parent node is null, p2 is null
     if p2 == nullid:
         # deep copy of a hash is faster than creating one
         s = nullhash.copy()
         s.update(p1)
     else:
         # none of the parent nodes are nullid
         l = [p1, p2]
         l.sort()
         s = _sha(l[0])
         s.update(l[1])
     s.update(text)
     return s.digest()
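For two real parents the function above reduces to sha1(min(p1, p2) + max(p1, p2) + text); sorting the parents makes the hash independent of parent order. A standalone sketch using hashlib directly (the nullid fast path is omitted):

import hashlib

def nodehash(text, p1, p2):
    # parents are hashed in sorted order so that
    # nodehash(t, a, b) == nodehash(t, b, a)
    l = [p1, p2]
    l.sort()
    s = hashlib.sha1(l[0])
    s.update(l[1])
    s.update(text)
    return s.digest()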
 
 def compress(text):
     """ generate a possibly-compressed representation of text """
     if not text:
         return ("", text)
     l = len(text)
     bin = None
     if l < 44:
         pass
     elif l > 1000000:
         # zlib makes an internal copy, thus doubling memory usage for
         # large files, so lets do this in pieces
         z = zlib.compressobj()
         p = []
         pos = 0
         while pos < l:
             pos2 = pos + 2**20
             p.append(z.compress(text[pos:pos2]))
             pos = pos2
         p.append(z.flush())
         if sum(map(len, p)) < l:
             bin = "".join(p)
     else:
         bin = _compress(text)
     if bin is None or len(bin) > l:
         if text[0] == '\0':
             return ("", text)
         return ('u', text)
     return ("", bin)
 
 def decompress(bin):
     """ decompress the given input """
     if not bin:
         return bin
     t = bin[0]
     if t == '\0':
         return bin
     if t == 'x':
         return _decompress(bin)
     if t == 'u':
         return bin[1:]
     raise RevlogError(_("unknown compression type %r") % t)
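compress returns a (header, data) pair whose concatenation decompress can always invert: 'x' marks a zlib stream, 'u' marks text stored verbatim because compression did not pay off, and the header is empty when the payload is self-describing (empty text, zlib output, or text that already begins with a NUL byte). A round-trip sketch, assuming compress and decompress from this module are in scope:

text = "some fairly repetitive text " * 100
h, d = compress(text)
assert decompress(h + d) == text     # zlib-compressed, header is ''

h, d = compress("tiny")              # below the 44-byte threshold
assert (h, d) == ('u', "tiny")       # stored verbatim, 'u' marker
assert decompress(h + d) == "tiny"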
 
 class lazyparser(object):
     """
     this class avoids the need to parse the entirety of large indices
     """
 
     # lazyparser is not safe to use on windows if win32 extensions not
     # available. it keeps file handle open, which make it not possible
     # to break hardlinks on local cloned repos.
 
     def __init__(self, dataf, size):
         self.dataf = dataf
         self.s = struct.calcsize(indexformatng)
         self.datasize = size
         self.l = size/self.s
         self.index = [None] * self.l
         self.map = {nullid: nullrev}
         self.allmap = 0
         self.all = 0
         self.mapfind_count = 0
 
     def loadmap(self):
         """
         during a commit, we need to make sure the rev being added is
         not a duplicate. This requires loading the entire index,
         which is fairly slow. loadmap can load up just the node map,
         which takes much less time.
         """
         if self.allmap:
             return
         end = self.datasize
         self.allmap = 1
         cur = 0
         count = 0
         blocksize = self.s * 256
         self.dataf.seek(0)
         while cur < end:
             data = self.dataf.read(blocksize)
             off = 0
             for x in xrange(256):
                 n = data[off + ngshaoffset:off + ngshaoffset + 20]
                 self.map[n] = count
                 count += 1
                 if count >= self.l:
                     break
                 off += self.s
             cur += blocksize
 
     def loadblock(self, blockstart, blocksize, data=None):
         if self.all:
             return
         if data is None:
             self.dataf.seek(blockstart)
             if blockstart + blocksize > self.datasize:
                 # the revlog may have grown since we've started running,
                 # but we don't have space in self.index for more entries.
                 # limit blocksize so that we don't get too much data.
                 blocksize = max(self.datasize - blockstart, 0)
             data = self.dataf.read(blocksize)
         lend = len(data) / self.s
         i = blockstart / self.s
         off = 0
         # lazyindex supports __delitem__
         if lend > len(self.index) - i:
             lend = len(self.index) - i
         for x in xrange(lend):
             if self.index[i + x] == None:
                 b = data[off : off + self.s]
                 self.index[i + x] = b
                 n = b[ngshaoffset:ngshaoffset + 20]
                 self.map[n] = i + x
             off += self.s
 
     def findnode(self, node):
         """search backwards through the index file for a specific node"""
         if self.allmap:
             return None
 
         # hg log will cause many many searches for the manifest
         # nodes. After we get called a few times, just load the whole
         # thing.
         if self.mapfind_count > 8:
             self.loadmap()
             if node in self.map:
                 return node
             return None
         self.mapfind_count += 1
         last = self.l - 1
         while self.index[last] != None:
             if last == 0:
                 self.all = 1
                 self.allmap = 1
                 return None
             last -= 1
         end = (last + 1) * self.s
         blocksize = self.s * 256
         while end >= 0:
             start = max(end - blocksize, 0)
             self.dataf.seek(start)
             data = self.dataf.read(end - start)
             findend = end - start
             while True:
                 # we're searching backwards, so we have to make sure
                 # we don't find a changeset where this node is a parent
                 off = data.find(node, 0, findend)
                 findend = off
                 if off >= 0:
                     i = off / self.s
                     off = i * self.s
                     n = data[off + ngshaoffset:off + ngshaoffset + 20]
                     if n == node:
                         self.map[n] = i + start / self.s
                         return node
                 else:
                     break
             end -= blocksize
         return None
 
     def loadindex(self, i=None, end=None):
         if self.all:
             return
         all = False
         if i == None:
             blockstart = 0
             blocksize = (65536 / self.s) * self.s
             end = self.datasize
             all = True
         else:
             if end:
                 blockstart = i * self.s
                 end = end * self.s
                 blocksize = end - blockstart
             else:
                 blockstart = (i & ~1023) * self.s
                 blocksize = self.s * 1024
                 end = blockstart + blocksize
         while blockstart < end:
             self.loadblock(blockstart, blocksize)
             blockstart += blocksize
         if all:
             self.all = True
 
 class lazyindex(object):
     """a lazy version of the index array"""
     def __init__(self, parser):
         self.p = parser
     def __len__(self):
         return len(self.p.index)
     def load(self, pos):
         if pos < 0:
             pos += len(self.p.index)
         self.p.loadindex(pos)
         return self.p.index[pos]
     def __getitem__(self, pos):
         return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
     def __setitem__(self, pos, item):
         self.p.index[pos] = _pack(indexformatng, *item)
     def __delitem__(self, pos):
         del self.p.index[pos]
     def insert(self, pos, e):
         self.p.index.insert(pos, _pack(indexformatng, *e))
     def append(self, e):
         self.p.index.append(_pack(indexformatng, *e))
 
 class lazymap(object):
     """a lazy version of the node map"""
     def __init__(self, parser):
         self.p = parser
     def load(self, key):
         n = self.p.findnode(key)
         if n == None:
             raise KeyError(key)
     def __contains__(self, key):
         if key in self.p.map:
             return True
         self.p.loadmap()
         return key in self.p.map
     def __iter__(self):
         yield nullid
         for i in xrange(self.p.l):
             ret = self.p.index[i]
             if not ret:
                 self.p.loadindex(i)
                 ret = self.p.index[i]
             if isinstance(ret, str):
                 ret = _unpack(indexformatng, ret)
             yield ret[7]
     def __getitem__(self, key):
         try:
             return self.p.map[key]
         except KeyError:
             try:
                 self.load(key)
                 return self.p.map[key]
             except KeyError:
                 raise KeyError("node " + hex(key))
     def __setitem__(self, key, val):
         self.p.map[key] = val
     def __delitem__(self, key):
         del self.p.map[key]
 
 indexformatv0 = ">4l20s20s20s"
 v0shaoffset = 56
 
 class revlogoldio(object):
     def __init__(self):
         self.size = struct.calcsize(indexformatv0)
 
     def parseindex(self, fp, data, inline):
         s = self.size
         index = []
         nodemap = {nullid: nullrev}
         n = off = 0
         if len(data) < _prereadsize:
             data += fp.read() # read the rest
         l = len(data)
         while off + s <= l:
             cur = data[off:off + s]
             off += s
             e = _unpack(indexformatv0, cur)
             # transform to revlogv1 format
             e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                   nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
             index.append(e2)
             nodemap[e[6]] = n
             n += 1
 
         return index, nodemap, None
 
     def packentry(self, entry, node, version, rev):
         e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
               node(entry[5]), node(entry[6]), entry[7])
         return _pack(indexformatv0, *e2)
 
 # index ng:
 # 6 bytes offset
 # 2 bytes flags
 # 4 bytes compressed length
 # 4 bytes uncompressed length
 # 4 bytes: base rev
 # 4 bytes link rev
 # 4 bytes parent 1 rev
 # 4 bytes parent 2 rev
 # 32 bytes: nodeid
 indexformatng = ">Qiiiiii20s12x"
 ngshaoffset = 32
 versionformat = ">I"
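struct.calcsize(">Qiiiiii20s12x") is 64, so every RevlogNG index entry is a fixed 64 bytes; the 20-byte SHA-1 nodeid is padded to 32 bytes by the trailing 12x. A sketch of packing and unpacking one entry with the format above, with made-up field values:

import struct

fmt = ">Qiiiiii20s12x"               # indexformatng, as defined above
assert struct.calcsize(fmt) == 64

# offset/flags, compressed len, uncompressed len, base rev,
# link rev, parent 1 rev, parent 2 rev, nodeid
entry = (0, 25, 60, 0, 0, -1, -1, "\x99" * 20)
packed = struct.pack(fmt, *entry)
assert struct.unpack(fmt, packed) == entry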
 
 class revlogio(object):
     def __init__(self):
         self.size = struct.calcsize(indexformatng)
 
     def parseindex(self, fp, data, inline):
         try:
             size = len(data)
             if size == _prereadsize:
                 size = util.fstat(fp).st_size
         except AttributeError:
             size = 0
 
         if util.openhardlinks() and not inline and size > _prereadsize:
             # big index, let's parse it on demand
             parser = lazyparser(fp, size)
             index = lazyindex(parser)
             nodemap = lazymap(parser)
             e = list(index[0])
             type = gettype(e[0])
             e[0] = offset_type(0, type)
             index[0] = e
             return index, nodemap, None
 
         # call the C implementation to parse the index data
         index, nodemap, cache = parsers.parse_index(data, inline)
         return index, nodemap, cache
 
     def packentry(self, entry, node, version, rev):
         p = _pack(indexformatng, *entry)
         if rev == 0:
             p = _pack(versionformat, version) + p[4:]
         return p
 
 class revlog(object):
     """
     the underlying revision storage object
 
     A revlog consists of two parts, an index and the revision data.
 
     The index is a file with a fixed record size containing
     information on each revision, including its nodeid (hash), the
     nodeids of its parents, the position and offset of its data within
     the data file, and the revision it's based on. Finally, each entry
     contains a linkrev entry that can serve as a pointer to external
     data.
 
     The revision data itself is a linear collection of data chunks.
     Each chunk represents a revision and is usually represented as a
     delta against the previous chunk. To bound lookup time, runs of
     deltas are limited to about 2 times the length of the original
     version data. This makes retrieval of a version proportional to
     its size, or O(1) relative to the number of revisions.
 
     Both pieces of the revlog are written to in an append-only
     fashion, which means we never need to rewrite a file to insert or
     remove data, and can use some simple techniques to avoid the need
     for locking while reading.
     """
     def __init__(self, opener, indexfile):
         """
         create a revlog object
 
         opener is a function that abstracts the file opening operation
         and can be used to implement COW semantics or the like.
         """
         self.indexfile = indexfile
         self.datafile = indexfile[:-2] + ".d"
         self.opener = opener
         self._cache = None
-        self._chunkcache = None
+        self._chunkcache = (0, '')
         self.nodemap = {nullid: nullrev}
         self.index = []
 
         v = REVLOG_DEFAULT_VERSION
         if hasattr(opener, "defversion"):
             v = opener.defversion
             if v & REVLOGNG:
                 v |= REVLOGNGINLINEDATA
 
         i = ''
         try:
             f = self.opener(self.indexfile)
             i = f.read(_prereadsize)
             if len(i) > 0:
                 v = struct.unpack(versionformat, i[:4])[0]
         except IOError, inst:
             if inst.errno != errno.ENOENT:
                 raise
 
         self.version = v
         self._inline = v & REVLOGNGINLINEDATA
         flags = v & ~0xFFFF
         fmt = v & 0xFFFF
         if fmt == REVLOGV0 and flags:
             raise RevlogError(_("index %s unknown flags %#04x for format v0")
                               % (self.indexfile, flags >> 16))
         elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
             raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                               % (self.indexfile, flags >> 16))
         elif fmt > REVLOGNG:
             raise RevlogError(_("index %s unknown format %d")
                               % (self.indexfile, fmt))
 
         self._io = revlogio()
         if self.version == REVLOGV0:
             self._io = revlogoldio()
         if i:
             try:
                 d = self._io.parseindex(f, i, self._inline)
             except (ValueError, IndexError), e:
                 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
             self.index, self.nodemap, self._chunkcache = d
+            if not self._chunkcache:
+                self._chunkcache = (0, '')
 
         # add the magic null revision at -1 (if it hasn't been done already)
         if (self.index == [] or isinstance(self.index, lazyindex) or
             self.index[-1][7] != nullid) :
             self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
 
     def _loadindex(self, start, end):
         """load a block of indexes all at once from the lazy parser"""
         if isinstance(self.index, lazyindex):
             self.index.p.loadindex(start, end)
 
     def _loadindexmap(self):
         """loads both the map and the index from the lazy parser"""
         if isinstance(self.index, lazyindex):
             p = self.index.p
             p.loadindex()
             self.nodemap = p.map
 
     def _loadmap(self):
         """loads the map from the lazy parser"""
         if isinstance(self.nodemap, lazymap):
             self.nodemap.p.loadmap()
             self.nodemap = self.nodemap.p.map
 
     def tip(self):
         return self.node(len(self.index) - 2)
     def __len__(self):
         return len(self.index) - 1
     def __iter__(self):
         for i in xrange(len(self)):
             yield i
     def rev(self, node):
         try:
             return self.nodemap[node]
         except KeyError:
             raise LookupError(node, self.indexfile, _('no node'))
     def node(self, rev):
         return self.index[rev][7]
     def linkrev(self, rev):
         return self.index[rev][4]
     def parents(self, node):
         i = self.index
         d = i[self.rev(node)]
         return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
     def parentrevs(self, rev):
         return self.index[rev][5:7]
     def start(self, rev):
         return int(self.index[rev][0] >> 16)
     def end(self, rev):
         return self.start(rev) + self.length(rev)
     def length(self, rev):
         return self.index[rev][1]
     def base(self, rev):
         return self.index[rev][3]
 
     def size(self, rev):
         """return the length of the uncompressed text for a given revision"""
         l = self.index[rev][2]
         if l >= 0:
             return l
 
         t = self.revision(self.node(rev))
         return len(t)
 
         # alternate implementation, The advantage to this code is it
         # will be faster for a single revision. But, the results are not
         # cached, so finding the size of every revision will be slower.
         """
         if self.cache and self.cache[1] == rev:
             return len(self.cache[2])
 
         base = self.base(rev)
         if self.cache and self.cache[1] >= base and self.cache[1] < rev:
             base = self.cache[1]
             text = self.cache[2]
         else:
             text = self.revision(self.node(base))
 
         l = len(text)
         for x in xrange(base + 1, rev + 1):
             l = mdiff.patchedsize(l, self.chunk(x))
         return l
         """
 
     def reachable(self, node, stop=None):
         """return a hash of all nodes ancestral to a given node, including
         the node itself, stopping when stop is matched"""
         reachable = {}
         visit = [node]
         reachable[node] = 1
         if stop:
             stopn = self.rev(stop)
         else:
             stopn = 0
         while visit:
             n = visit.pop(0)
             if n == stop:
                 continue
             if n == nullid:
                 continue
             for p in self.parents(n):
                 if self.rev(p) < stopn:
                     continue
                 if p not in reachable:
                     reachable[p] = 1
                     visit.append(p)
         return reachable
 
     def ancestors(self, *revs):
         'Generate the ancestors of revs using a breadth-first visit'
         visit = list(revs)
         seen = set([nullrev])
         while visit:
             for parent in self.parentrevs(visit.pop(0)):
                 if parent not in seen:
                     visit.append(parent)
                     seen.add(parent)
                     yield parent
 
     def descendants(self, *revs):
         'Generate the descendants of revs in topological order'
         seen = set(revs)
         for i in xrange(min(revs) + 1, len(self)):
             for x in self.parentrevs(i):
                 if x != nullrev and x in seen:
                     seen.add(i)
                     yield i
                     break
 
     def findmissing(self, common=None, heads=None):
         '''
         returns the topologically sorted list of nodes from the set:
         missing = (ancestors(heads) \ ancestors(common))
 
         where ancestors() is the set of ancestors from heads, heads included
 
         if heads is None, the heads of the revlog are used
         if common is None, nullid is assumed to be a common node
         '''
         if common is None:
             common = [nullid]
         if heads is None:
             heads = self.heads()
 
         common = [self.rev(n) for n in common]
         heads = [self.rev(n) for n in heads]
 
         # we want the ancestors, but inclusive
         has = set(self.ancestors(*common))
         has.add(nullrev)
         has.update(common)
 
         # take all ancestors from heads that aren't in has
         missing = {}
         visit = [r for r in heads if r not in has]
         while visit:
             r = visit.pop(0)
             if r in missing:
                 continue
             else:
                 missing[r] = None
                 for p in self.parentrevs(r):
                     if p not in has:
                         visit.append(p)
         missing = missing.keys()
         missing.sort()
         return [self.node(r) for r in missing]
 
     def nodesbetween(self, roots=None, heads=None):
         """Return a tuple containing three elements. Elements 1 and 2 contain
         a final list bases and heads after all the unreachable ones have been
         pruned. Element 0 contains a topologically sorted list of all
 
         nodes that satisfy these constraints:
         1. All nodes must be descended from a node in roots (the nodes on
         roots are considered descended from themselves).
         2. All nodes must also be ancestors of a node in heads (the nodes in
         heads are considered to be their own ancestors).
 
         If roots is unspecified, nullid is assumed as the only root.
         If heads is unspecified, it is taken to be the output of the
         heads method (i.e. a list of all nodes in the repository that
         have no children)."""
         nonodes = ([], [], [])
         if roots is not None:
             roots = list(roots)
             if not roots:
                 return nonodes
             lowestrev = min([self.rev(n) for n in roots])
         else:
             roots = [nullid] # Everybody's a descendent of nullid
             lowestrev = nullrev
         if (lowestrev == nullrev) and (heads is None):
             # We want _all_ the nodes!
             return ([self.node(r) for r in self], [nullid], list(self.heads()))
         if heads is None:
             # All nodes are ancestors, so the latest ancestor is the last
             # node.
             highestrev = len(self) - 1
             # Set ancestors to None to signal that every node is an ancestor.
             ancestors = None
             # Set heads to an empty dictionary for later discovery of heads
             heads = {}
         else:
             heads = list(heads)
             if not heads:
                 return nonodes
             ancestors = {}
             # Turn heads into a dictionary so we can remove 'fake' heads.
             # Also, later we will be using it to filter out the heads we can't
             # find from roots.
             heads = dict.fromkeys(heads, 0)
             # Start at the top and keep marking parents until we're done.
             nodestotag = set(heads)
             # Remember where the top was so we can use it as a limit later.
             highestrev = max([self.rev(n) for n in nodestotag])
             while nodestotag:
                 # grab a node to tag
                 n = nodestotag.pop()
                 # Never tag nullid
                 if n == nullid:
                     continue
                 # A node's revision number represents its place in a
                 # topologically sorted list of nodes.
                 r = self.rev(n)
                 if r >= lowestrev:
                     if n not in ancestors:
                         # If we are possibly a descendent of one of the roots
                         # and we haven't already been marked as an ancestor
                         ancestors[n] = 1 # Mark as ancestor
                         # Add non-nullid parents to list of nodes to tag.
                         nodestotag.update([p for p in self.parents(n) if
                                            p != nullid])
                     elif n in heads: # We've seen it before, is it a fake head?
                         # So it is, real heads should not be the ancestors of
                         # any other heads.
                         heads.pop(n)
             if not ancestors:
                 return nonodes
             # Now that we have our set of ancestors, we want to remove any
             # roots that are not ancestors.
 
             # If one of the roots was nullid, everything is included anyway.
             if lowestrev > nullrev:
                 # But, since we weren't, let's recompute the lowest rev to not
                 # include roots that aren't ancestors.
 
                 # Filter out roots that aren't ancestors of heads
                 roots = [n for n in roots if n in ancestors]
                 # Recompute the lowest revision
                 if roots:
                     lowestrev = min([self.rev(n) for n in roots])
                 else:
                     # No more roots? Return empty list
                     return nonodes
             else:
                 # We are descending from nullid, and don't need to care about
                 # any other roots.
                 lowestrev = nullrev
                 roots = [nullid]
         # Transform our roots list into a set.
         descendents = set(roots)
         # Also, keep the original roots so we can filter out roots that aren't
         # 'real' roots (i.e. are descended from other roots).
         roots = descendents.copy()
         # Our topologically sorted list of output nodes.
         orderedout = []
         # Don't start at nullid since we don't want nullid in our output list,
         # and if nullid shows up in descedents, empty parents will look like
         # they're descendents.
         for r in xrange(max(lowestrev, 0), highestrev + 1):
             n = self.node(r)
             isdescendent = False
             if lowestrev == nullrev: # Everybody is a descendent of nullid
                 isdescendent = True
             elif n in descendents:
                 # n is already a descendent
                 isdescendent = True
                 # This check only needs to be done here because all the roots
                 # will start being marked is descendents before the loop.
                 if n in roots:
                     # If n was a root, check if it's a 'real' root.
                     p = tuple(self.parents(n))
                     # If any of its parents are descendents, it's not a root.
                     if (p[0] in descendents) or (p[1] in descendents):
                         roots.remove(n)
             else:
                 p = tuple(self.parents(n))
                 # A node is a descendent if either of its parents are
                 # descendents. (We seeded the dependents list with the roots
                 # up there, remember?)
                 if (p[0] in descendents) or (p[1] in descendents):
                     descendents.add(n)
                     isdescendent = True
             if isdescendent and ((ancestors is None) or (n in ancestors)):
                 # Only include nodes that are both descendents and ancestors.
                 orderedout.append(n)
                 if (ancestors is not None) and (n in heads):
                     # We're trying to figure out which heads are reachable
                     # from roots.
                     # Mark this head as having been reached
                     heads[n] = 1
                 elif ancestors is None:
                     # Otherwise, we're trying to discover the heads.
                     # Assume this is a head because if it isn't, the next step
                     # will eventually remove it.
                     heads[n] = 1
                     # But, obviously its parents aren't.
                     for p in self.parents(n):
                         heads.pop(p, None)
         heads = [n for n in heads.iterkeys() if heads[n] != 0]
         roots = list(roots)
         assert orderedout
         assert roots
         assert heads
         return (orderedout, roots, heads)
 
     def heads(self, start=None, stop=None):
         """return the list of all nodes that have no children
 
         if start is specified, only heads that are descendants of
         start will be returned
         if stop is specified, it will consider all the revs from stop
         as if they had no children
         """
         if start is None and stop is None:
             count = len(self)
             if not count:
                 return [nullid]
             ishead = [1] * (count + 1)
             index = self.index
             for r in xrange(count):
                 e = index[r]
                 ishead[e[5]] = ishead[e[6]] = 0
             return [self.node(r) for r in xrange(count) if ishead[r]]
 
         if start is None:
             start = nullid
         if stop is None:
             stop = []
         stoprevs = set([self.rev(n) for n in stop])
         startrev = self.rev(start)
         reachable = {startrev: 1}
         heads = {startrev: 1}
 
         parentrevs = self.parentrevs
         for r in xrange(startrev + 1, len(self)):
             for p in parentrevs(r):
                 if p in reachable:
                     if r not in stoprevs:
                         reachable[r] = 1
                         heads[r] = 1
                     if p in heads and p not in stoprevs:
                         del heads[p]
 
         return [self.node(r) for r in heads]
 
     def children(self, node):
         """find the children of a given node"""
         c = []
         p = self.rev(node)
         for r in range(p + 1, len(self)):
             prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
             if prevs:
                 for pr in prevs:
                     if pr == p:
                         c.append(self.node(r))
             elif p == nullrev:
                 c.append(self.node(r))
         return c
 
     def _match(self, id):
         if isinstance(id, (long, int)):
             # rev
             return self.node(id)
         if len(id) == 20:
             # possibly a binary node
             # odds of a binary node being all hex in ASCII are 1 in 10**25
             try:
                 node = id
                 self.rev(node) # quick search the index
                 return node
             except LookupError:
                 pass # may be partial hex id
         try:
             # str(rev)
             rev = int(id)
             if str(rev) != id:
                 raise ValueError
             if rev < 0:
                 rev = len(self) + rev
             if rev < 0 or rev >= len(self):
                 raise ValueError
             return self.node(rev)
         except (ValueError, OverflowError):
             pass
         if len(id) == 40:
             try:
                 # a full hex nodeid?
                 node = bin(id)
                 self.rev(node)
                 return node
             except (TypeError, LookupError):
                 pass
 
     def _partialmatch(self, id):
         if len(id) < 40:
             try:
                 # hex(node)[:...]
                 l = len(id) / 2 # grab an even number of digits
                 bin_id = bin(id[:l*2])
                 nl = [n for n in self.nodemap if n[:l] == bin_id]
                 nl = [n for n in nl if hex(n).startswith(id)]
                 if len(nl) > 0:
                     if len(nl) == 1:
                         return nl[0]
                     raise LookupError(id, self.indexfile,
                                       _('ambiguous identifier'))
                 return None
             except TypeError:
                 pass
 
     def lookup(self, id):
         """locate a node based on:
             - revision number or str(revision number)
             - nodeid or subset of hex nodeid
         """
         n = self._match(id)
         if n is not None:
             return n
         n = self._partialmatch(id)
         if n:
             return n
 
         raise LookupError(id, self.indexfile, _('no match found'))
 
     def cmp(self, node, text):
         """compare text with a given file revision"""
         p1, p2 = self.parents(node)
         return hash(text, p1, p2) != node
 
+    def _addchunk(self, offset, data):
+        o, d = self._chunkcache
+        # try to add to existing cache
+        if o + len(d) == offset and len(d) + len(data) < _prereadsize:
+            self._chunkcache = o, d + data
+        else:
+            self._chunkcache = offset, data
+
+    def _loadchunk(self, offset, length, df=None):
+        if not df:
+            if self._inline:
+                df = self.opener(self.indexfile)
+            else:
+                df = self.opener(self.datafile)
+
+        readahead = max(65536, length)
+        df.seek(offset)
+        d = df.read(readahead)
+        self._addchunk(offset, d)
+        if readahead > length:
+            return d[:length]
+        return d
+
+    def _getchunk(self, offset, length, df=None):
+        o, d = self._chunkcache
+        l = len(d)
+
+        # is it in the cache?
+        cachestart = offset - o
+        cacheend = cachestart + length
+        if cachestart >= 0 and cacheend <= l:
+            if cachestart == 0 and cacheend == l:
+                return d # avoid a copy
+            return d[cachestart:cacheend]
+
+        return self._loadchunk(offset, length, df)
+
     def chunk(self, rev, df=None):
-        def loadcache(df):
-            if not df:
-                if self._inline:
-                    df = self.opener(self.indexfile)
-                else:
-                    df = self.opener(self.datafile)
-            df.seek(start)
-            self._chunkcache = (start, df.read(cache_length))
-
         start, length = self.start(rev), self.length(rev)
         if self._inline:
             start += (rev + 1) * self._io.size
-        end = start + length
+        return decompress(self._getchunk(start, length, df))
-
-        offset = 0
-        if not self._chunkcache:
-            cache_length = max(65536, length)
-            loadcache(df)
-        else:
-            cache_start = self._chunkcache[0]
-            cache_length = len(self._chunkcache[1])
-            cache_end = cache_start + cache_length
-            if start >= cache_start and end <= cache_end:
-                # it is cached
-                offset = start - cache_start
-            else:
-                cache_length = max(65536, length)
-                loadcache(df)
-
-        # avoid copying large chunks
-        c = self._chunkcache[1]
-        if cache_length != length:
-            c = c[offset:offset + length]
-
-        return decompress(c)
 
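This is the heart of the cleanup: the inline loadcache closure and the start/end bookkeeping that used to live inside chunk() are replaced by the three small helpers added above, and _chunkcache is now always an (offset, data) pair rather than sometimes None. A rough sketch of how a read flows through the new path, with rl standing in for a revlog instance:

o, d = rl._chunkcache        # always a valid (offset, data) pair now
text = rl.chunk(5)           # start/length come from the index;
                             # _getchunk() slices the cached window if
                             # [start, start + length) falls inside it,
                             # otherwise _loadchunk() reads >= 64KB from
                             # disk and _addchunk() extends the cache for
                             # sequential reads (capped at _prereadsize)

Keeping the cache as a pair removes the None checks, and letting _addchunk grow one contiguous buffer favors the common sequential-read pattern instead of re-reading from disk.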
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
            return self.chunk(rev2)

        return mdiff.textdiff(self.revision(self.node(rev1)),
                              self.revision(self.node(rev2)))

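The early return in revdiff() exploits a storage fact: when rev2 immediately follows rev1 in the same delta chain, the chunk stored for rev2 already is the delta from rev1, so neither revision needs to be reconstructed. The guard, extracted as a standalone predicate (rl stands for any object with revlog's base() method; a sketch, not revlog API):

def stored_delta_reusable(rl, rev1, rev2):
    # rev2's on-disk chunk is exactly the rev1 -> rev2 delta when the two
    # revisions are adjacent members of one delta chain
    return rev1 + 1 == rev2 and rl.base(rev1) == rl.base(rev2)

class fakelog(object):
    def base(self, rev):
        return 0                    # pretend every rev chains off rev 0

print(stored_delta_reusable(fakelog(), 3, 4))   # True: adjacent, same chain
print(stored_delta_reusable(fakelog(), 3, 7))   # False: not adjacent
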
    def revision(self, node):
        """return an uncompressed revision of a given node"""
        if node == nullid:
            return ""
        if self._cache and self._cache[0] == node:
            return str(self._cache[2])

        # look up what we need to read
        text = None
        rev = self.rev(node)
        base = self.base(rev)

        # check rev flags
        if self.index[rev][0] & 0xFFFF:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.index[rev][0] & 0xFFFF))

        df = None

        # do we have useful data cached?
        if self._cache and self._cache[1] >= base and self._cache[1] < rev:
            base = self._cache[1]
            text = str(self._cache[2])
            self._loadindex(base, rev + 1)
            if not self._inline and rev > base + 1:
                df = self.opener(self.datafile)
        else:
            self._loadindex(base, rev + 1)
            if not self._inline and rev > base:
                df = self.opener(self.datafile)
            text = self.chunk(base, df=df)

        bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
        text = mdiff.patches(text, bins)
        p1, p2 = self.parents(node)
        if node != hash(text, p1, p2):
            raise RevlogError(_("integrity check failed on %s:%d")
                              % (self.datafile, rev))

        self._cache = (node, rev, text)
        return text

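revision() reconstructs a node by taking the full text stored at base and folding every delta from base + 1 through rev over it; that fold is the mdiff.patches(text, bins) call. The real binary patch format is out of scope here, so this toy models a delta as one (start, end, replacement) triple purely to show the fold (all names hypothetical):

def apply_delta(text, delta):
    start, end, data = delta
    return text[:start] + data + text[end:]

base_text = b'the quick brown fox'
deltas = [
    (4, 9, b'slow'),         # delta stored at rev base+1
    (15, 18, b'dog'),        # delta stored at rev base+2
]

text = base_text
for d in deltas:             # `bins` in revision() plays this role
    text = apply_delta(text, d)
print(text)                  # 'the slow brown dog'
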
    def checkinlinesize(self, tr, fp=None):
        if not self._inline or (self.start(-2) + self.length(-2)) < 131072:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo == None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)

        if not fp:
            fp = self.opener(self.indexfile, 'r')

        df = self.opener(self.datafile, 'w')
        try:
            calc = self._io.size
            for r in self:
                start = self.start(r) + (r + 1) * calc
                length = self.length(r)
                fp.seek(start)
                d = fp.read(length)
                df.write(d)
        finally:
            df.close()

        fp.close()
        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call rename, the temp file will never replace the
        # real index
        fp.rename()

        tr.replace(self.indexfile, trindex * calc)
-        self._chunkcache = None
+        self._chunkcache = (0, '')

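checkinlinesize() performs a one-way migration once inline data passes the 128KB threshold: copy each data chunk out to a new .d file, then rewrite the index without the inline flag. A toy of the split, using 4-byte length-only "index entries" in place of the real 64-byte entries (a deliberate simplification):

import struct

entry = struct.Struct('>I')        # toy index entry: just the chunk length

def split_inline(blob, lengths):
    """separate a toy inline file (entry+chunk interleaved) into
    an index blob and a data blob"""
    index, data = b'', b''
    pos = 0
    for l in lengths:
        pos += entry.size          # skip past the interleaved index entry
        index += entry.pack(l)
        data += blob[pos:pos + l]
        pos += l
    return index, data

chunks = [b'aaa', b'bbbb']
inline = b''.join(entry.pack(len(c)) + c for c in chunks)
idx, dat = split_inline(inline, [len(c) for c in chunks])
print(dat)                         # 'aaabbbb': data now lives on its own
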
    def addrevision(self, text, transaction, link, p1, p2, d=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        d - an optional precomputed delta
        """
        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        try:
            return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

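Both write paths below lean on a convention of compress() (defined earlier in this file, not shown here): it returns a (header, payload) pair, where the header marks data kept uncompressed ('u') or is empty for zlib output, which is why the stored length is len(data[0]) + len(data[1]). A toy with the same shape (thresholds simplified, so only an approximation of the real function):

import zlib

def toy_compress(text):
    # mirror the assumed revlog convention: return (header, payload)
    bin = zlib.compress(text)
    if len(bin) >= len(text):
        return (b'u', text)         # compression did not pay off; keep raw
    return (b'', bin)

data = toy_compress(b'x' * 1000)
print(len(data[0]) + len(data[1]))  # the length recorded in the index entry
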
    def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
        node = hash(text, p1, p2)
        if node in self.nodemap:
            return node

        curr = len(self)
        prev = curr - 1
        base = self.base(prev)
        offset = self.end(prev)

        if curr:
            if not d:
                ptext = self.revision(self.node(prev))
                d = mdiff.textdiff(ptext, text)
            data = compress(d)
            l = len(data[1]) + len(data[0])
            dist = l + offset - self.start(base)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if not curr or dist > len(text) * 2:
            data = compress(text)
            l = len(data[1]) + len(data[0])
            base = curr

        e = (offset_type(offset, 0), l, len(text),
             base, link, self.rev(p1), self.rev(p2), node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

        self._cache = (node, curr, text)
        return node

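The snapshot heuristic in _addrevision: dist is the byte span from the chain's last full version through the new delta, and once it exceeds twice the new text's length, reading the chain would cost more than storing a fresh full version, so base resets to the new revision. The trigger condition in isolation (numbers invented):

def needs_fulltext(dist, textlen, first):
    # mirror of: if not curr or dist > len(text) * 2
    return first or dist > textlen * 2

print(needs_fulltext(dist=150, textlen=100, first=False))  # False: keep delta
print(needs_fulltext(dist=350, textlen=100, first=False))  # True: new snapshot
print(needs_fulltext(dist=0, textlen=100, first=True))     # True: first rev
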
    def ancestor(self, a, b):
        """calculate the least common ancestor of nodes a and b"""

        def parents(rev):
            return [p for p in self.parentrevs(rev) if p != nullrev]

        c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
        if c is None:
            return nullid

        return self.node(c)

    def group(self, nodelist, lookup, infocollect=None):
        """calculate a delta group

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
        have this parent as it has all history before these
        changesets. The parent is parent[0].
        """
        revs = [self.rev(n) for n in nodelist]

        # if we don't have any revisions touched by these changesets, bail
        if not revs:
            yield changegroup.closechunk()
            return

        # add the parent of the first rev
        p = self.parents(self.node(revs[0]))[0]
        revs.insert(0, self.rev(p))

        # build deltas
        for d in xrange(0, len(revs) - 1):
            a, b = revs[d], revs[d + 1]
            nb = self.node(b)

            if infocollect is not None:
                infocollect(nb)

            p = self.parents(nb)
            meta = nb + p[0] + p[1] + lookup(nb)
            if a == -1:
                d = self.revision(nb)
                meta += mdiff.trivialdiffheader(len(d))
            else:
                d = self.revdiff(a, b)
            yield changegroup.chunkheader(len(meta) + len(d))
            yield meta
            if len(d) > 2**20:
                pos = 0
                while pos < len(d):
                    pos2 = pos + 2 ** 18
                    yield d[pos:pos2]
                    pos = pos2
            else:
                yield d

        yield changegroup.closechunk()

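The tail of group() never yields a delta larger than 1MB (2**20) in one piece; big deltas are re-yielded in 256KB (2**18) slices so consumers can stream them. The same slicing as a standalone generator (a sketch, not changegroup API):

def sliced(d, limit=2**20, step=2**18):
    if len(d) <= limit:
        yield d
        return
    pos = 0
    while pos < len(d):
        yield d[pos:pos + step]
        pos += step

print([len(s) for s in sliced(b'x' * (2**20 + 5))])
# [262144, 262144, 262144, 262144, 5]
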
    def addgroup(self, revs, linkmapper, transaction):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The
        first delta is against its parent, which should be in our
        log; the rest are against the previous delta.
        """

        # track the base of the current delta log
        r = len(self)
        t = r - 1
        node = None

        base = prev = nullrev
        start = end = textlen = 0
        if r:
            end = self.end(t)

        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        try:
            # loop through our set of deltas
            chain = None
            for chunk in revs:
                node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue
                delta = buffer(chunk, 80)
                del chunk

                for p in (p1, p2):
                    if not p in self.nodemap:
                        raise LookupError(p, self.indexfile, _('unknown parent'))

                if not chain:
                    # retrieve the parent revision of the delta chain
                    chain = p1
                    if not chain in self.nodemap:
                        raise LookupError(chain, self.indexfile, _('unknown base'))

                # full versions are inserted when the needed deltas become
                # comparable to the uncompressed text or when the previous
                # version is not the one we have a delta against. We use
                # the size of the previous full rev as a proxy for the
                # current size.

                if chain == prev:
                    cdelta = compress(delta)
                    cdeltalen = len(cdelta[0]) + len(cdelta[1])
                    textlen = mdiff.patchedsize(textlen, delta)

                if chain != prev or (end - start + cdeltalen) > textlen * 2:
                    # flush our writes here so we can read it in revision
                    if dfh:
                        dfh.flush()
                    ifh.flush()
                    text = self.revision(chain)
                    if len(text) == 0:
                        # skip over trivial delta header
                        text = buffer(delta, 12)
                    else:
                        text = mdiff.patches(text, [delta])
                    del delta
                    chk = self._addrevision(text, transaction, link, p1, p2, None,
                                            ifh, dfh)
                    if not dfh and not self._inline:
                        # addrevision switched from inline to conventional
                        # reopen the index
                        dfh = self.opener(self.datafile, "a")
                        ifh = self.opener(self.indexfile, "a")
                    if chk != node:
                        raise RevlogError(_("consistency error adding group"))
                    textlen = len(text)
                else:
                    e = (offset_type(end, 0), cdeltalen, textlen, base,
                         link, self.rev(p1), self.rev(p2), node)
                    self.index.insert(-1, e)
                    self.nodemap[node] = r
                    entry = self._io.packentry(e, self.node, self.version, r)
                    if self._inline:
                        ifh.write(entry)
                        ifh.write(cdelta[0])
                        ifh.write(cdelta[1])
                        self.checkinlinesize(transaction, ifh)
                        if not self._inline:
                            dfh = self.opener(self.datafile, "a")
                            ifh = self.opener(self.indexfile, "a")
                    else:
                        dfh.write(cdelta[0])
                        dfh.write(cdelta[1])
                        ifh.write(entry)

                t, r, chain, prev = r, r + 1, node, node
                base = self.base(t)
                start = self.start(base)
                end = self.end(t)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return node

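Each chunk consumed by addgroup() starts with an 80-byte header of four 20-byte nodes — the new node, its two parents, and the changeset cs handed to linkmapper — followed by the delta itself. The unpacking in isolation, with fabricated node values:

import struct

node, p1, p2, cs = b'N' * 20, b'P' * 20, b'Q' * 20, b'C' * 20
chunk = node + p1 + p2 + cs + b'<delta payload>'

n, a, b, c = struct.unpack("20s20s20s20s", chunk[:80])
delta = chunk[80:]                  # the diff itself
print(n == node)                    # True
print(delta)                        # '<delta payload>'
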
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        if isinstance(self.index, lazyindex):
            self._loadindexmap()

        for rev in self:
            if self.index[rev][4] >= minlink:
                break
        else:
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
-        self._chunkcache = None
+        self._chunkcache = (0, '')
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

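strip() leans on Python's for/else: the else clause runs only when the loop exhausts every revision without hitting break, meaning nothing has linkrev >= minlink and there is nothing to truncate. The idiom in miniature (toy linkrev list):

def first_to_strip(linkrevs, minlink):
    for rev, linkrev in enumerate(linkrevs):
        if linkrev >= minlink:
            break                  # found the first revision to truncate
    else:
        return None                # loop ran dry: nothing reaches minlink
    return rev

print(first_to_strip([0, 1, 4, 5], 4))   # 2
print(first_to_strip([0, 1, 2, 3], 9))   # None
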
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            s = self._io.size
            i = max(0, actual / s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [ self.indexfile ]
        if not self._inline:
            res.append(self.datafile)
        return res