revlog: add cache priming for reconstructing delta chains
Matt Mackall
r8318:6b8513f8 default
@@ -1,1379 +1,1389 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2, incorporated herein by reference.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

# import stuff from node for others to import from revlog
from node import bin, hex, nullid, nullrev, short #@UnusedImport
from i18n import _
import changegroup, ancestor, mdiff, parsers, error, util
import struct, zlib, errno

_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress
_sha = util.sha1

# revlog flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS

_prereadsize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError

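# the first field of an index entry packs the data-file offset and a
# 16-bit flags value into a single integer: the offset in the high bits,
# the flags in the low 16 (so offset_type(o, t) == (o << 16) | t)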
def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    return long(long(offset) << 16 | type)

nullhash = _sha(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, it is p2
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        l = [p1, p2]
        l.sort()
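        # sorting the parents makes the result independent of their
        # order: hash(text, p1, p2) == hash(text, p2, p1)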
        s = _sha(l[0])
        s.update(l[1])
    s.update(text)
    return s.digest()

def compress(text):
    """ generate a possibly-compressed representation of text """
    if not text:
        return ("", text)
    l = len(text)
    bin = None
    if l < 44:
        pass
    elif l > 1000000:
        # zlib makes an internal copy, thus doubling memory usage for
        # large files, so let's do this in pieces
        z = zlib.compressobj()
        p = []
        pos = 0
        while pos < l:
            pos2 = pos + 2**20
            p.append(z.compress(text[pos:pos2]))
            pos = pos2
        p.append(z.flush())
        if sum(map(len, p)) < l:
            bin = "".join(p)
    else:
        bin = _compress(text)
    if bin is None or len(bin) > l:
        if text[0] == '\0':
            return ("", text)
        return ('u', text)
    return ("", bin)

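# the first byte of the stored data acts as a tag: 'x' is the start of
# a zlib stream, 'u' marks text stored uncompressed, and '\0' (or an
# empty chunk) means the data is stored literally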
def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    t = bin[0]
    if t == '\0':
        return bin
    if t == 'x':
        return _decompress(bin)
    if t == 'u':
        return bin[1:]
    raise RevlogError(_("unknown compression type %r") % t)

class lazyparser(object):
    """
    this class avoids the need to parse the entirety of large indices
    """

    # lazyparser is not safe to use on windows if win32 extensions not
    # available. it keeps the file handle open, which makes it impossible
    # to break hardlinks on local cloned repos.

    def __init__(self, dataf, size):
        self.dataf = dataf
        self.s = struct.calcsize(indexformatng)
        self.datasize = size
        self.l = size/self.s
        self.index = [None] * self.l
        self.map = {nullid: nullrev}
        self.allmap = 0
        self.all = 0
        self.mapfind_count = 0

    def loadmap(self):
        """
        during a commit, we need to make sure the rev being added is
        not a duplicate. This requires loading the entire index,
        which is fairly slow. loadmap can load up just the node map,
        which takes much less time.
        """
        if self.allmap:
            return
        end = self.datasize
        self.allmap = 1
        cur = 0
        count = 0
        blocksize = self.s * 256
        self.dataf.seek(0)
        while cur < end:
            data = self.dataf.read(blocksize)
            off = 0
            for x in xrange(256):
                n = data[off + ngshaoffset:off + ngshaoffset + 20]
                self.map[n] = count
                count += 1
                if count >= self.l:
                    break
                off += self.s
            cur += blocksize

    def loadblock(self, blockstart, blocksize, data=None):
        if self.all:
            return
        if data is None:
            self.dataf.seek(blockstart)
            if blockstart + blocksize > self.datasize:
                # the revlog may have grown since we've started running,
                # but we don't have space in self.index for more entries.
                # limit blocksize so that we don't get too much data.
                blocksize = max(self.datasize - blockstart, 0)
            data = self.dataf.read(blocksize)
        lend = len(data) / self.s
        i = blockstart / self.s
        off = 0
        # lazyindex supports __delitem__
        if lend > len(self.index) - i:
            lend = len(self.index) - i
        for x in xrange(lend):
            if self.index[i + x] == None:
                b = data[off : off + self.s]
                self.index[i + x] = b
                n = b[ngshaoffset:ngshaoffset + 20]
                self.map[n] = i + x
            off += self.s

    def findnode(self, node):
        """search backwards through the index file for a specific node"""
        if self.allmap:
            return None

        # hg log will cause many many searches for the manifest
        # nodes. After we get called a few times, just load the whole
        # thing.
        if self.mapfind_count > 8:
            self.loadmap()
            if node in self.map:
                return node
            return None
        self.mapfind_count += 1
        last = self.l - 1
        while self.index[last] != None:
            if last == 0:
                self.all = 1
                self.allmap = 1
                return None
            last -= 1
        end = (last + 1) * self.s
        blocksize = self.s * 256
        while end >= 0:
            start = max(end - blocksize, 0)
            self.dataf.seek(start)
            data = self.dataf.read(end - start)
            findend = end - start
            while True:
                # we're searching backwards, so we have to make sure
                # we don't find a changeset where this node is a parent
                off = data.find(node, 0, findend)
                findend = off
                if off >= 0:
                    i = off / self.s
                    off = i * self.s
                    n = data[off + ngshaoffset:off + ngshaoffset + 20]
                    if n == node:
                        self.map[n] = i + start / self.s
                        return node
                else:
                    break
            end -= blocksize
        return None

    def loadindex(self, i=None, end=None):
        if self.all:
            return
        all = False
        if i == None:
            blockstart = 0
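            # round the read block down to a whole number of index entries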
            blocksize = (65536 / self.s) * self.s
            end = self.datasize
            all = True
        else:
            if end:
                blockstart = i * self.s
                end = end * self.s
                blocksize = end - blockstart
            else:
                blockstart = (i & ~1023) * self.s
                blocksize = self.s * 1024
                end = blockstart + blocksize
        while blockstart < end:
            self.loadblock(blockstart, blocksize)
            blockstart += blocksize
        if all:
            self.all = True

class lazyindex(object):
    """a lazy version of the index array"""
    def __init__(self, parser):
        self.p = parser
    def __len__(self):
        return len(self.p.index)
    def load(self, pos):
        if pos < 0:
            pos += len(self.p.index)
        self.p.loadindex(pos)
        return self.p.index[pos]
    def __getitem__(self, pos):
        return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
    def __setitem__(self, pos, item):
        self.p.index[pos] = _pack(indexformatng, *item)
    def __delitem__(self, pos):
        del self.p.index[pos]
    def insert(self, pos, e):
        self.p.index.insert(pos, _pack(indexformatng, *e))
    def append(self, e):
        self.p.index.append(_pack(indexformatng, *e))

class lazymap(object):
    """a lazy version of the node map"""
    def __init__(self, parser):
        self.p = parser
    def load(self, key):
        n = self.p.findnode(key)
        if n == None:
            raise KeyError(key)
    def __contains__(self, key):
        if key in self.p.map:
            return True
        self.p.loadmap()
        return key in self.p.map
    def __iter__(self):
        yield nullid
        for i in xrange(self.p.l):
            ret = self.p.index[i]
            if not ret:
                self.p.loadindex(i)
                ret = self.p.index[i]
            if isinstance(ret, str):
                ret = _unpack(indexformatng, ret)
            yield ret[7]
    def __getitem__(self, key):
        try:
            return self.p.map[key]
        except KeyError:
            try:
                self.load(key)
                return self.p.map[key]
            except KeyError:
                raise KeyError("node " + hex(key))
    def __setitem__(self, key, val):
        self.p.map[key] = val
    def __delitem__(self, key):
        del self.p.map[key]

indexformatv0 = ">4l20s20s20s"
v0shaoffset = 56

class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, fp, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        if len(data) < _prereadsize:
            data += fp.read() # read the rest
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid (20-byte sha1 plus 12 bytes of padding)
indexformatng = ">Qiiiiii20s12x"
ngshaoffset = 32
versionformat = ">I"

class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, fp, data, inline):
        try:
            size = len(data)
            if size == _prereadsize:
                size = util.fstat(fp).st_size
        except AttributeError:
            size = 0

        if util.openhardlinks() and not inline and size > _prereadsize:
            # big index, let's parse it on demand
            parser = lazyparser(fp, size)
            index = lazyindex(parser)
            nodemap = lazymap(parser)
            e = list(index[0])
            type = gettype(e[0])
            e[0] = offset_type(0, type)
            index[0] = e
            return index, nodemap, None

        # call the C implementation to parse the index data
        index, nodemap, cache = parsers.parse_index(data, inline)
        return index, nodemap, cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.
    """
    def __init__(self, opener, indexfile):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        self._cache = None
        self._chunkcache = (0, '')
        self.nodemap = {nullid: nullrev}
        self.index = []

        v = REVLOG_DEFAULT_VERSION
        if hasattr(opener, "defversion"):
            v = opener.defversion
        if v & REVLOGNG:
            v |= REVLOGNGINLINEDATA

        i = ''
        try:
            f = self.opener(self.indexfile)
            i = f.read(_prereadsize)
            if len(i) > 0:
                v = struct.unpack(versionformat, i[:4])[0]
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        if i:
            try:
                d = self._io.parseindex(f, i, self._inline)
            except (ValueError, IndexError), e:
                raise RevlogError(_("index %s is corrupted") % (self.indexfile))
            self.index, self.nodemap, self._chunkcache = d
            if not self._chunkcache:
                self._chunkcache = (0, '')

        # add the magic null revision at -1 (if it hasn't been done already)
        if (self.index == [] or isinstance(self.index, lazyindex) or
            self.index[-1][7] != nullid):
            self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))

    def _loadindex(self, start, end):
        """load a block of indexes all at once from the lazy parser"""
        if isinstance(self.index, lazyindex):
            self.index.p.loadindex(start, end)

    def _loadindexmap(self):
        """loads both the map and the index from the lazy parser"""
        if isinstance(self.index, lazyindex):
            p = self.index.p
            p.loadindex()
            self.nodemap = p.map

    def _loadmap(self):
        """loads the map from the lazy parser"""
        if isinstance(self.nodemap, lazymap):
            self.nodemap.p.loadmap()
            self.nodemap = self.nodemap.p.map

    def tip(self):
        return self.node(len(self.index) - 2)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        for i in xrange(len(self)):
            yield i
    def rev(self, node):
        try:
            return self.nodemap[node]
        except KeyError:
            raise LookupError(node, self.indexfile, _('no node'))
    def node(self, rev):
        return self.index[rev][7]
    def linkrev(self, rev):
        return self.index[rev][4]
    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
    def parentrevs(self, rev):
        return self.index[rev][5:7]
    def start(self, rev):
        return int(self.index[rev][0] >> 16)
    def end(self, rev):
        return self.start(rev) + self.length(rev)
    def length(self, rev):
        return self.index[rev][1]
    def base(self, rev):
        return self.index[rev][3]

    def size(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(self.node(rev))
        return len(t)

    # alternate implementation; the advantage of this code is that it
    # will be faster for a single revision. But, the results are not
    # cached, so finding the size of every revision will be slower.
541 """
541 """
542 if self.cache and self.cache[1] == rev:
542 if self.cache and self.cache[1] == rev:
543 return len(self.cache[2])
543 return len(self.cache[2])
544
544
545 base = self.base(rev)
545 base = self.base(rev)
546 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
546 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
547 base = self.cache[1]
547 base = self.cache[1]
548 text = self.cache[2]
548 text = self.cache[2]
549 else:
549 else:
550 text = self.revision(self.node(base))
550 text = self.revision(self.node(base))
551
551
552 l = len(text)
552 l = len(text)
553 for x in xrange(base + 1, rev + 1):
553 for x in xrange(base + 1, rev + 1):
554 l = mdiff.patchedsize(l, self.chunk(x))
554 l = mdiff.patchedsize(l, self.chunk(x))
555 return l
555 return l
556 """
556 """
557
557
558 def reachable(self, node, stop=None):
558 def reachable(self, node, stop=None):
559 """return a hash of all nodes ancestral to a given node, including
559 """return a hash of all nodes ancestral to a given node, including
560 the node itself, stopping when stop is matched"""
560 the node itself, stopping when stop is matched"""
561 reachable = {}
561 reachable = {}
562 visit = [node]
562 visit = [node]
563 reachable[node] = 1
563 reachable[node] = 1
564 if stop:
564 if stop:
565 stopn = self.rev(stop)
565 stopn = self.rev(stop)
566 else:
566 else:
567 stopn = 0
567 stopn = 0
568 while visit:
568 while visit:
569 n = visit.pop(0)
569 n = visit.pop(0)
570 if n == stop:
570 if n == stop:
571 continue
571 continue
572 if n == nullid:
572 if n == nullid:
573 continue
573 continue
574 for p in self.parents(n):
574 for p in self.parents(n):
575 if self.rev(p) < stopn:
575 if self.rev(p) < stopn:
576 continue
576 continue
577 if p not in reachable:
577 if p not in reachable:
578 reachable[p] = 1
578 reachable[p] = 1
579 visit.append(p)
579 visit.append(p)
580 return reachable
580 return reachable
581
581
582 def ancestors(self, *revs):
582 def ancestors(self, *revs):
583 'Generate the ancestors of revs using a breadth-first visit'
583 'Generate the ancestors of revs using a breadth-first visit'
584 visit = list(revs)
584 visit = list(revs)
585 seen = set([nullrev])
585 seen = set([nullrev])
586 while visit:
586 while visit:
587 for parent in self.parentrevs(visit.pop(0)):
587 for parent in self.parentrevs(visit.pop(0)):
588 if parent not in seen:
588 if parent not in seen:
589 visit.append(parent)
589 visit.append(parent)
590 seen.add(parent)
590 seen.add(parent)
591 yield parent
591 yield parent
592
592
593 def descendants(self, *revs):
593 def descendants(self, *revs):
594 'Generate the descendants of revs in topological order'
594 'Generate the descendants of revs in topological order'
595 seen = set(revs)
595 seen = set(revs)
596 for i in xrange(min(revs) + 1, len(self)):
596 for i in xrange(min(revs) + 1, len(self)):
597 for x in self.parentrevs(i):
597 for x in self.parentrevs(i):
598 if x != nullrev and x in seen:
598 if x != nullrev and x in seen:
599 seen.add(i)
599 seen.add(i)
600 yield i
600 yield i
601 break
601 break
602
602
603 def findmissing(self, common=None, heads=None):
603 def findmissing(self, common=None, heads=None):
604 '''
604 '''
605 returns the topologically sorted list of nodes from the set:
605 returns the topologically sorted list of nodes from the set:
606 missing = (ancestors(heads) \ ancestors(common))
606 missing = (ancestors(heads) \ ancestors(common))
607
607
608 where ancestors() is the set of ancestors from heads, heads included
608 where ancestors() is the set of ancestors from heads, heads included
609
609
610 if heads is None, the heads of the revlog are used
610 if heads is None, the heads of the revlog are used
611 if common is None, nullid is assumed to be a common node
611 if common is None, nullid is assumed to be a common node
612 '''
612 '''
613 if common is None:
613 if common is None:
614 common = [nullid]
614 common = [nullid]
615 if heads is None:
615 if heads is None:
616 heads = self.heads()
616 heads = self.heads()
617
617
618 common = [self.rev(n) for n in common]
618 common = [self.rev(n) for n in common]
619 heads = [self.rev(n) for n in heads]
619 heads = [self.rev(n) for n in heads]
620
620
621 # we want the ancestors, but inclusive
621 # we want the ancestors, but inclusive
622 has = set(self.ancestors(*common))
622 has = set(self.ancestors(*common))
623 has.add(nullrev)
623 has.add(nullrev)
624 has.update(common)
624 has.update(common)
625
625
626 # take all ancestors from heads that aren't in has
626 # take all ancestors from heads that aren't in has
627 missing = {}
627 missing = {}
628 visit = [r for r in heads if r not in has]
628 visit = [r for r in heads if r not in has]
629 while visit:
629 while visit:
630 r = visit.pop(0)
630 r = visit.pop(0)
631 if r in missing:
631 if r in missing:
632 continue
632 continue
633 else:
633 else:
634 missing[r] = None
634 missing[r] = None
635 for p in self.parentrevs(r):
635 for p in self.parentrevs(r):
636 if p not in has:
636 if p not in has:
637 visit.append(p)
637 visit.append(p)
638 missing = missing.keys()
638 missing = missing.keys()
639 missing.sort()
639 missing.sort()
640 return [self.node(r) for r in missing]
640 return [self.node(r) for r in missing]
641
641
642 def nodesbetween(self, roots=None, heads=None):
642 def nodesbetween(self, roots=None, heads=None):
643 """Return a tuple containing three elements. Elements 1 and 2 contain
643 """Return a tuple containing three elements. Elements 1 and 2 contain
644 a final list bases and heads after all the unreachable ones have been
644 a final list bases and heads after all the unreachable ones have been
645 pruned. Element 0 contains a topologically sorted list of all
645 pruned. Element 0 contains a topologically sorted list of all
646
646
647 nodes that satisfy these constraints:
647 nodes that satisfy these constraints:
648 1. All nodes must be descended from a node in roots (the nodes on
648 1. All nodes must be descended from a node in roots (the nodes on
649 roots are considered descended from themselves).
649 roots are considered descended from themselves).
650 2. All nodes must also be ancestors of a node in heads (the nodes in
650 2. All nodes must also be ancestors of a node in heads (the nodes in
651 heads are considered to be their own ancestors).
651 heads are considered to be their own ancestors).
652
652
653 If roots is unspecified, nullid is assumed as the only root.
653 If roots is unspecified, nullid is assumed as the only root.
654 If heads is unspecified, it is taken to be the output of the
654 If heads is unspecified, it is taken to be the output of the
655 heads method (i.e. a list of all nodes in the repository that
655 heads method (i.e. a list of all nodes in the repository that
656 have no children)."""
656 have no children)."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendent of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = {}
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, 0)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendent of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors[n] = 1 # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [n for n in roots if n in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(n) for n in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendents = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendents.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendents, empty parents will look like
        # they're descendents.
        for r in xrange(max(lowestrev, 0), highestrev + 1):
            n = self.node(r)
            isdescendent = False
            if lowestrev == nullrev: # Everybody is a descendent of nullid
                isdescendent = True
            elif n in descendents:
                # n is already a descendent
                isdescendent = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendents before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendents, it's not a root.
                    if (p[0] in descendents) or (p[1] in descendents):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendent if either of its parents are
                # descendents. (We seeded the descendents list with the roots
                # up there, remember?)
                if (p[0] in descendents) or (p[1] in descendents):
                    descendents.add(n)
                    isdescendent = True
            if isdescendent and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendents and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = 1
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = 1
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [n for n in heads.iterkeys() if heads[n] != 0]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            count = len(self)
            if not count:
                return [nullid]
            ishead = [1] * (count + 1)
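            # the extra slot absorbs nullrev (-1) parents, which index
            # the last element instead of a real revision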
            index = self.index
            for r in xrange(count):
                e = index[r]
                ishead[e[5]] = ishead[e[6]] = 0
            return [self.node(r) for r in xrange(count) if ishead[r]]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = {startrev: 1}
        heads = {startrev: 1}

        parentrevs = self.parentrevs
        for r in xrange(startrev + 1, len(self)):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable[r] = 1
                        heads[r] = 1
                    if p in heads and p not in stoprevs:
                        del heads[p]

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in range(p + 1, len(self)):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def _match(self, id):
        if isinstance(id, (long, int)):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) / 2 # grab an even number of digits
                bin_id = bin(id[:l*2])
                nl = [n for n in self.nodemap if n[:l] == bin_id]
                nl = [n for n in nl if hex(n).startswith(id)]
                if len(nl) > 0:
                    if len(nl) == 1:
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

    def cmp(self, node, text):
        """compare text with a given file revision"""
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

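    # the chunk cache is a single (offset, data) window into the data
    # file; contiguous reads extend the window so several chunks can be
    # served from one preread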
    def _addchunk(self, offset, data):
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _prereadsize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _loadchunk(self, offset, length, df=None):
        if not df:
            if self._inline:
                df = self.opener(self.indexfile)
            else:
                df = self.opener(self.datafile)

        readahead = max(65536, length)
        df.seek(offset)
        d = df.read(readahead)
        self._addchunk(offset, d)
        if readahead > length:
            return d[:length]
        return d

    def _getchunk(self, offset, length, df=None):
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return d[cachestart:cacheend]

        return self._loadchunk(offset, length, df)

    def _prime(self, startrev, endrev, df):
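        # prime the chunk cache with a single read spanning the whole
        # delta chain, so reconstructing a revision doesn't need one
        # read per chunk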
        start = self.start(startrev)
        end = self.end(endrev)
        if self._inline:
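            # inline revlogs store each chunk right after its index
            # entry, so shift the offsets by one entry per revision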
            start += (startrev + 1) * self._io.size
            end += (startrev + 1) * self._io.size
        self._loadchunk(start, end - start, df)

    def chunk(self, rev, df=None):
        start, length = self.start(rev), self.length(rev)
        if self._inline:
            start += (rev + 1) * self._io.size
        return decompress(self._getchunk(start, length, df))

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
            return self.chunk(rev2)

        return mdiff.textdiff(self.revision(self.node(rev1)),
                              self.revision(self.node(rev2)))

    def revision(self, node):
        """return an uncompressed revision of a given node"""
        if node == nullid:
            return ""
        if self._cache and self._cache[0] == node:
            return str(self._cache[2])

        # look up what we need to read
        text = None
        rev = self.rev(node)
        base = self.base(rev)

        # check rev flags
        if self.index[rev][0] & 0xFFFF:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.index[rev][0] & 0xFFFF))

        df = None

        # do we have useful data cached?
        if self._cache and self._cache[1] >= base and self._cache[1] < rev:
            base = self._cache[1]
            text = str(self._cache[2])
            self._loadindex(base, rev + 1)
            if not self._inline and rev > base + 1:
                df = self.opener(self.datafile)
                self._prime(base, rev, df)
        else:
            self._loadindex(base, rev + 1)
            if not self._inline and rev > base:
                df = self.opener(self.datafile)
                self._prime(base, rev, df)
            text = self.chunk(base, df=df)

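        # reconstruct the revision: apply each delta in the chain, in
        # order, on top of the base text; any priming above means these
        # chunk reads can normally be served from the chunk cache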
        bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
        text = mdiff.patches(text, bins)
        p1, p2 = self.parents(node)
        if node != hash(text, p1, p2):
            raise RevlogError(_("integrity check failed on %s:%d")
                              % (self.datafile, rev))

        self._cache = (node, rev, text)
        return text

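    # once an inline revlog (index and data interleaved in one file)
    # outgrows 128k, checkinlinesize migrates the data to a separate
    # data file and rewrites the index without the inline flag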
    def checkinlinesize(self, tr, fp=None):
        if not self._inline or (self.start(-2) + self.length(-2)) < 131072:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            calc = self._io.size
            for r in self:
                start = self.start(r) + (r + 1) * calc
                length = self.length(r)
                d = self._getchunk(start, length)
                df.write(d)
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call rename, the temp file will never replace the
        # real index
        fp.rename()

        tr.replace(self.indexfile, trindex * calc)
        self._chunkcache = (0, '')

    def addrevision(self, text, transaction, link, p1, p2, d=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        d - an optional precomputed delta
        """
        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        try:
            return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
        node = hash(text, p1, p2)
        if node in self.nodemap:
            return node

        curr = len(self)
        prev = curr - 1
        base = self.base(prev)
        offset = self.end(prev)

        if curr:
            if not d:
                ptext = self.revision(self.node(prev))
                d = mdiff.textdiff(ptext, text)
            data = compress(d)
            l = len(data[1]) + len(data[0])
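            # dist measures the total compressed size of the delta
            # chain: from the start of the base revision through the
            # end of this new delta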
            dist = l + offset - self.start(base)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if not curr or dist > len(text) * 2:
            data = compress(text)
            l = len(data[1]) + len(data[0])
            base = curr

        e = (offset_type(offset, 0), l, len(text),
             base, link, self.rev(p1), self.rev(p2), node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

        self._cache = (node, curr, text)
        return node

    def ancestor(self, a, b):
        """calculate the least common ancestor of nodes a and b"""

        def parents(rev):
            return [p for p in self.parentrevs(rev) if p != nullrev]

        c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
        if c is None:
            return nullid

        return self.node(c)

    def group(self, nodelist, lookup, infocollect=None):
        """calculate a delta group

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
        have this parent as it has all history before these
        changesets. parent is parent[0].
        """
        revs = [self.rev(n) for n in nodelist]

        # if we don't have any revisions touched by these changesets, bail
        if not revs:
            yield changegroup.closechunk()
            return

        # add the parent of the first rev
        p = self.parents(self.node(revs[0]))[0]
        revs.insert(0, self.rev(p))

        # build deltas
        for d in xrange(0, len(revs) - 1):
            a, b = revs[d], revs[d + 1]
            nb = self.node(b)

            if infocollect is not None:
                infocollect(nb)

            p = self.parents(nb)
            meta = nb + p[0] + p[1] + lookup(nb)
            if a == -1:
                d = self.revision(nb)
                meta += mdiff.trivialdiffheader(len(d))
            else:
                d = self.revdiff(a, b)
            yield changegroup.chunkheader(len(meta) + len(d))
            yield meta
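            # deltas over 1M are yielded in 256k pieces rather than as
            # one large string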
            if len(d) > 2**20:
                pos = 0
                while pos < len(d):
                    pos2 = pos + 2 ** 18
                    yield d[pos:pos2]
                    pos = pos2
            else:
                yield d

        yield changegroup.closechunk()

    def addgroup(self, revs, linkmapper, transaction):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should already be in
        our log; the rest are each against the previous delta.
        """

        # track the base of the current delta chain
        r = len(self)
        t = r - 1
        node = None

        base = prev = nullrev
        start = end = textlen = 0
        if r:
            end = self.end(t)

        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        try:
            # loop through our set of deltas
            chain = None
            for chunk in revs:
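                # each chunk starts with 80 bytes of metadata: the node,
                # its two parents, and the changeset node it belongs to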
                node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue
                delta = buffer(chunk, 80)
                del chunk

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile, _('unknown parent'))

                if not chain:
                    # retrieve the parent revision of the delta chain
                    chain = p1
                    if chain not in self.nodemap:
                        raise LookupError(chain, self.indexfile, _('unknown base'))

                # full versions are inserted when the needed deltas become
                # comparable to the uncompressed text or when the previous
                # version is not the one we have a delta against. We use
                # the size of the previous full rev as a proxy for the
                # current size.

                if chain == prev:
                    cdelta = compress(delta)
                    cdeltalen = len(cdelta[0]) + len(cdelta[1])
                    textlen = mdiff.patchedsize(textlen, delta)

                if chain != prev or (end - start + cdeltalen) > textlen * 2:
                    # flush our writes here so we can read it in revision
                    if dfh:
                        dfh.flush()
                    ifh.flush()
                    text = self.revision(chain)
                    if len(text) == 0:
                        # skip over trivial delta header
                        text = buffer(delta, 12)
                    else:
                        text = mdiff.patches(text, [delta])
                    del delta
                    chk = self._addrevision(text, transaction, link, p1, p2, None,
                                            ifh, dfh)
                    if not dfh and not self._inline:
                        # addrevision switched from inline to conventional
                        # reopen the index
                        dfh = self.opener(self.datafile, "a")
                        ifh = self.opener(self.indexfile, "a")
                    if chk != node:
                        raise RevlogError(_("consistency error adding group"))
                    textlen = len(text)
                else:
                    e = (offset_type(end, 0), cdeltalen, textlen, base,
                         link, self.rev(p1), self.rev(p2), node)
                    self.index.insert(-1, e)
                    self.nodemap[node] = r
                    entry = self._io.packentry(e, self.node, self.version, r)
                    if self._inline:
                        ifh.write(entry)
                        ifh.write(cdelta[0])
                        ifh.write(cdelta[1])
                        self.checkinlinesize(transaction, ifh)
                        if not self._inline:
                            dfh = self.opener(self.datafile, "a")
                            ifh = self.opener(self.indexfile, "a")
                    else:
                        dfh.write(cdelta[0])
                        dfh.write(cdelta[1])
                        ifh.write(entry)

                t, r, chain, prev = r, r + 1, node, node
                base = self.base(t)
                start = self.start(base)
                end = self.end(t)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return node

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        if isinstance(self.index, lazyindex):
            self._loadindexmap()

        # find the first revision to strip (index entry 4 is the linkrev)
        for rev in self:
            if self.index[rev][4] >= minlink:
                break
        else:
            # no revision has a linkrev >= minlink, nothing to strip
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chunkcache = (0, '')
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]  # keep the sentinel entry at the end

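    # checksize compares the files on disk with what the index expects
    # and returns (dd, di): the number of unexpected trailing bytes in
    # the data file and the index file, respectively (used to detect
    # truncation or corruption)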
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            s = self._io.size
            i = max(0, actual / s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res