revlog: report indexfile rather than datafile for integrity check
Matt Mackall
r8643:648af8a6 default
@@ -1,1389 +1,1389 b''
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2, incorporated herein by reference.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

# import stuff from node for others to import from revlog
from node import bin, hex, nullid, nullrev, short #@UnusedImport
from i18n import _
import changegroup, ancestor, mdiff, parsers, error, util
import struct, zlib, errno

_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress
_sha = util.sha1

# revlog flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS

_prereadsize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    return long(long(offset) << 16 | type)

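# Illustrative sketch (not part of the original commit): the index packs a
# revision's data-file offset and a 16-bit type/flags field into one 64-bit
# integer, so the three helpers above must round-trip cleanly.
_q = offset_type(12345, 7)          # hypothetical offset and type values
assert getoffset(_q) == 12345 and gettype(_q) == 7
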
nullhash = _sha(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent node is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        l = [p1, p2]
        l.sort()
        s = _sha(l[0])
        s.update(l[1])
    s.update(text)
    return s.digest()

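# Illustrative check (not part of the original commit): the nullid fast path
# above matches the generic sorted-parents branch, because nullid (twenty
# zero bytes) always sorts first.
_p1 = 'x' * 20                      # hypothetical 20-byte parent node
assert hash('text', _p1, nullid) == _sha(nullid + _p1 + 'text').digest()
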
def compress(text):
    """ generate a possibly-compressed representation of text """
    if not text:
        return ("", text)
    l = len(text)
    bin = None
    if l < 44:
        pass
    elif l > 1000000:
        # zlib makes an internal copy, thus doubling memory usage for
        # large files, so lets do this in pieces
        z = zlib.compressobj()
        p = []
        pos = 0
        while pos < l:
            pos2 = pos + 2**20
            p.append(z.compress(text[pos:pos2]))
            pos = pos2
        p.append(z.flush())
        if sum(map(len, p)) < l:
            bin = "".join(p)
    else:
        bin = _compress(text)
    if bin is None or len(bin) > l:
        if text[0] == '\0':
            return ("", text)
        return ('u', text)
    return ("", bin)

def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    t = bin[0]
    if t == '\0':
        return bin
    if t == 'x':
        return _decompress(bin)
    if t == 'u':
        return bin[1:]
    raise RevlogError(_("unknown compression type %r") % t)

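# Illustrative round trip (not part of the original commit): compress()
# returns a (header, data) pair; 'x' is zlib's own first byte, 'u' marks
# uncompressed text, and '\0' or empty means the data speaks for itself.
_text = "some revision text " * 10  # hypothetical revision contents
_hdr, _data = compress(_text)
assert decompress(_hdr + _data) == _text
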
class lazyparser(object):
    """
    this class avoids the need to parse the entirety of large indices
    """

    # lazyparser is not safe to use on windows if win32 extensions not
    # available. it keeps file handle open, which make it not possible
    # to break hardlinks on local cloned repos.

    def __init__(self, dataf):
        try:
            size = util.fstat(dataf).st_size
        except AttributeError:
            size = 0
        self.dataf = dataf
        self.s = struct.calcsize(indexformatng)
        self.datasize = size
        self.l = size/self.s
        self.index = [None] * self.l
        self.map = {nullid: nullrev}
        self.allmap = 0
        self.all = 0
        self.mapfind_count = 0

    def loadmap(self):
        """
        during a commit, we need to make sure the rev being added is
        not a duplicate. This requires loading the entire index,
        which is fairly slow. loadmap can load up just the node map,
        which takes much less time.
        """
        if self.allmap:
            return
        end = self.datasize
        self.allmap = 1
        cur = 0
        count = 0
        blocksize = self.s * 256
        self.dataf.seek(0)
        while cur < end:
            data = self.dataf.read(blocksize)
            off = 0
            for x in xrange(256):
                n = data[off + ngshaoffset:off + ngshaoffset + 20]
                self.map[n] = count
                count += 1
                if count >= self.l:
                    break
                off += self.s
            cur += blocksize

    def loadblock(self, blockstart, blocksize, data=None):
        if self.all:
            return
        if data is None:
            self.dataf.seek(blockstart)
            if blockstart + blocksize > self.datasize:
                # the revlog may have grown since we've started running,
                # but we don't have space in self.index for more entries.
                # limit blocksize so that we don't get too much data.
                blocksize = max(self.datasize - blockstart, 0)
            data = self.dataf.read(blocksize)
        lend = len(data) / self.s
        i = blockstart / self.s
        off = 0
        # lazyindex supports __delitem__
        if lend > len(self.index) - i:
            lend = len(self.index) - i
        for x in xrange(lend):
            if self.index[i + x] is None:
                b = data[off : off + self.s]
                self.index[i + x] = b
                n = b[ngshaoffset:ngshaoffset + 20]
                self.map[n] = i + x
            off += self.s

    def findnode(self, node):
        """search backwards through the index file for a specific node"""
        if self.allmap:
            return None

        # hg log will cause many many searches for the manifest
        # nodes. After we get called a few times, just load the whole
        # thing.
        if self.mapfind_count > 8:
            self.loadmap()
            if node in self.map:
                return node
            return None
        self.mapfind_count += 1
        last = self.l - 1
        while self.index[last] != None:
            if last == 0:
                self.all = 1
                self.allmap = 1
                return None
            last -= 1
        end = (last + 1) * self.s
        blocksize = self.s * 256
        while end >= 0:
            start = max(end - blocksize, 0)
            self.dataf.seek(start)
            data = self.dataf.read(end - start)
            findend = end - start
            while True:
                # we're searching backwards, so we have to make sure
                # we don't find a changeset where this node is a parent
                off = data.find(node, 0, findend)
                findend = off
                if off >= 0:
                    i = off / self.s
                    off = i * self.s
                    n = data[off + ngshaoffset:off + ngshaoffset + 20]
                    if n == node:
                        self.map[n] = i + start / self.s
                        return node
                else:
                    break
            end -= blocksize
        return None

    def loadindex(self, i=None, end=None):
        if self.all:
            return
        all = False
        if i is None:
            blockstart = 0
            blocksize = (65536 / self.s) * self.s
            end = self.datasize
            all = True
        else:
            if end:
                blockstart = i * self.s
                end = end * self.s
                blocksize = end - blockstart
            else:
                blockstart = (i & ~1023) * self.s
                blocksize = self.s * 1024
                end = blockstart + blocksize
        while blockstart < end:
            self.loadblock(blockstart, blocksize)
            blockstart += blocksize
        if all:
            self.all = True

class lazyindex(object):
    """a lazy version of the index array"""
    def __init__(self, parser):
        self.p = parser
    def __len__(self):
        return len(self.p.index)
    def load(self, pos):
        if pos < 0:
            pos += len(self.p.index)
        self.p.loadindex(pos)
        return self.p.index[pos]
    def __getitem__(self, pos):
        return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
    def __setitem__(self, pos, item):
        self.p.index[pos] = _pack(indexformatng, *item)
    def __delitem__(self, pos):
        del self.p.index[pos]
    def insert(self, pos, e):
        self.p.index.insert(pos, _pack(indexformatng, *e))
    def append(self, e):
        self.p.index.append(_pack(indexformatng, *e))

class lazymap(object):
    """a lazy version of the node map"""
    def __init__(self, parser):
        self.p = parser
    def load(self, key):
        n = self.p.findnode(key)
        if n is None:
            raise KeyError(key)
    def __contains__(self, key):
        if key in self.p.map:
            return True
        self.p.loadmap()
        return key in self.p.map
    def __iter__(self):
        yield nullid
        for i in xrange(self.p.l):
            ret = self.p.index[i]
            if not ret:
                self.p.loadindex(i)
                ret = self.p.index[i]
            if isinstance(ret, str):
                ret = _unpack(indexformatng, ret)
            yield ret[7]
    def __getitem__(self, key):
        try:
            return self.p.map[key]
        except KeyError:
            try:
                self.load(key)
                return self.p.map[key]
            except KeyError:
                raise KeyError("node " + hex(key))
    def __setitem__(self, key, val):
        self.p.map[key] = val
    def __delitem__(self, key):
        del self.p.map[key]

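# Illustrative wiring (not part of the original commit): for a large,
# non-inline index, revlogio.parseindex below ties these classes together
# roughly as
#
#     parser = lazyparser(indexfilehandle)  # hypothetical open index file
#     index = lazyindex(parser)             # entries unpacked on first access
#     nodemap = lazymap(parser)             # node -> rev, searched on miss
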
indexformatv0 = ">4l20s20s20s"
v0shaoffset = 56

class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, fp, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        if len(data) == _prereadsize:
            data += fp.read() # read the rest
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
# 6 bytes offset
# 2 bytes flags
# 4 bytes compressed length
# 4 bytes uncompressed length
# 4 bytes: base rev
# 4 bytes link rev
# 4 bytes parent 1 rev
# 4 bytes parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
ngshaoffset = 32
versionformat = ">I"

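# Illustrative sketch (not part of the original commit): each RevlogNG
# index entry is a fixed 64 bytes (8 + 6*4 + 20 bytes of fields plus 12
# bytes of padding), and the 20-byte nodeid starts at byte 32 (ngshaoffset).
assert struct.calcsize(indexformatng) == 64
assert ngshaoffset == 8 + 6 * 4
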
class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, fp, data, inline):
        if len(data) == _prereadsize:
            if util.openhardlinks() and not inline:
                # big index, let's parse it on demand
                parser = lazyparser(fp)
                index = lazyindex(parser)
                nodemap = lazymap(parser)
                e = list(index[0])
                type = gettype(e[0])
                e[0] = offset_type(0, type)
                index[0] = e
                return index, nodemap, None
            else:
                data += fp.read()

        # call the C implementation to parse the index data
        index, nodemap, cache = parsers.parse_index(data, inline)
        return index, nodemap, cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.
    """
    def __init__(self, opener, indexfile):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        self._cache = None
        self._chunkcache = (0, '')
        self.nodemap = {nullid: nullrev}
        self.index = []

        v = REVLOG_DEFAULT_VERSION
        if hasattr(opener, "defversion"):
            v = opener.defversion
            if v & REVLOGNG:
                v |= REVLOGNGINLINEDATA

        i = ''
        try:
            f = self.opener(self.indexfile)
            i = f.read(_prereadsize)
            if len(i) > 0:
                v = struct.unpack(versionformat, i[:4])[0]
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        if i:
            try:
                d = self._io.parseindex(f, i, self._inline)
            except (ValueError, IndexError), e:
                raise RevlogError(_("index %s is corrupted") % (self.indexfile))
            self.index, self.nodemap, self._chunkcache = d
            if not self._chunkcache:
                self._chunkcache = (0, '')

        # add the magic null revision at -1 (if it hasn't been done already)
        if (self.index == [] or isinstance(self.index, lazyindex) or
            self.index[-1][7] != nullid) :
            self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))

    def _loadindex(self, start, end):
        """load a block of indexes all at once from the lazy parser"""
        if isinstance(self.index, lazyindex):
            self.index.p.loadindex(start, end)

    def _loadindexmap(self):
        """loads both the map and the index from the lazy parser"""
        if isinstance(self.index, lazyindex):
            p = self.index.p
            p.loadindex()
            self.nodemap = p.map

    def _loadmap(self):
        """loads the map from the lazy parser"""
        if isinstance(self.nodemap, lazymap):
            self.nodemap.p.loadmap()
            self.nodemap = self.nodemap.p.map

    def tip(self):
        return self.node(len(self.index) - 2)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        for i in xrange(len(self)):
            yield i
    def rev(self, node):
        try:
            return self.nodemap[node]
        except KeyError:
            raise LookupError(node, self.indexfile, _('no node'))
    def node(self, rev):
        return self.index[rev][7]
    def linkrev(self, rev):
        return self.index[rev][4]
    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
    def parentrevs(self, rev):
        return self.index[rev][5:7]
    def start(self, rev):
        return int(self.index[rev][0] >> 16)
    def end(self, rev):
        return self.start(rev) + self.length(rev)
    def length(self, rev):
        return self.index[rev][1]
    def base(self, rev):
        return self.index[rev][3]

    def size(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(self.node(rev))
        return len(t)

        # alternate implementation, The advantage to this code is it
        # will be faster for a single revision. But, the results are not
        # cached, so finding the size of every revision will be slower.
        """
        if self.cache and self.cache[1] == rev:
            return len(self.cache[2])

        base = self.base(rev)
        if self.cache and self.cache[1] >= base and self.cache[1] < rev:
            base = self.cache[1]
            text = self.cache[2]
        else:
            text = self.revision(self.node(base))

        l = len(text)
        for x in xrange(base + 1, rev + 1):
            l = mdiff.patchedsize(l, self.chunk(x))
        return l
        """

    def reachable(self, node, stop=None):
        """return the set of all nodes ancestral to a given node, including
        the node itself, stopping when stop is matched"""
        reachable = set((node,))
        visit = [node]
        if stop:
            stopn = self.rev(stop)
        else:
            stopn = 0
        while visit:
            n = visit.pop(0)
            if n == stop:
                continue
            if n == nullid:
                continue
            for p in self.parents(n):
                if self.rev(p) < stopn:
                    continue
                if p not in reachable:
                    reachable.add(p)
                    visit.append(p)
        return reachable

    def ancestors(self, *revs):
        'Generate the ancestors of revs using a breadth-first visit'
        visit = list(revs)
        seen = set([nullrev])
        while visit:
            for parent in self.parentrevs(visit.pop(0)):
                if parent not in seen:
                    visit.append(parent)
                    seen.add(parent)
                    yield parent

    def descendants(self, *revs):
        'Generate the descendants of revs in topological order'
        seen = set(revs)
        for i in xrange(min(revs) + 1, len(self)):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findmissing(self, common=None, heads=None):
        '''
        returns the topologically sorted list of nodes from the set:
        missing = (ancestors(heads) \ ancestors(common))

        where ancestors() is the set of ancestors from heads, heads included

        if heads is None, the heads of the revlog are used
        if common is None, nullid is assumed to be a common node
        '''
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        has = set(self.ancestors(*common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = [r for r in heads if r not in has]
        while visit:
            r = visit.pop(0)
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return [self.node(r) for r in missing]

    def nodesbetween(self, roots=None, heads=None):
        """Return a tuple containing three elements. Elements 1 and 2 contain
        a final list bases and heads after all the unreachable ones have been
        pruned. Element 0 contains a topologically sorted list of all

        nodes that satisfy these constraints:
        1. All nodes must be descended from a node in roots (the nodes on
        roots are considered descended from themselves).
        2. All nodes must also be ancestors of a node in heads (the nodes in
        heads are considered to be their own ancestors).

        If roots is unspecified, nullid is assumed as the only root.
        If heads is unspecified, it is taken to be the output of the
        heads method (i.e. a list of all nodes in the repository that
        have no children)."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendent of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, 0)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendent of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [n for n in roots if n in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(n) for n in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendents = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendents.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descedents, empty parents will look like
        # they're descendents.
        for r in xrange(max(lowestrev, 0), highestrev + 1):
            n = self.node(r)
            isdescendent = False
            if lowestrev == nullrev:  # Everybody is a descendent of nullid
                isdescendent = True
            elif n in descendents:
                # n is already a descendent
                isdescendent = True
                # This check only needs to be done here because all the roots
                # will start being marked is descendents before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendents, it's not a root.
                    if (p[0] in descendents) or (p[1] in descendents):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendent if either of its parents are
                # descendents. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendents) or (p[1] in descendents):
                    descendents.add(n)
                    isdescendent = True
            if isdescendent and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendents and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = 1
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = 1
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [n for n in heads.iterkeys() if heads[n] != 0]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            count = len(self)
            if not count:
                return [nullid]
            ishead = [1] * (count + 1)
            index = self.index
            for r in xrange(count):
                e = index[r]
                ishead[e[5]] = ishead[e[6]] = 0
            return [self.node(r) for r in xrange(count) if ishead[r]]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = set((startrev,))
        heads = set((startrev,))

        parentrevs = self.parentrevs
        for r in xrange(startrev + 1, len(self)):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                    heads.add(r)
                if p in heads and p not in stoprevs:
                    heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in range(p + 1, len(self)):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def _match(self, id):
        if isinstance(id, (long, int)):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) / 2  # grab an even number of digits
                bin_id = bin(id[:l*2])
                nl = [n for n in self.nodemap if n[:l] == bin_id]
                nl = [n for n in nl if hex(n).startswith(id)]
                if len(nl) > 0:
                    if len(nl) == 1:
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

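    # Illustrative usage (not part of the original commit), for some
    # hypothetical revlog r:
    #
    #     r.lookup(0)          # revision number
    #     r.lookup('0')        # str(revision number)
    #     r.lookup('d1b3a0c')  # full or partial hex nodeid
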
909 def cmp(self, node, text):
909 def cmp(self, node, text):
910 """compare text with a given file revision"""
910 """compare text with a given file revision"""
911 p1, p2 = self.parents(node)
911 p1, p2 = self.parents(node)
912 return hash(text, p1, p2) != node
912 return hash(text, p1, p2) != node
913
913
914 def _addchunk(self, offset, data):
914 def _addchunk(self, offset, data):
915 o, d = self._chunkcache
915 o, d = self._chunkcache
916 # try to add to existing cache
916 # try to add to existing cache
917 if o + len(d) == offset and len(d) + len(data) < _prereadsize:
917 if o + len(d) == offset and len(d) + len(data) < _prereadsize:
918 self._chunkcache = o, d + data
918 self._chunkcache = o, d + data
919 else:
919 else:
920 self._chunkcache = offset, data
920 self._chunkcache = offset, data
921
921
922 def _loadchunk(self, offset, length, df=None):
922 def _loadchunk(self, offset, length, df=None):
923 if not df:
923 if not df:
924 if self._inline:
924 if self._inline:
925 df = self.opener(self.indexfile)
925 df = self.opener(self.indexfile)
926 else:
926 else:
927 df = self.opener(self.datafile)
927 df = self.opener(self.datafile)
928
928
929 readahead = max(65536, length)
929 readahead = max(65536, length)
930 df.seek(offset)
930 df.seek(offset)
931 d = df.read(readahead)
931 d = df.read(readahead)
932 self._addchunk(offset, d)
932 self._addchunk(offset, d)
933 if readahead > length:
933 if readahead > length:
934 return d[:length]
934 return d[:length]
935 return d
935 return d
936
936
937 def _getchunk(self, offset, length, df=None):
937 def _getchunk(self, offset, length, df=None):
938 o, d = self._chunkcache
938 o, d = self._chunkcache
939 l = len(d)
939 l = len(d)
940
940
941 # is it in the cache?
941 # is it in the cache?
942 cachestart = offset - o
942 cachestart = offset - o
943 cacheend = cachestart + length
943 cacheend = cachestart + length
944 if cachestart >= 0 and cacheend <= l:
944 if cachestart >= 0 and cacheend <= l:
945 if cachestart == 0 and cacheend == l:
945 if cachestart == 0 and cacheend == l:
946 return d # avoid a copy
946 return d # avoid a copy
947 return d[cachestart:cacheend]
947 return d[cachestart:cacheend]
948
948
949 return self._loadchunk(offset, length, df)
949 return self._loadchunk(offset, length, df)
950
950
    def _prime(self, startrev, endrev, df):
        start = self.start(startrev)
        end = self.end(endrev)
        if self._inline:
            start += (startrev + 1) * self._io.size
            end += (endrev + 1) * self._io.size
        self._loadchunk(start, end - start, df)

    def chunk(self, rev, df=None):
        start, length = self.start(rev), self.length(rev)
        if self._inline:
            start += (rev + 1) * self._io.size
        return decompress(self._getchunk(start, length, df))

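    # Inline offset arithmetic, illustrated: an inline revlog interleaves
    # each revision's data with its index entry, so the physical offset
    # of rev r's chunk is start(r) + (r + 1) * self._io.size.  With a
    # 64-byte entry size and start(2) == 300, rev 2's chunk begins at
    # 300 + 3 * 64 = 492 in the .i file (numbers illustrative).  This is
    # also why _prime() above must shift end by (endrev + 1) entries.
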
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
            return self.chunk(rev2)

        return mdiff.textdiff(self.revision(self.node(rev1)),
                              self.revision(self.node(rev2)))

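    # Fast-path rationale: when rev2 immediately follows rev1 in the same
    # delta chain, the chunk stored for rev2 already *is* the delta
    # rev1 -> rev2 and can be returned as-is; otherwise both full texts
    # are reconstructed and diffed.
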
    def revision(self, node):
        """return an uncompressed revision of a given node"""
        if node == nullid:
            return ""
        if self._cache and self._cache[0] == node:
            return str(self._cache[2])

        # look up what we need to read
        text = None
        rev = self.rev(node)
        base = self.base(rev)

        # check rev flags
        if self.index[rev][0] & 0xFFFF:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.index[rev][0] & 0xFFFF))

        df = None

        # do we have useful data cached?
        if self._cache and self._cache[1] >= base and self._cache[1] < rev:
            base = self._cache[1]
            text = str(self._cache[2])
            self._loadindex(base, rev + 1)
            if not self._inline and rev > base + 1:
                df = self.opener(self.datafile)
                self._prime(base, rev, df)
        else:
            self._loadindex(base, rev + 1)
            if not self._inline and rev > base:
                df = self.opener(self.datafile)
                self._prime(base, rev, df)
            text = self.chunk(base, df=df)

        bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
        text = mdiff.patches(text, bins)
        p1, p2 = self.parents(node)
        if node != hash(text, p1, p2):
            raise RevlogError(_("integrity check failed on %s:%d")
                              % (self.indexfile, rev))

        self._cache = (node, rev, text)
        return text

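    # Reconstruction sketch (illustrative revision numbers): if base(7)
    # is 4, revision(node(7)) reads the full text stored at rev 4 and
    # applies the deltas for revs 5 through 7 in order:
    #
    #   text = self.chunk(4)                      # full snapshot
    #   bins = [self.chunk(r) for r in (5, 6, 7)] # stored deltas
    #   text = mdiff.patches(text, bins)          # rev 7's full text
    #
    # The result is verified against the nodeid (which hashes content
    # plus both parents) before being cached.
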
    def checkinlinesize(self, tr, fp=None):
        if not self._inline or (self.start(-2) + self.length(-2)) < 131072:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            calc = self._io.size
            for r in self:
                start = self.start(r) + (r + 1) * calc
                length = self.length(r)
                d = self._getchunk(start, length)
                df.write(d)
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call rename, the temp file will never replace the
        # real index
        fp.rename()

        tr.replace(self.indexfile, trindex * calc)
        self._chunkcache = (0, '')

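    # Layout change performed above, illustrated with two revisions
    # (64-byte entries, chunk sizes 100 and 40; numbers illustrative):
    #
    #   inline .i:  [entry0][data0 100b][entry1][data1 40b]
    #   after:      .i = [entry0][entry1]    .d = [data0][data1]
    #
    # Each chunk is read from the inline file at start(r) + (r + 1) * 64,
    # appended to the new .d file, and the index is rewritten through an
    # atomic temp file so an interrupted run leaves the old inline revlog
    # intact.
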
    def addrevision(self, text, transaction, link, p1, p2, d=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        d - an optional precomputed delta
        """
        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        try:
            return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

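    # Hypothetical caller sketch (names illustrative, not from this
    # file): appending one file revision inside an open transaction tr,
    # linked to changelog revision linkrev:
    #
    #   node = rl.addrevision("new contents\n", tr, linkrev, p1, p2)
    #
    # Passing a precomputed delta via d= skips the mdiff.textdiff() call
    # in _addrevision() below.
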
    def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
        node = hash(text, p1, p2)
        if node in self.nodemap:
            return node

        curr = len(self)
        prev = curr - 1
        base = self.base(prev)
        offset = self.end(prev)

        if curr:
            if not d:
                ptext = self.revision(self.node(prev))
                d = mdiff.textdiff(ptext, text)
            data = compress(d)
            l = len(data[1]) + len(data[0])
            dist = l + offset - self.start(base)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
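        # Worked example (illustrative sizes): for a 10,000-byte text
        # whose chain since the last full version already spans 18,500
        # bytes, a new 2,000-byte compressed delta gives dist = 20,500 >
        # 2 * 10,000, so a full version is stored and base resets to the
        # new revision, bounding reconstruction cost for later reads.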
        if not curr or dist > len(text) * 2:
            data = compress(text)
            l = len(data[1]) + len(data[0])
            base = curr

        e = (offset_type(offset, 0), l, len(text),
             base, link, self.rev(p1), self.rev(p2), node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

        self._cache = (node, curr, text)
        return node

    def ancestor(self, a, b):
        """calculate the least common ancestor of nodes a and b"""

        def parents(rev):
            return [p for p in self.parentrevs(rev) if p != nullrev]

        c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
        if c is None:
            return nullid

        return self.node(c)

    def group(self, nodelist, lookup, infocollect=None):
        """calculate a delta group

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes.  The first delta is
        parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
        have this parent, as it has all history before these
        changesets.  Here "parent" means parents[0].
        """

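        # Chunk layout produced below (a sketch; changegroup.py owns the
        # authoritative framing): a length header from
        # changegroup.chunkheader(), then 80 bytes of metadata, then the
        # delta payload:
        #
        #   [header][node 20b][p1 20b][p2 20b][linknode 20b][delta ...]
        #
        # addgroup() below unpacks exactly this metadata layout with
        # struct.unpack("20s20s20s20s", chunk[:80]).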
        revs = [self.rev(n) for n in nodelist]

        # if we don't have any revisions touched by these changesets, bail
        if not revs:
            yield changegroup.closechunk()
            return

        # add the parent of the first rev
        p = self.parentrevs(revs[0])[0]
        revs.insert(0, p)

        # build deltas
        for d in xrange(len(revs) - 1):
            a, b = revs[d], revs[d + 1]
            nb = self.node(b)

            if infocollect is not None:
                infocollect(nb)

            p = self.parents(nb)
            meta = nb + p[0] + p[1] + lookup(nb)
            if a == -1:
                d = self.revision(nb)
                meta += mdiff.trivialdiffheader(len(d))
            else:
                d = self.revdiff(a, b)
            yield changegroup.chunkheader(len(meta) + len(d))
            yield meta
            if len(d) > 2**20:
                pos = 0
                while pos < len(d):
                    pos2 = pos + 2 ** 18
                    yield d[pos:pos2]
                    pos = pos2
            else:
                yield d

        yield changegroup.closechunk()

    def addgroup(self, revs, linkmapper, transaction):
        """
        add a delta group

        Given a set of deltas, add them to the revision log.  The
        first delta is against its parent, which should be in our
        log; the rest are against the previous delta.
        """

        # track the base of the current delta log
        r = len(self)
        t = r - 1
        node = None

        base = prev = nullrev
        start = end = textlen = 0
        if r:
            end = self.end(t)

        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        try:
            # loop through our set of deltas
            chain = None
            for chunk in revs:
                node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue
                delta = buffer(chunk, 80)
                del chunk

                for p in (p1, p2):
                    if not p in self.nodemap:
                        raise LookupError(p, self.indexfile, _('unknown parent'))

                if not chain:
                    # retrieve the parent revision of the delta chain
                    chain = p1
                    if not chain in self.nodemap:
                        raise LookupError(chain, self.indexfile, _('unknown base'))

                # full versions are inserted when the needed deltas become
                # comparable to the uncompressed text or when the previous
                # version is not the one we have a delta against. We use
                # the size of the previous full rev as a proxy for the
                # current size.

                if chain == prev:
                    cdelta = compress(delta)
                    cdeltalen = len(cdelta[0]) + len(cdelta[1])
                    textlen = mdiff.patchedsize(textlen, delta)

                if chain != prev or (end - start + cdeltalen) > textlen * 2:
                    # flush our writes here so we can read it in revision
                    if dfh:
                        dfh.flush()
                    ifh.flush()
                    text = self.revision(chain)
                    if len(text) == 0:
                        # skip over trivial delta header
                        text = buffer(delta, 12)
                    else:
                        text = mdiff.patches(text, [delta])
                    del delta
                    chk = self._addrevision(text, transaction, link, p1, p2, None,
                                            ifh, dfh)
                    if not dfh and not self._inline:
                        # addrevision switched from inline to conventional;
                        # reopen the index and data files
                        dfh = self.opener(self.datafile, "a")
                        ifh = self.opener(self.indexfile, "a")
                    if chk != node:
                        raise RevlogError(_("consistency error adding group"))
                    textlen = len(text)
                else:
                    e = (offset_type(end, 0), cdeltalen, textlen, base,
                         link, self.rev(p1), self.rev(p2), node)
                    self.index.insert(-1, e)
                    self.nodemap[node] = r
                    entry = self._io.packentry(e, self.node, self.version, r)
                    if self._inline:
                        ifh.write(entry)
                        ifh.write(cdelta[0])
                        ifh.write(cdelta[1])
                        self.checkinlinesize(transaction, ifh)
                        if not self._inline:
                            dfh = self.opener(self.datafile, "a")
                            ifh = self.opener(self.indexfile, "a")
                    else:
                        dfh.write(cdelta[0])
                        dfh.write(cdelta[1])
                        ifh.write(entry)

                t, r, chain, prev = r, r + 1, node, node
                base = self.base(t)
                start = self.start(base)
                end = self.end(t)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return node

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        if isinstance(self.index, lazyindex):
            self._loadindexmap()

        for rev in self:
            if self.index[rev][4] >= minlink:
                break
        else:
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chunkcache = (0, '')
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

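    # Truncation offsets, illustrated (64-byte entries, numbers
    # illustrative): stripping at rev 3 of a split revlog truncates the
    # .d file at start(3) and the .i file at 3 * 64.  An inline revlog
    # is a single file truncated at start(3) + 3 * 64, which keeps the
    # three surviving interleaved entry+data pairs.
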
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            s = self._io.size
            i = max(0, actual / s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

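    # Return value, illustrated: (dd, di) are surplus byte counts for the
    # data and index files; (0, 0) means both match the sizes implied by
    # the index.  For instance, dd == 512 would mean 512 trailing bytes
    # in the .d file beyond end(tiprev), typically debris from an
    # interrupted append (sizes illustrative).
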
    def files(self):
        res = [ self.indexfile ]
        if not self._inline:
            res.append(self.datafile)
        return res