revlog: speed up parents()
Matt Mackall
r7363:9d28ff20 default
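The change below speeds up parents() by mapping parent revisions to nodes
inline: an index entry carries the parent revision numbers in slots 5 and 6
and the nodeid in slot 7 (see the "index ng" layout further down), so
binding self.index to a local and indexing it directly replaces two
self.node() method calls, and the repeated attribute lookups behind them,
with plain list and tuple indexing. A rough way to see the effect, as a
sketch only (assumes a local repository and the Mercurial Python API of
this era):

    from mercurial import hg, ui
    from timeit import default_timer as clock

    cl = hg.repository(ui.ui(), '.').changelog
    start = clock()
    for r in cl:                    # iterate all changelog revisions
        cl.parents(cl.node(r))      # exercise the function being optimized
    print '%d parents() calls in %.3fs' % (len(cl), clock() - start)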
@@ -1,1369 +1,1370 @@
1 """
1 """
2 revlog.py - storage back-end for mercurial
2 revlog.py - storage back-end for mercurial
3
3
4 This provides efficient delta storage with O(1) retrieve and append
4 This provides efficient delta storage with O(1) retrieve and append
5 and O(changes) merge between branches
5 and O(changes) merge between branches
6
6
7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
8
8
9 This software may be used and distributed according to the terms
9 This software may be used and distributed according to the terms
10 of the GNU General Public License, incorporated herein by reference.
10 of the GNU General Public License, incorporated herein by reference.
11 """
11 """
12
12
13 from node import bin, hex, nullid, nullrev, short
13 from node import bin, hex, nullid, nullrev, short
14 from i18n import _
14 from i18n import _
15 import changegroup, errno, ancestor, mdiff, parsers
15 import changegroup, errno, ancestor, mdiff, parsers
16 import struct, util, zlib
16 import struct, util, zlib
17
17
18 _pack = struct.pack
18 _pack = struct.pack
19 _unpack = struct.unpack
19 _unpack = struct.unpack
20 _compress = zlib.compress
20 _compress = zlib.compress
21 _decompress = zlib.decompress
21 _decompress = zlib.decompress
22 _sha = util.sha1
22 _sha = util.sha1
23
23
24 # revlog flags
24 # revlog flags
25 REVLOGV0 = 0
25 REVLOGV0 = 0
26 REVLOGNG = 1
26 REVLOGNG = 1
27 REVLOGNGINLINEDATA = (1 << 16)
27 REVLOGNGINLINEDATA = (1 << 16)
28 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
28 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
29 REVLOG_DEFAULT_FORMAT = REVLOGNG
29 REVLOG_DEFAULT_FORMAT = REVLOGNG
30 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
30 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
31
31
32 class RevlogError(Exception):
32 class RevlogError(Exception):
33 pass
33 pass
34
34
35 class LookupError(RevlogError, KeyError):
35 class LookupError(RevlogError, KeyError):
36 def __init__(self, name, index, message):
36 def __init__(self, name, index, message):
37 self.name = name
37 self.name = name
38 if isinstance(name, str) and len(name) == 20:
38 if isinstance(name, str) and len(name) == 20:
39 name = short(name)
39 name = short(name)
40 RevlogError.__init__(self, _('%s@%s: %s') % (index, name, message))
40 RevlogError.__init__(self, _('%s@%s: %s') % (index, name, message))
41
41
42 def __str__(self):
42 def __str__(self):
43 return RevlogError.__str__(self)
43 return RevlogError.__str__(self)
44
44
45 def getoffset(q):
45 def getoffset(q):
46 return int(q >> 16)
46 return int(q >> 16)
47
47
48 def gettype(q):
48 def gettype(q):
49 return int(q & 0xFFFF)
49 return int(q & 0xFFFF)
50
50
51 def offset_type(offset, type):
51 def offset_type(offset, type):
52 return long(long(offset) << 16 | type)
52 return long(long(offset) << 16 | type)
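
# Illustration: the index packs a 48-bit file offset and 16-bit type flags
# into a single integer, and getoffset()/gettype() undo offset_type():
#
#   q = offset_type(1024, 1)        # (1024 << 16) | 1
#   getoffset(q), gettype(q)        # -> (1024, 1)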

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    l = [p1, p2]
    l.sort()
    s = _sha(l[0])
    s.update(l[1])
    s.update(text)
    return s.digest()
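
# Note that the parents are sorted before hashing, so the result is
# symmetric: hash(text, p1, p2) == hash(text, p2, p1), equivalent to
# sha1(min(p1, p2) + max(p1, p2) + text).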

def compress(text):
    """ generate a possibly-compressed representation of text """
    if not text:
        return ("", text)
    l = len(text)
    bin = None
    if l < 44:
        pass
    elif l > 1000000:
        # zlib makes an internal copy, thus doubling memory usage for
        # large files, so let's do this in pieces
        z = zlib.compressobj()
        p = []
        pos = 0
        while pos < l:
            pos2 = pos + 2**20
            p.append(z.compress(text[pos:pos2]))
            pos = pos2
        p.append(z.flush())
        if sum(map(len, p)) < l:
            bin = "".join(p)
    else:
        bin = _compress(text)
    if bin is None or len(bin) > l:
        if text[0] == '\0':
            return ("", text)
        return ('u', text)
    return ("", bin)

def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    t = bin[0]
    if t == '\0':
        return bin
    if t == 'x':
        return _decompress(bin)
    if t == 'u':
        return bin[1:]
    raise RevlogError(_("unknown compression type %r") % t)
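
# The first byte of a stored chunk acts as a tag: '\0' marks literal text
# that happens to begin with NUL, 'x' marks zlib streams (zlib output
# begins with 0x78), and 'u' marks text stored uncompressed because
# compression did not help. The pair round-trips, e.g.:
#
#   decompress("".join(compress("x" * 100))) == "x" * 100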

class lazyparser(object):
    """
    this class avoids the need to parse the entirety of large indices
    """

    # lazyparser is not safe to use on windows if win32 extensions are not
    # available. it keeps the file handle open, which makes it impossible
    # to break hardlinks on local cloned repos.

    def __init__(self, dataf, size):
        self.dataf = dataf
        self.s = struct.calcsize(indexformatng)
        self.datasize = size
        self.l = size / self.s
        self.index = [None] * self.l
        self.map = {nullid: nullrev}
        self.allmap = 0
        self.all = 0
        self.mapfind_count = 0

    def loadmap(self):
        """
        during a commit, we need to make sure the rev being added is
        not a duplicate. This requires loading the entire index,
        which is fairly slow. loadmap can load up just the node map,
        which takes much less time.
        """
        if self.allmap:
            return
        end = self.datasize
        self.allmap = 1
        cur = 0
        count = 0
        blocksize = self.s * 256
        self.dataf.seek(0)
        while cur < end:
            data = self.dataf.read(blocksize)
            off = 0
            for x in xrange(256):
                n = data[off + ngshaoffset:off + ngshaoffset + 20]
                self.map[n] = count
                count += 1
                if count >= self.l:
                    break
                off += self.s
            cur += blocksize

    def loadblock(self, blockstart, blocksize, data=None):
        if self.all:
            return
        if data is None:
            self.dataf.seek(blockstart)
            if blockstart + blocksize > self.datasize:
                # the revlog may have grown since we've started running,
                # but we don't have space in self.index for more entries.
                # limit blocksize so that we don't get too much data.
                blocksize = max(self.datasize - blockstart, 0)
            data = self.dataf.read(blocksize)
        lend = len(data) / self.s
        i = blockstart / self.s
        off = 0
        # lazyindex supports __delitem__
        if lend > len(self.index) - i:
            lend = len(self.index) - i
        for x in xrange(lend):
            if self.index[i + x] == None:
                b = data[off : off + self.s]
                self.index[i + x] = b
                n = b[ngshaoffset:ngshaoffset + 20]
                self.map[n] = i + x
            off += self.s

    def findnode(self, node):
        """search backwards through the index file for a specific node"""
        if self.allmap:
            return None

        # hg log will cause many many searches for the manifest
        # nodes. After we get called a few times, just load the whole
        # thing.
        if self.mapfind_count > 8:
            self.loadmap()
            if node in self.map:
                return node
            return None
        self.mapfind_count += 1
        last = self.l - 1
        while self.index[last] != None:
            if last == 0:
                self.all = 1
                self.allmap = 1
                return None
            last -= 1
        end = (last + 1) * self.s
        blocksize = self.s * 256
        while end >= 0:
            start = max(end - blocksize, 0)
            self.dataf.seek(start)
            data = self.dataf.read(end - start)
            findend = end - start
            while True:
                # we're searching backwards, so we have to make sure
                # we don't find a changeset where this node is a parent
                off = data.find(node, 0, findend)
                findend = off
                if off >= 0:
                    i = off / self.s
                    off = i * self.s
                    n = data[off + ngshaoffset:off + ngshaoffset + 20]
                    if n == node:
                        self.map[n] = i + start / self.s
                        return node
                else:
                    break
            end -= blocksize
        return None

    def loadindex(self, i=None, end=None):
        if self.all:
            return
        all = False
        if i == None:
            blockstart = 0
            blocksize = (65536 / self.s) * self.s
            end = self.datasize
            all = True
        else:
            if end:
                blockstart = i * self.s
                end = end * self.s
                blocksize = end - blockstart
            else:
                blockstart = (i & ~1023) * self.s
                blocksize = self.s * 1024
                end = blockstart + blocksize
        while blockstart < end:
            self.loadblock(blockstart, blocksize)
            blockstart += blocksize
        if all:
            self.all = True

class lazyindex(object):
    """a lazy version of the index array"""
    def __init__(self, parser):
        self.p = parser
    def __len__(self):
        return len(self.p.index)
    def load(self, pos):
        if pos < 0:
            pos += len(self.p.index)
        self.p.loadindex(pos)
        return self.p.index[pos]
    def __getitem__(self, pos):
        return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
    def __setitem__(self, pos, item):
        self.p.index[pos] = _pack(indexformatng, *item)
    def __delitem__(self, pos):
        del self.p.index[pos]
    def insert(self, pos, e):
        self.p.index.insert(pos, _pack(indexformatng, *e))
    def append(self, e):
        self.p.index.append(_pack(indexformatng, *e))

class lazymap(object):
    """a lazy version of the node map"""
    def __init__(self, parser):
        self.p = parser
    def load(self, key):
        n = self.p.findnode(key)
        if n == None:
            raise KeyError(key)
    def __contains__(self, key):
        if key in self.p.map:
            return True
        self.p.loadmap()
        return key in self.p.map
    def __iter__(self):
        yield nullid
        for i in xrange(self.p.l):
            ret = self.p.index[i]
            if not ret:
                self.p.loadindex(i)
                ret = self.p.index[i]
            if isinstance(ret, str):
                ret = _unpack(indexformatng, ret)
            yield ret[7]
    def __getitem__(self, key):
        try:
            return self.p.map[key]
        except KeyError:
            try:
                self.load(key)
                return self.p.map[key]
            except KeyError:
                raise KeyError("node " + hex(key))
    def __setitem__(self, key, val):
        self.p.map[key] = val
    def __delitem__(self, key):
        del self.p.map[key]

indexformatv0 = ">4l20s20s20s"
v0shaoffset = 56

class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, fp, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        data = fp.read()
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
# 6 bytes offset
# 2 bytes flags
# 4 bytes compressed length
# 4 bytes uncompressed length
# 4 bytes: base rev
# 4 bytes link rev
# 4 bytes parent 1 rev
# 4 bytes parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
ngshaoffset = 32
versionformat = ">I"
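
# struct.calcsize(indexformatng) is 64: 8 bytes of offset/flags, six 4-byte
# integers, a 20-byte nodeid and 12 bytes of padding; ngshaoffset is 32
# because 8 + 4 * 6 bytes precede the nodeid within each entry.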

class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, fp, inline):
        try:
            size = util.fstat(fp).st_size
        except AttributeError:
            size = 0

        if util.openhardlinks() and not inline and size > 1000000:
            # big index, let's parse it on demand
            parser = lazyparser(fp, size)
            index = lazyindex(parser)
            nodemap = lazymap(parser)
            e = list(index[0])
            type = gettype(e[0])
            e[0] = offset_type(0, type)
            index[0] = e
            return index, nodemap, None

        data = fp.read()
        # call the C implementation to parse the index data
        index, nodemap, cache = parsers.parse_index(data, inline)
        return index, nodemap, cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.
    """
    def __init__(self, opener, indexfile):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        self._cache = None
        self._chunkcache = None
        self.nodemap = {nullid: nullrev}
        self.index = []

        v = REVLOG_DEFAULT_VERSION
        if hasattr(opener, "defversion"):
            v = opener.defversion
            if v & REVLOGNG:
                v |= REVLOGNGINLINEDATA

        i = ""
        try:
            f = self.opener(self.indexfile)
            i = f.read(4)
            f.seek(0)
            if len(i) > 0:
                v = struct.unpack(versionformat, i)[0]
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        if i:
            d = self._io.parseindex(f, self._inline)
            self.index, self.nodemap, self._chunkcache = d

        # add the magic null revision at -1 (if it hasn't been done already)
        if (self.index == [] or isinstance(self.index, lazyindex) or
            self.index[-1][7] != nullid):
            self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))

    def _loadindex(self, start, end):
        """load a block of indexes all at once from the lazy parser"""
        if isinstance(self.index, lazyindex):
            self.index.p.loadindex(start, end)

    def _loadindexmap(self):
        """loads both the map and the index from the lazy parser"""
        if isinstance(self.index, lazyindex):
            p = self.index.p
            p.loadindex()
            self.nodemap = p.map

    def _loadmap(self):
        """loads the map from the lazy parser"""
        if isinstance(self.nodemap, lazymap):
            self.nodemap.p.loadmap()
            self.nodemap = self.nodemap.p.map

    def tip(self):
        return self.node(len(self.index) - 2)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        for i in xrange(len(self)):
            yield i
    def rev(self, node):
        try:
            return self.nodemap[node]
        except KeyError:
            raise LookupError(node, self.indexfile, _('no node'))
    def node(self, rev):
        return self.index[rev][7]
    def linkrev(self, rev):
        return self.index[rev][4]
    def parents(self, node):
-       d = self.index[self.rev(node)][5:7]
-       return (self.node(d[0]), self.node(d[1]))
+       i = self.index
+       d = i[self.rev(node)]
+       return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
    def parentrevs(self, rev):
        return self.index[rev][5:7]
    def start(self, rev):
        return int(self.index[rev][0] >> 16)
    def end(self, rev):
        return self.start(rev) + self.length(rev)
    def length(self, rev):
        return self.index[rev][1]
    def base(self, rev):
        return self.index[rev][3]

    def size(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(self.node(rev))
        return len(t)

        # alternate implementation. The advantage to this code is it
        # will be faster for a single revision. But, the results are not
        # cached, so finding the size of every revision will be slower.
        """
        if self.cache and self.cache[1] == rev:
            return len(self.cache[2])

        base = self.base(rev)
        if self.cache and self.cache[1] >= base and self.cache[1] < rev:
            base = self.cache[1]
            text = self.cache[2]
        else:
            text = self.revision(self.node(base))

        l = len(text)
        for x in xrange(base + 1, rev + 1):
            l = mdiff.patchedsize(l, self.chunk(x))
        return l
        """

    def reachable(self, node, stop=None):
        """return a hash of all nodes ancestral to a given node, including
        the node itself, stopping when stop is matched"""
        reachable = {}
        visit = [node]
        reachable[node] = 1
        if stop:
            stopn = self.rev(stop)
        else:
            stopn = 0
        while visit:
            n = visit.pop(0)
            if n == stop:
                continue
            if n == nullid:
                continue
            for p in self.parents(n):
                if self.rev(p) < stopn:
                    continue
                if p not in reachable:
                    reachable[p] = 1
                    visit.append(p)
        return reachable

    def ancestors(self, *revs):
        'Generate the ancestors of revs using a breadth-first visit'
        visit = list(revs)
        seen = util.set([nullrev])
        while visit:
            for parent in self.parentrevs(visit.pop(0)):
                if parent not in seen:
                    visit.append(parent)
                    seen.add(parent)
                    yield parent
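
    # Example: in a linear graph 0 <- 1 <- 2, ancestors(2) yields 1, then 0;
    # nullrev is pre-seeded into 'seen', so it is never yielded.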
582
583
583 def descendants(self, *revs):
584 def descendants(self, *revs):
584 'Generate the descendants of revs in topological order'
585 'Generate the descendants of revs in topological order'
585 seen = util.set(revs)
586 seen = util.set(revs)
586 for i in xrange(min(revs) + 1, len(self)):
587 for i in xrange(min(revs) + 1, len(self)):
587 for x in self.parentrevs(i):
588 for x in self.parentrevs(i):
588 if x != nullrev and x in seen:
589 if x != nullrev and x in seen:
589 seen.add(i)
590 seen.add(i)
590 yield i
591 yield i
591 break
592 break
592
593
593 def findmissing(self, common=None, heads=None):
594 def findmissing(self, common=None, heads=None):
594 '''
595 '''
595 returns the topologically sorted list of nodes from the set:
596 returns the topologically sorted list of nodes from the set:
596 missing = (ancestors(heads) \ ancestors(common))
597 missing = (ancestors(heads) \ ancestors(common))
597
598
598 where ancestors() is the set of ancestors from heads, heads included
599 where ancestors() is the set of ancestors from heads, heads included
599
600
600 if heads is None, the heads of the revlog are used
601 if heads is None, the heads of the revlog are used
601 if common is None, nullid is assumed to be a common node
602 if common is None, nullid is assumed to be a common node
602 '''
603 '''
603 if common is None:
604 if common is None:
604 common = [nullid]
605 common = [nullid]
605 if heads is None:
606 if heads is None:
606 heads = self.heads()
607 heads = self.heads()
607
608
608 common = [self.rev(n) for n in common]
609 common = [self.rev(n) for n in common]
609 heads = [self.rev(n) for n in heads]
610 heads = [self.rev(n) for n in heads]
610
611
611 # we want the ancestors, but inclusive
612 # we want the ancestors, but inclusive
612 has = dict.fromkeys(self.ancestors(*common))
613 has = dict.fromkeys(self.ancestors(*common))
613 has[nullrev] = None
614 has[nullrev] = None
614 for r in common:
615 for r in common:
615 has[r] = None
616 has[r] = None
616
617
617 # take all ancestors from heads that aren't in has
618 # take all ancestors from heads that aren't in has
618 missing = {}
619 missing = {}
619 visit = [r for r in heads if r not in has]
620 visit = [r for r in heads if r not in has]
620 while visit:
621 while visit:
621 r = visit.pop(0)
622 r = visit.pop(0)
622 if r in missing:
623 if r in missing:
623 continue
624 continue
624 else:
625 else:
625 missing[r] = None
626 missing[r] = None
626 for p in self.parentrevs(r):
627 for p in self.parentrevs(r):
627 if p not in has:
628 if p not in has:
628 visit.append(p)
629 visit.append(p)
629 missing = missing.keys()
630 missing = missing.keys()
630 missing.sort()
631 missing.sort()
631 return [self.node(r) for r in missing]
632 return [self.node(r) for r in missing]
632
633
633 def nodesbetween(self, roots=None, heads=None):
634 def nodesbetween(self, roots=None, heads=None):
634 """Return a tuple containing three elements. Elements 1 and 2 contain
635 """Return a tuple containing three elements. Elements 1 and 2 contain
635 a final list bases and heads after all the unreachable ones have been
636 a final list bases and heads after all the unreachable ones have been
636 pruned. Element 0 contains a topologically sorted list of all
637 pruned. Element 0 contains a topologically sorted list of all
637
638
638 nodes that satisfy these constraints:
639 nodes that satisfy these constraints:
639 1. All nodes must be descended from a node in roots (the nodes on
640 1. All nodes must be descended from a node in roots (the nodes on
640 roots are considered descended from themselves).
641 roots are considered descended from themselves).
641 2. All nodes must also be ancestors of a node in heads (the nodes in
642 2. All nodes must also be ancestors of a node in heads (the nodes in
642 heads are considered to be their own ancestors).
643 heads are considered to be their own ancestors).
643
644
644 If roots is unspecified, nullid is assumed as the only root.
645 If roots is unspecified, nullid is assumed as the only root.
645 If heads is unspecified, it is taken to be the output of the
646 If heads is unspecified, it is taken to be the output of the
646 heads method (i.e. a list of all nodes in the repository that
647 heads method (i.e. a list of all nodes in the repository that
647 have no children)."""
648 have no children)."""
648 nonodes = ([], [], [])
649 nonodes = ([], [], [])
649 if roots is not None:
650 if roots is not None:
650 roots = list(roots)
651 roots = list(roots)
651 if not roots:
652 if not roots:
652 return nonodes
653 return nonodes
653 lowestrev = min([self.rev(n) for n in roots])
654 lowestrev = min([self.rev(n) for n in roots])
654 else:
655 else:
655 roots = [nullid] # Everybody's a descendent of nullid
656 roots = [nullid] # Everybody's a descendent of nullid
656 lowestrev = nullrev
657 lowestrev = nullrev
657 if (lowestrev == nullrev) and (heads is None):
658 if (lowestrev == nullrev) and (heads is None):
658 # We want _all_ the nodes!
659 # We want _all_ the nodes!
659 return ([self.node(r) for r in self], [nullid], list(self.heads()))
660 return ([self.node(r) for r in self], [nullid], list(self.heads()))
660 if heads is None:
661 if heads is None:
661 # All nodes are ancestors, so the latest ancestor is the last
662 # All nodes are ancestors, so the latest ancestor is the last
662 # node.
663 # node.
663 highestrev = len(self) - 1
664 highestrev = len(self) - 1
664 # Set ancestors to None to signal that every node is an ancestor.
665 # Set ancestors to None to signal that every node is an ancestor.
665 ancestors = None
666 ancestors = None
666 # Set heads to an empty dictionary for later discovery of heads
667 # Set heads to an empty dictionary for later discovery of heads
667 heads = {}
668 heads = {}
668 else:
669 else:
669 heads = list(heads)
670 heads = list(heads)
670 if not heads:
671 if not heads:
671 return nonodes
672 return nonodes
672 ancestors = {}
673 ancestors = {}
673 # Turn heads into a dictionary so we can remove 'fake' heads.
674 # Turn heads into a dictionary so we can remove 'fake' heads.
674 # Also, later we will be using it to filter out the heads we can't
675 # Also, later we will be using it to filter out the heads we can't
675 # find from roots.
676 # find from roots.
676 heads = dict.fromkeys(heads, 0)
677 heads = dict.fromkeys(heads, 0)
677 # Start at the top and keep marking parents until we're done.
678 # Start at the top and keep marking parents until we're done.
678 nodestotag = heads.keys()
679 nodestotag = heads.keys()
679 # Remember where the top was so we can use it as a limit later.
680 # Remember where the top was so we can use it as a limit later.
680 highestrev = max([self.rev(n) for n in nodestotag])
681 highestrev = max([self.rev(n) for n in nodestotag])
681 while nodestotag:
682 while nodestotag:
682 # grab a node to tag
683 # grab a node to tag
683 n = nodestotag.pop()
684 n = nodestotag.pop()
684 # Never tag nullid
685 # Never tag nullid
685 if n == nullid:
686 if n == nullid:
686 continue
687 continue
687 # A node's revision number represents its place in a
688 # A node's revision number represents its place in a
688 # topologically sorted list of nodes.
689 # topologically sorted list of nodes.
689 r = self.rev(n)
690 r = self.rev(n)
690 if r >= lowestrev:
691 if r >= lowestrev:
691 if n not in ancestors:
692 if n not in ancestors:
692 # If we are possibly a descendent of one of the roots
693 # If we are possibly a descendent of one of the roots
693 # and we haven't already been marked as an ancestor
694 # and we haven't already been marked as an ancestor
694 ancestors[n] = 1 # Mark as ancestor
695 ancestors[n] = 1 # Mark as ancestor
695 # Add non-nullid parents to list of nodes to tag.
696 # Add non-nullid parents to list of nodes to tag.
696 nodestotag.extend([p for p in self.parents(n) if
697 nodestotag.extend([p for p in self.parents(n) if
697 p != nullid])
698 p != nullid])
698 elif n in heads: # We've seen it before, is it a fake head?
699 elif n in heads: # We've seen it before, is it a fake head?
699 # So it is, real heads should not be the ancestors of
700 # So it is, real heads should not be the ancestors of
700 # any other heads.
701 # any other heads.
701 heads.pop(n)
702 heads.pop(n)
702 if not ancestors:
703 if not ancestors:
703 return nonodes
704 return nonodes
704 # Now that we have our set of ancestors, we want to remove any
705 # Now that we have our set of ancestors, we want to remove any
705 # roots that are not ancestors.
706 # roots that are not ancestors.
706
707
707 # If one of the roots was nullid, everything is included anyway.
708 # If one of the roots was nullid, everything is included anyway.
708 if lowestrev > nullrev:
709 if lowestrev > nullrev:
709 # But, since we weren't, let's recompute the lowest rev to not
710 # But, since we weren't, let's recompute the lowest rev to not
710 # include roots that aren't ancestors.
711 # include roots that aren't ancestors.
711
712
712 # Filter out roots that aren't ancestors of heads
713 # Filter out roots that aren't ancestors of heads
713 roots = [n for n in roots if n in ancestors]
714 roots = [n for n in roots if n in ancestors]
714 # Recompute the lowest revision
715 # Recompute the lowest revision
715 if roots:
716 if roots:
716 lowestrev = min([self.rev(n) for n in roots])
717 lowestrev = min([self.rev(n) for n in roots])
717 else:
718 else:
718 # No more roots? Return empty list
719 # No more roots? Return empty list
719 return nonodes
720 return nonodes
720 else:
721 else:
721 # We are descending from nullid, and don't need to care about
722 # We are descending from nullid, and don't need to care about
722 # any other roots.
723 # any other roots.
723 lowestrev = nullrev
724 lowestrev = nullrev
724 roots = [nullid]
725 roots = [nullid]
725 # Transform our roots list into a 'set' (i.e. a dictionary where the
726 # Transform our roots list into a 'set' (i.e. a dictionary where the
726 # values don't matter.
727 # values don't matter.
727 descendents = dict.fromkeys(roots, 1)
728 descendents = dict.fromkeys(roots, 1)
728 # Also, keep the original roots so we can filter out roots that aren't
729 # Also, keep the original roots so we can filter out roots that aren't
729 # 'real' roots (i.e. are descended from other roots).
730 # 'real' roots (i.e. are descended from other roots).
730 roots = descendents.copy()
731 roots = descendents.copy()
731 # Our topologically sorted list of output nodes.
732 # Our topologically sorted list of output nodes.
732 orderedout = []
733 orderedout = []
733 # Don't start at nullid since we don't want nullid in our output list,
734 # Don't start at nullid since we don't want nullid in our output list,
734 # and if nullid shows up in descedents, empty parents will look like
735 # and if nullid shows up in descedents, empty parents will look like
735 # they're descendents.
736 # they're descendents.
736 for r in xrange(max(lowestrev, 0), highestrev + 1):
737 for r in xrange(max(lowestrev, 0), highestrev + 1):
737 n = self.node(r)
738 n = self.node(r)
738 isdescendent = False
739 isdescendent = False
739 if lowestrev == nullrev: # Everybody is a descendent of nullid
740 if lowestrev == nullrev: # Everybody is a descendent of nullid
740 isdescendent = True
741 isdescendent = True
741 elif n in descendents:
742 elif n in descendents:
742 # n is already a descendent
743 # n is already a descendent
743 isdescendent = True
744 isdescendent = True
744 # This check only needs to be done here because all the roots
745 # This check only needs to be done here because all the roots
745 # will start being marked is descendents before the loop.
746 # will start being marked is descendents before the loop.
746 if n in roots:
747 if n in roots:
747 # If n was a root, check if it's a 'real' root.
748 # If n was a root, check if it's a 'real' root.
748 p = tuple(self.parents(n))
749 p = tuple(self.parents(n))
749 # If any of its parents are descendents, it's not a root.
750 # If any of its parents are descendents, it's not a root.
750 if (p[0] in descendents) or (p[1] in descendents):
751 if (p[0] in descendents) or (p[1] in descendents):
751 roots.pop(n)
752 roots.pop(n)
752 else:
753 else:
753 p = tuple(self.parents(n))
754 p = tuple(self.parents(n))
754 # A node is a descendent if either of its parents are
755 # A node is a descendent if either of its parents are
755 # descendents. (We seeded the dependents list with the roots
756 # descendents. (We seeded the dependents list with the roots
756 # up there, remember?)
757 # up there, remember?)
757 if (p[0] in descendents) or (p[1] in descendents):
758 if (p[0] in descendents) or (p[1] in descendents):
758 descendents[n] = 1
759 descendents[n] = 1
759 isdescendent = True
760 isdescendent = True
760 if isdescendent and ((ancestors is None) or (n in ancestors)):
761 if isdescendent and ((ancestors is None) or (n in ancestors)):
761 # Only include nodes that are both descendents and ancestors.
762 # Only include nodes that are both descendents and ancestors.
762 orderedout.append(n)
763 orderedout.append(n)
763 if (ancestors is not None) and (n in heads):
764 if (ancestors is not None) and (n in heads):
764 # We're trying to figure out which heads are reachable
765 # We're trying to figure out which heads are reachable
765 # from roots.
766 # from roots.
766 # Mark this head as having been reached
767 # Mark this head as having been reached
767 heads[n] = 1
768 heads[n] = 1
768 elif ancestors is None:
769 elif ancestors is None:
769 # Otherwise, we're trying to discover the heads.
770 # Otherwise, we're trying to discover the heads.
770 # Assume this is a head because if it isn't, the next step
771 # Assume this is a head because if it isn't, the next step
771 # will eventually remove it.
772 # will eventually remove it.
772 heads[n] = 1
773 heads[n] = 1
773 # But, obviously its parents aren't.
774 # But, obviously its parents aren't.
774 for p in self.parents(n):
775 for p in self.parents(n):
775 heads.pop(p, None)
776 heads.pop(p, None)
776 heads = [n for n in heads.iterkeys() if heads[n] != 0]
777 heads = [n for n in heads.iterkeys() if heads[n] != 0]
777 roots = roots.keys()
778 roots = roots.keys()
778 assert orderedout
779 assert orderedout
779 assert roots
780 assert roots
780 assert heads
781 assert heads
781 return (orderedout, roots, heads)
782 return (orderedout, roots, heads)
782
783
783 def heads(self, start=None, stop=None):
784 def heads(self, start=None, stop=None):
784 """return the list of all nodes that have no children
785 """return the list of all nodes that have no children
785
786
786 if start is specified, only heads that are descendants of
787 if start is specified, only heads that are descendants of
787 start will be returned
788 start will be returned
788 if stop is specified, it will consider all the revs from stop
789 if stop is specified, it will consider all the revs from stop
789 as if they had no children
790 as if they had no children
790 """
791 """
791 if start is None and stop is None:
792 if start is None and stop is None:
792 count = len(self)
793 count = len(self)
793 if not count:
794 if not count:
794 return [nullid]
795 return [nullid]
795 ishead = [1] * (count + 1)
796 ishead = [1] * (count + 1)
796 index = self.index
797 index = self.index
797 for r in xrange(count):
798 for r in xrange(count):
798 e = index[r]
799 e = index[r]
799 ishead[e[5]] = ishead[e[6]] = 0
800 ishead[e[5]] = ishead[e[6]] = 0
800 return [self.node(r) for r in xrange(count) if ishead[r]]
801 return [self.node(r) for r in xrange(count) if ishead[r]]
801
802
802 if start is None:
803 if start is None:
803 start = nullid
804 start = nullid
804 if stop is None:
805 if stop is None:
805 stop = []
806 stop = []
806 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
807 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
807 startrev = self.rev(start)
808 startrev = self.rev(start)
808 reachable = {startrev: 1}
809 reachable = {startrev: 1}
809 heads = {startrev: 1}
810 heads = {startrev: 1}
810
811
811 parentrevs = self.parentrevs
812 parentrevs = self.parentrevs
812 for r in xrange(startrev + 1, len(self)):
813 for r in xrange(startrev + 1, len(self)):
813 for p in parentrevs(r):
814 for p in parentrevs(r):
814 if p in reachable:
815 if p in reachable:
815 if r not in stoprevs:
816 if r not in stoprevs:
816 reachable[r] = 1
817 reachable[r] = 1
817 heads[r] = 1
818 heads[r] = 1
818 if p in heads and p not in stoprevs:
819 if p in heads and p not in stoprevs:
819 del heads[p]
820 del heads[p]
820
821
821 return [self.node(r) for r in heads]
822 return [self.node(r) for r in heads]
822
823
823 def children(self, node):
824 def children(self, node):
824 """find the children of a given node"""
825 """find the children of a given node"""
825 c = []
826 c = []
826 p = self.rev(node)
827 p = self.rev(node)
827 for r in range(p + 1, len(self)):
828 for r in range(p + 1, len(self)):
828 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
829 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
829 if prevs:
830 if prevs:
830 for pr in prevs:
831 for pr in prevs:
831 if pr == p:
832 if pr == p:
832 c.append(self.node(r))
833 c.append(self.node(r))
833 elif p == nullrev:
834 elif p == nullrev:
834 c.append(self.node(r))
835 c.append(self.node(r))
835 return c
836 return c
836
837
837 def _match(self, id):
838 def _match(self, id):
838 if isinstance(id, (long, int)):
839 if isinstance(id, (long, int)):
839 # rev
840 # rev
840 return self.node(id)
841 return self.node(id)
841 if len(id) == 20:
842 if len(id) == 20:
842 # possibly a binary node
843 # possibly a binary node
843 # odds of a binary node being all hex in ASCII are 1 in 10**25
844 # odds of a binary node being all hex in ASCII are 1 in 10**25
844 try:
845 try:
845 node = id
846 node = id
846 r = self.rev(node) # quick search the index
847 r = self.rev(node) # quick search the index
847 return node
848 return node
848 except LookupError:
849 except LookupError:
849 pass # may be partial hex id
850 pass # may be partial hex id
850 try:
851 try:
851 # str(rev)
852 # str(rev)
852 rev = int(id)
853 rev = int(id)
853 if str(rev) != id:
854 if str(rev) != id:
854 raise ValueError
855 raise ValueError
855 if rev < 0:
856 if rev < 0:
856 rev = len(self) + rev
857 rev = len(self) + rev
857 if rev < 0 or rev >= len(self):
858 if rev < 0 or rev >= len(self):
858 raise ValueError
859 raise ValueError
859 return self.node(rev)
860 return self.node(rev)
860 except (ValueError, OverflowError):
861 except (ValueError, OverflowError):
861 pass
862 pass
862 if len(id) == 40:
863 if len(id) == 40:
863 try:
864 try:
864 # a full hex nodeid?
865 # a full hex nodeid?
865 node = bin(id)
866 node = bin(id)
866 r = self.rev(node)
867 r = self.rev(node)
867 return node
868 return node
868 except (TypeError, LookupError):
869 except (TypeError, LookupError):
869 pass
870 pass
870
871
871 def _partialmatch(self, id):
872 def _partialmatch(self, id):
872 if len(id) < 40:
873 if len(id) < 40:
873 try:
874 try:
874 # hex(node)[:...]
875 # hex(node)[:...]
875 bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
876 bin_id = bin(id[:len(id) & ~1]) # grab an even number of digits
876 node = None
877 node = None
877 for n in self.nodemap:
878 for n in self.nodemap:
878 if n.startswith(bin_id) and hex(n).startswith(id):
879 if n.startswith(bin_id) and hex(n).startswith(id):
879 if node is not None:
880 if node is not None:
880 raise LookupError(id, self.indexfile,
881 raise LookupError(id, self.indexfile,
881 _('ambiguous identifier'))
882 _('ambiguous identifier'))
882 node = n
883 node = n
883 if node is not None:
884 if node is not None:
884 return node
885 return node
885 except TypeError:
886 except TypeError:
886 pass
887 pass
887
888
888 def lookup(self, id):
889 def lookup(self, id):
889 """locate a node based on:
890 """locate a node based on:
890 - revision number or str(revision number)
891 - revision number or str(revision number)
891 - nodeid or subset of hex nodeid
892 - nodeid or subset of hex nodeid
892 """
893 """
893 n = self._match(id)
894 n = self._match(id)
894 if n is not None:
895 if n is not None:
895 return n
896 return n
896 n = self._partialmatch(id)
897 n = self._partialmatch(id)
897 if n:
898 if n:
898 return n
899 return n
899
900
900 raise LookupError(id, self.indexfile, _('no match found'))
901 raise LookupError(id, self.indexfile, _('no match found'))
901
902
902 def cmp(self, node, text):
903 def cmp(self, node, text):
903 """compare text with a given file revision"""
904 """compare text with a given file revision"""
904 p1, p2 = self.parents(node)
905 p1, p2 = self.parents(node)
905 return hash(text, p1, p2) != node
906 return hash(text, p1, p2) != node
906
907
907 def chunk(self, rev, df=None):
908 def chunk(self, rev, df=None):
908 def loadcache(df):
909 def loadcache(df):
909 if not df:
910 if not df:
910 if self._inline:
911 if self._inline:
911 df = self.opener(self.indexfile)
912 df = self.opener(self.indexfile)
912 else:
913 else:
913 df = self.opener(self.datafile)
914 df = self.opener(self.datafile)
914 df.seek(start)
915 df.seek(start)
915 self._chunkcache = (start, df.read(cache_length))
916 self._chunkcache = (start, df.read(cache_length))
916
917
917 start, length = self.start(rev), self.length(rev)
918 start, length = self.start(rev), self.length(rev)
918 if self._inline:
919 if self._inline:
919 start += (rev + 1) * self._io.size
920 start += (rev + 1) * self._io.size
920 end = start + length
921 end = start + length
921
922
922 offset = 0
923 offset = 0
923 if not self._chunkcache:
924 if not self._chunkcache:
924 cache_length = max(65536, length)
925 cache_length = max(65536, length)
925 loadcache(df)
926 loadcache(df)
926 else:
927 else:
927 cache_start = self._chunkcache[0]
928 cache_start = self._chunkcache[0]
928 cache_length = len(self._chunkcache[1])
929 cache_length = len(self._chunkcache[1])
929 cache_end = cache_start + cache_length
930 cache_end = cache_start + cache_length
930 if start >= cache_start and end <= cache_end:
931 if start >= cache_start and end <= cache_end:
931 # it is cached
932 # it is cached
932 offset = start - cache_start
933 offset = start - cache_start
933 else:
934 else:
934 cache_length = max(65536, length)
935 cache_length = max(65536, length)
935 loadcache(df)
936 loadcache(df)
936
937
937 # avoid copying large chunks
938 # avoid copying large chunks
938 c = self._chunkcache[1]
939 c = self._chunkcache[1]
939 if cache_length != length:
940 if cache_length != length:
940 c = c[offset:offset + length]
941 c = c[offset:offset + length]
941
942
942 return decompress(c)
943 return decompress(c)
943
944
944 def revdiff(self, rev1, rev2):
945 def revdiff(self, rev1, rev2):
945 """return or calculate a delta between two revisions"""
946 """return or calculate a delta between two revisions"""
946 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
947 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
947 return self.chunk(rev2)
948 return self.chunk(rev2)
948
949
949 return mdiff.textdiff(self.revision(self.node(rev1)),
950 return mdiff.textdiff(self.revision(self.node(rev1)),
950 self.revision(self.node(rev2)))
951 self.revision(self.node(rev2)))
951
952
    def revision(self, node):
        """return an uncompressed revision of a given node"""
        if node == nullid:
            return ""
        if self._cache and self._cache[0] == node:
            return str(self._cache[2])

        # look up what we need to read
        text = None
        rev = self.rev(node)
        base = self.base(rev)

        # check rev flags
        if self.index[rev][0] & 0xFFFF:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.index[rev][0] & 0xFFFF))

        df = None

        # do we have useful data cached?
        if self._cache and self._cache[1] >= base and self._cache[1] < rev:
            base = self._cache[1]
            text = str(self._cache[2])
            self._loadindex(base, rev + 1)
            if not self._inline and rev > base + 1:
                df = self.opener(self.datafile)
        else:
            self._loadindex(base, rev + 1)
            if not self._inline and rev > base:
                df = self.opener(self.datafile)
            text = self.chunk(base, df=df)

        bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
        text = mdiff.patches(text, bins)
        p1, p2 = self.parents(node)
        if node != hash(text, p1, p2):
            raise RevlogError(_("integrity check failed on %s:%d")
                              % (self.datafile, rev))

        self._cache = (node, rev, text)
        return text

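    # Reconstruction in miniature (a hedged sketch; snapshot() and delta()
    # are illustrative stand-ins for the chunk reads above, not this file's
    # API): a revision is its chain's base snapshot plus every delta up to
    # rev, verified against the node hash.
    #
    #   text = snapshot(base)
    #   for r in xrange(base + 1, rev + 1):
    #       text = mdiff.patches(text, [delta(r)])
    #   assert hash(text, p1, p2) == node
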
    def checkinlinesize(self, tr, fp=None):
        if not self._inline:
            return
        if not fp:
            fp = self.opener(self.indexfile, 'r')
            fp.seek(0, 2)
        size = fp.tell()
        if size < 131072:
            return
        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)
        df = self.opener(self.datafile, 'w')
        try:
            calc = self._io.size
            for r in self:
                start = self.start(r) + (r + 1) * calc
                length = self.length(r)
                fp.seek(start)
                d = fp.read(length)
                df.write(d)
        finally:
            df.close()

        fp.close()
        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call rename, the temp file will never replace the
        # real index
        fp.rename()

        tr.replace(self.indexfile, trindex * calc)
        self._chunkcache = None

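    # Behaviour sketch: an inline revlog keeps index entries and data
    # interleaved in the .i file. Once that file reaches 128KiB (131072
    # bytes), the data is copied out to the .d file, the index is rewritten
    # without the REVLOGNGINLINEDATA flag, and _inline flips to False.
    # _addrevision calls this after every inline write, so the split happens
    # automatically as the revlog grows.
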
    def addrevision(self, text, transaction, link, p1, p2, d=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        d - an optional precomputed delta
        """
        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        try:
            return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
        node = hash(text, p1, p2)
        if node in self.nodemap:
            return node

        curr = len(self)
        prev = curr - 1
        base = self.base(prev)
        offset = self.end(prev)

        if curr:
            if not d:
                ptext = self.revision(self.node(prev))
                d = mdiff.textdiff(ptext, text)
            data = compress(d)
            l = len(data[1]) + len(data[0])
            dist = l + offset - self.start(base)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if not curr or dist > len(text) * 2:
            data = compress(text)
            l = len(data[1]) + len(data[0])
            base = curr

        e = (offset_type(offset, 0), l, len(text),
             base, link, self.rev(p1), self.rev(p2), node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

        self._cache = (node, curr, text)
        return node

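    # The snapshot heuristic above as a standalone predicate (a simplified
    # sketch, not this file's API): a full version replaces the delta once
    # the bytes needed to replay the chain exceed twice the new text size.
    #
    #   def needs_snapshot(chain_bytes, textlen):
    #       return chain_bytes > textlen * 2
    #
    #   needs_snapshot(250, 100)   # True:  store full text, base = curr
    #   needs_snapshot(150, 100)   # False: store the compressed delta
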
    def ancestor(self, a, b):
        """calculate the least common ancestor of nodes a and b"""

        def parents(rev):
            return [p for p in self.parentrevs(rev) if p != nullrev]

        c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
        if c is None:
            return nullid

        return self.node(c)

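    # Usage sketch (illustrative revisions): the nested parents() filters
    # out nullrev so the ancestor walk never follows the synthetic null
    # parent.
    #
    #   anc = rl.ancestor(rl.node(5), rl.node(8))
    #   if anc == nullid:
    #       pass  # the two revisions share no history
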
    def group(self, nodelist, lookup, infocollect=None):
        """calculate a delta group

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
        have this parent as it has all history before these
        changesets. The parent used is parent[0].
        """
        revs = [self.rev(n) for n in nodelist]

        # if we don't have any revisions touched by these changesets, bail
        if not revs:
            yield changegroup.closechunk()
            return

        # add the parent of the first rev
        p = self.parents(self.node(revs[0]))[0]
        revs.insert(0, self.rev(p))

        # build deltas
        for d in xrange(0, len(revs) - 1):
            a, b = revs[d], revs[d + 1]
            nb = self.node(b)

            if infocollect is not None:
                infocollect(nb)

            p = self.parents(nb)
            meta = nb + p[0] + p[1] + lookup(nb)
            if a == -1:
                d = self.revision(nb)
                meta += mdiff.trivialdiffheader(len(d))
            else:
                d = self.revdiff(a, b)
            yield changegroup.chunkheader(len(meta) + len(d))
            yield meta
            if len(d) > 2**20:
                pos = 0
                while pos < len(d):
                    pos2 = pos + 2 ** 18
                    yield d[pos:pos2]
                    pos = pos2
            else:
                yield d

        yield changegroup.closechunk()

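    # Wire-format sketch of one emitted chunk (hedged: the payload layout is
    # inferred from the 80-byte unpack in addgroup below, plus changegroup's
    # length-prefix helpers):
    #
    #   chunkheader(n)  -- length prefix announcing an n-byte payload
    #   node (20 bytes) | p1 (20) | p2 (20) | linknode (20) | delta bytes
    #
    # Deltas over 1MiB (2**20) are re-yielded in 256KiB (2**18) slices to
    # bound peak memory while streaming.
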
    def addgroup(self, revs, linkmapper, transaction):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.
        """

        # track the base of the current delta log
        r = len(self)
        t = r - 1
        node = None

        base = prev = nullrev
        start = end = textlen = 0
        if r:
            end = self.end(t)

        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        try:
            # loop through our set of deltas
            chain = None
            for chunk in revs:
                node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue
                delta = buffer(chunk, 80)
                del chunk

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile, _('unknown parent'))

                if not chain:
                    # retrieve the parent revision of the delta chain
                    chain = p1
                    if chain not in self.nodemap:
                        raise LookupError(chain, self.indexfile, _('unknown base'))

                # full versions are inserted when the needed deltas become
                # comparable to the uncompressed text or when the previous
                # version is not the one we have a delta against. We use
                # the size of the previous full rev as a proxy for the
                # current size.

                if chain == prev:
                    cdelta = compress(delta)
                    cdeltalen = len(cdelta[0]) + len(cdelta[1])
                    textlen = mdiff.patchedsize(textlen, delta)

                if chain != prev or (end - start + cdeltalen) > textlen * 2:
                    # flush our writes here so we can read it in revision
                    if dfh:
                        dfh.flush()
                    ifh.flush()
                    text = self.revision(chain)
                    if len(text) == 0:
                        # skip over trivial delta header
                        text = buffer(delta, 12)
                    else:
                        text = mdiff.patches(text, [delta])
                    del delta
                    chk = self._addrevision(text, transaction, link, p1, p2, None,
                                            ifh, dfh)
                    if not dfh and not self._inline:
                        # addrevision switched from inline to conventional
                        # reopen the index
                        dfh = self.opener(self.datafile, "a")
                        ifh = self.opener(self.indexfile, "a")
                    if chk != node:
                        raise RevlogError(_("consistency error adding group"))
                    textlen = len(text)
                else:
                    e = (offset_type(end, 0), cdeltalen, textlen, base,
                         link, self.rev(p1), self.rev(p2), node)
                    self.index.insert(-1, e)
                    self.nodemap[node] = r
                    entry = self._io.packentry(e, self.node, self.version, r)
                    if self._inline:
                        ifh.write(entry)
                        ifh.write(cdelta[0])
                        ifh.write(cdelta[1])
                        self.checkinlinesize(transaction, ifh)
                        if not self._inline:
                            dfh = self.opener(self.datafile, "a")
                            ifh = self.opener(self.indexfile, "a")
                    else:
                        dfh.write(cdelta[0])
                        dfh.write(cdelta[1])
                        ifh.write(entry)

                t, r, chain, prev = r, r + 1, node, node
                base = self.base(t)
                start = self.start(base)
                end = self.end(t)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return node

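    # Rebuild-path sketch: when the incoming delta's base is not the node
    # added just before it, or the pending chain grows past textlen * 2, the
    # delta cannot simply be appended -- the full text is reconstructed via
    # revision(chain), patched, and re-added through _addrevision, which
    # picks a fresh delta (or snapshot) itself. The condition, restated:
    #
    #   rebuild = chain != prev or (end - start + cdeltalen) > textlen * 2
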
    def strip(self, minlink):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        if isinstance(self.index, lazyindex):
            self._loadindexmap()

        for rev in self:
            if self.index[rev][4] >= minlink:
                break
        else:
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            df = self.opener(self.datafile, "a")
            df.truncate(end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        indexf = self.opener(self.indexfile, "a")
        indexf.truncate(end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chunkcache = None
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

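    # Truncation arithmetic illustrated: for a non-inline revlog stripped at
    # rev, the data file is cut at start(rev) and the index at
    # rev * self._io.size; for an inline revlog, entries and data share one
    # file, so the single cut point is start(rev) + rev * self._io.size.
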
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            s = self._io.size
            i = max(0, actual / s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

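    # Interpretation sketch (a hedged reading of the arithmetic above): dd
    # is surplus bytes in the data file beyond the last indexed entry, and
    # di is surplus bytes in the index beyond whole entries (for inline
    # revlogs, beyond the entries plus their interleaved data). A healthy
    # revlog reports (0, 0):
    #
    #   dd, di = rl.checksize()
    #   assert (dd, di) == (0, 0)   # illustrative consistency check
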
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res