revlog: preread revlog .i file...
Matt Mackall
r8314:57a41c08 default
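This change replaces the old 4-byte version probe with a single large read: revlog.__init__ now reads up to _prereadsize (1 MiB) of the .i file in one call, unpacks the version word from the first four bytes of that buffer, and hands the buffer down to parseindex(), which goes back to the file only when the index is larger than the preread window. A minimal standalone sketch of the pattern (not the patched code itself; opener stands in for the revlog's file opener, and the "read the rest" guard here keys off a completely filled buffer):

    import struct

    _prereadsize = 1048576  # 1 MiB preread window, as defined by the patch

    def readindex(opener, indexfile):
        f = opener(indexfile)
        data = f.read(_prereadsize)   # one big read instead of f.read(4)
        version = None
        if data:
            # the version word is still the first 4 bytes of the buffer
            version = struct.unpack(">I", data[:4])[0]
        if len(data) == _prereadsize:
            # the buffer filled up, so the file may hold more
            data += f.read()          # read the rest
        return version, data

For the common case of an index under 1 MiB this costs one read, where the old code did a tiny header read followed by a full re-read inside parseindex().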
@@ -1,1370 +1,1373 @@
 # revlog.py - storage back-end for mercurial
 #
 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2, incorporated herein by reference.

 """Storage back-end for Mercurial.

 This provides efficient delta storage with O(1) retrieve and append
 and O(changes) merge between branches.
 """

 # import stuff from node for others to import from revlog
 from node import bin, hex, nullid, nullrev, short #@UnusedImport
 from i18n import _
 import changegroup, ancestor, mdiff, parsers, error, util
 import struct, zlib, errno

 _pack = struct.pack
 _unpack = struct.unpack
 _compress = zlib.compress
 _decompress = zlib.decompress
 _sha = util.sha1

 # revlog flags
 REVLOGV0 = 0
 REVLOGNG = 1
 REVLOGNGINLINEDATA = (1 << 16)
 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
 REVLOG_DEFAULT_FORMAT = REVLOGNG
 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS

+_prereadsize = 1048576
+
 RevlogError = error.RevlogError
 LookupError = error.LookupError

 def getoffset(q):
     return int(q >> 16)

 def gettype(q):
     return int(q & 0xFFFF)

 def offset_type(offset, type):
     return long(long(offset) << 16 | type)

 nullhash = _sha(nullid)

 def hash(text, p1, p2):
     """generate a hash from the given text and its parent hashes

     This hash combines both the current file contents and its history
     in a manner that makes it easy to distinguish nodes with the same
     content in the revision graph.
     """
     # As of now, if one of the parent node is null, p2 is null
     if p2 == nullid:
         # deep copy of a hash is faster than creating one
         s = nullhash.copy()
         s.update(p1)
     else:
         # none of the parent nodes are nullid
         l = [p1, p2]
         l.sort()
         s = _sha(l[0])
         s.update(l[1])
     s.update(text)
     return s.digest()

 def compress(text):
     """ generate a possibly-compressed representation of text """
     if not text:
         return ("", text)
     l = len(text)
     bin = None
     if l < 44:
         pass
     elif l > 1000000:
         # zlib makes an internal copy, thus doubling memory usage for
         # large files, so lets do this in pieces
         z = zlib.compressobj()
         p = []
         pos = 0
         while pos < l:
             pos2 = pos + 2**20
             p.append(z.compress(text[pos:pos2]))
             pos = pos2
         p.append(z.flush())
         if sum(map(len, p)) < l:
             bin = "".join(p)
     else:
         bin = _compress(text)
     if bin is None or len(bin) > l:
         if text[0] == '\0':
             return ("", text)
         return ('u', text)
     return ("", bin)

 def decompress(bin):
     """ decompress the given input """
     if not bin:
         return bin
     t = bin[0]
     if t == '\0':
         return bin
     if t == 'x':
         return _decompress(bin)
     if t == 'u':
         return bin[1:]
     raise RevlogError(_("unknown compression type %r") % t)

 class lazyparser(object):
     """
     this class avoids the need to parse the entirety of large indices
     """

     # lazyparser is not safe to use on windows if win32 extensions not
     # available. it keeps file handle open, which make it not possible
     # to break hardlinks on local cloned repos.

     def __init__(self, dataf, size):
         self.dataf = dataf
         self.s = struct.calcsize(indexformatng)
         self.datasize = size
         self.l = size/self.s
         self.index = [None] * self.l
         self.map = {nullid: nullrev}
         self.allmap = 0
         self.all = 0
         self.mapfind_count = 0

     def loadmap(self):
         """
         during a commit, we need to make sure the rev being added is
         not a duplicate. This requires loading the entire index,
         which is fairly slow. loadmap can load up just the node map,
         which takes much less time.
         """
         if self.allmap:
             return
         end = self.datasize
         self.allmap = 1
         cur = 0
         count = 0
         blocksize = self.s * 256
         self.dataf.seek(0)
         while cur < end:
             data = self.dataf.read(blocksize)
             off = 0
             for x in xrange(256):
                 n = data[off + ngshaoffset:off + ngshaoffset + 20]
                 self.map[n] = count
                 count += 1
                 if count >= self.l:
                     break
                 off += self.s
             cur += blocksize

     def loadblock(self, blockstart, blocksize, data=None):
         if self.all:
             return
         if data is None:
             self.dataf.seek(blockstart)
             if blockstart + blocksize > self.datasize:
                 # the revlog may have grown since we've started running,
                 # but we don't have space in self.index for more entries.
                 # limit blocksize so that we don't get too much data.
                 blocksize = max(self.datasize - blockstart, 0)
             data = self.dataf.read(blocksize)
         lend = len(data) / self.s
         i = blockstart / self.s
         off = 0
         # lazyindex supports __delitem__
         if lend > len(self.index) - i:
             lend = len(self.index) - i
         for x in xrange(lend):
             if self.index[i + x] == None:
                 b = data[off : off + self.s]
                 self.index[i + x] = b
                 n = b[ngshaoffset:ngshaoffset + 20]
                 self.map[n] = i + x
             off += self.s

     def findnode(self, node):
         """search backwards through the index file for a specific node"""
         if self.allmap:
             return None

         # hg log will cause many many searches for the manifest
         # nodes. After we get called a few times, just load the whole
         # thing.
         if self.mapfind_count > 8:
             self.loadmap()
             if node in self.map:
                 return node
             return None
         self.mapfind_count += 1
         last = self.l - 1
         while self.index[last] != None:
             if last == 0:
                 self.all = 1
                 self.allmap = 1
                 return None
             last -= 1
         end = (last + 1) * self.s
         blocksize = self.s * 256
         while end >= 0:
             start = max(end - blocksize, 0)
             self.dataf.seek(start)
             data = self.dataf.read(end - start)
             findend = end - start
             while True:
                 # we're searching backwards, so we have to make sure
                 # we don't find a changeset where this node is a parent
                 off = data.find(node, 0, findend)
                 findend = off
                 if off >= 0:
                     i = off / self.s
                     off = i * self.s
                     n = data[off + ngshaoffset:off + ngshaoffset + 20]
                     if n == node:
                         self.map[n] = i + start / self.s
                         return node
                 else:
                     break
             end -= blocksize
         return None

     def loadindex(self, i=None, end=None):
         if self.all:
             return
         all = False
         if i == None:
             blockstart = 0
             blocksize = (65536 / self.s) * self.s
             end = self.datasize
             all = True
         else:
             if end:
                 blockstart = i * self.s
                 end = end * self.s
                 blocksize = end - blockstart
             else:
                 blockstart = (i & ~1023) * self.s
                 blocksize = self.s * 1024
                 end = blockstart + blocksize
         while blockstart < end:
             self.loadblock(blockstart, blocksize)
             blockstart += blocksize
         if all:
             self.all = True

 class lazyindex(object):
     """a lazy version of the index array"""
     def __init__(self, parser):
         self.p = parser
     def __len__(self):
         return len(self.p.index)
     def load(self, pos):
         if pos < 0:
             pos += len(self.p.index)
         self.p.loadindex(pos)
         return self.p.index[pos]
     def __getitem__(self, pos):
         return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
     def __setitem__(self, pos, item):
         self.p.index[pos] = _pack(indexformatng, *item)
     def __delitem__(self, pos):
         del self.p.index[pos]
     def insert(self, pos, e):
         self.p.index.insert(pos, _pack(indexformatng, *e))
     def append(self, e):
         self.p.index.append(_pack(indexformatng, *e))

 class lazymap(object):
     """a lazy version of the node map"""
     def __init__(self, parser):
         self.p = parser
     def load(self, key):
         n = self.p.findnode(key)
         if n == None:
             raise KeyError(key)
     def __contains__(self, key):
         if key in self.p.map:
             return True
         self.p.loadmap()
         return key in self.p.map
     def __iter__(self):
         yield nullid
         for i in xrange(self.p.l):
             ret = self.p.index[i]
             if not ret:
                 self.p.loadindex(i)
                 ret = self.p.index[i]
             if isinstance(ret, str):
                 ret = _unpack(indexformatng, ret)
             yield ret[7]
     def __getitem__(self, key):
         try:
             return self.p.map[key]
         except KeyError:
             try:
                 self.load(key)
                 return self.p.map[key]
             except KeyError:
                 raise KeyError("node " + hex(key))
     def __setitem__(self, key, val):
         self.p.map[key] = val
     def __delitem__(self, key):
         del self.p.map[key]

 indexformatv0 = ">4l20s20s20s"
 v0shaoffset = 56

 class revlogoldio(object):
     def __init__(self):
         self.size = struct.calcsize(indexformatv0)

-    def parseindex(self, fp, inline):
+    def parseindex(self, fp, data, inline):
         s = self.size
         index = []
         nodemap = {nullid: nullrev}
         n = off = 0
-        data = fp.read()
+        if len(data) < _prereadsize:
+            data += fp.read() # read the rest
         l = len(data)
         while off + s <= l:
             cur = data[off:off + s]
             off += s
             e = _unpack(indexformatv0, cur)
             # transform to revlogv1 format
             e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                   nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
             index.append(e2)
             nodemap[e[6]] = n
             n += 1

         return index, nodemap, None

     def packentry(self, entry, node, version, rev):
         e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
               node(entry[5]), node(entry[6]), entry[7])
         return _pack(indexformatv0, *e2)

 # index ng:
 # 6 bytes offset
 # 2 bytes flags
 # 4 bytes compressed length
 # 4 bytes uncompressed length
 # 4 bytes: base rev
 # 4 bytes link rev
 # 4 bytes parent 1 rev
 # 4 bytes parent 2 rev
 # 32 bytes: nodeid
 indexformatng = ">Qiiiiii20s12x"
 ngshaoffset = 32
 versionformat = ">I"

 class revlogio(object):
     def __init__(self):
         self.size = struct.calcsize(indexformatng)

-    def parseindex(self, fp, inline):
+    def parseindex(self, fp, data, inline):
         try:
-            size = util.fstat(fp).st_size
+            size = len(data)
+            if size == _prereadsize:
+                size = util.fstat(fp).st_size
         except AttributeError:
             size = 0

-        if util.openhardlinks() and not inline and size > 1000000:
+        if util.openhardlinks() and not inline and size > _prereadsize:
             # big index, let's parse it on demand
             parser = lazyparser(fp, size)
             index = lazyindex(parser)
             nodemap = lazymap(parser)
             e = list(index[0])
             type = gettype(e[0])
             e[0] = offset_type(0, type)
             index[0] = e
             return index, nodemap, None

-        data = fp.read()
         # call the C implementation to parse the index data
         index, nodemap, cache = parsers.parse_index(data, inline)
         return index, nodemap, cache

     def packentry(self, entry, node, version, rev):
         p = _pack(indexformatng, *entry)
         if rev == 0:
             p = _pack(versionformat, version) + p[4:]
         return p

 class revlog(object):
     """
     the underlying revision storage object

     A revlog consists of two parts, an index and the revision data.

     The index is a file with a fixed record size containing
     information on each revision, including its nodeid (hash), the
     nodeids of its parents, the position and offset of its data within
     the data file, and the revision it's based on. Finally, each entry
     contains a linkrev entry that can serve as a pointer to external
     data.

     The revision data itself is a linear collection of data chunks.
     Each chunk represents a revision and is usually represented as a
     delta against the previous chunk. To bound lookup time, runs of
     deltas are limited to about 2 times the length of the original
     version data. This makes retrieval of a version proportional to
     its size, or O(1) relative to the number of revisions.

     Both pieces of the revlog are written to in an append-only
     fashion, which means we never need to rewrite a file to insert or
     remove data, and can use some simple techniques to avoid the need
     for locking while reading.
     """
     def __init__(self, opener, indexfile):
         """
         create a revlog object

         opener is a function that abstracts the file opening operation
         and can be used to implement COW semantics or the like.
         """
         self.indexfile = indexfile
         self.datafile = indexfile[:-2] + ".d"
         self.opener = opener
         self._cache = None
         self._chunkcache = None
         self.nodemap = {nullid: nullrev}
         self.index = []

         v = REVLOG_DEFAULT_VERSION
         if hasattr(opener, "defversion"):
             v = opener.defversion
         if v & REVLOGNG:
             v |= REVLOGNGINLINEDATA

435 i = ""
439 i = ''
         try:
             f = self.opener(self.indexfile)
-            i = f.read(4)
-            f.seek(0)
+            i = f.read(_prereadsize)
             if len(i) > 0:
-                v = struct.unpack(versionformat, i)[0]
+                v = struct.unpack(versionformat, i[:4])[0]
         except IOError, inst:
             if inst.errno != errno.ENOENT:
                 raise

         self.version = v
         self._inline = v & REVLOGNGINLINEDATA
         flags = v & ~0xFFFF
         fmt = v & 0xFFFF
         if fmt == REVLOGV0 and flags:
             raise RevlogError(_("index %s unknown flags %#04x for format v0")
                               % (self.indexfile, flags >> 16))
         elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
             raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                               % (self.indexfile, flags >> 16))
         elif fmt > REVLOGNG:
             raise RevlogError(_("index %s unknown format %d")
                               % (self.indexfile, fmt))

         self._io = revlogio()
         if self.version == REVLOGV0:
             self._io = revlogoldio()
         if i:
             try:
-                d = self._io.parseindex(f, self._inline)
+                d = self._io.parseindex(f, i, self._inline)
466 except (ValueError, IndexError), e:
469 except (ValueError, IndexError), e:
467 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
470 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
468 self.index, self.nodemap, self._chunkcache = d
471 self.index, self.nodemap, self._chunkcache = d
469
472
470 # add the magic null revision at -1 (if it hasn't been done already)
473 # add the magic null revision at -1 (if it hasn't been done already)
471 if (self.index == [] or isinstance(self.index, lazyindex) or
474 if (self.index == [] or isinstance(self.index, lazyindex) or
472 self.index[-1][7] != nullid) :
475 self.index[-1][7] != nullid) :
473 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
476 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
474
477
475 def _loadindex(self, start, end):
478 def _loadindex(self, start, end):
476 """load a block of indexes all at once from the lazy parser"""
479 """load a block of indexes all at once from the lazy parser"""
477 if isinstance(self.index, lazyindex):
480 if isinstance(self.index, lazyindex):
478 self.index.p.loadindex(start, end)
481 self.index.p.loadindex(start, end)
479
482
480 def _loadindexmap(self):
483 def _loadindexmap(self):
481 """loads both the map and the index from the lazy parser"""
484 """loads both the map and the index from the lazy parser"""
482 if isinstance(self.index, lazyindex):
485 if isinstance(self.index, lazyindex):
483 p = self.index.p
486 p = self.index.p
484 p.loadindex()
487 p.loadindex()
485 self.nodemap = p.map
488 self.nodemap = p.map
486
489
487 def _loadmap(self):
490 def _loadmap(self):
488 """loads the map from the lazy parser"""
491 """loads the map from the lazy parser"""
489 if isinstance(self.nodemap, lazymap):
492 if isinstance(self.nodemap, lazymap):
490 self.nodemap.p.loadmap()
493 self.nodemap.p.loadmap()
491 self.nodemap = self.nodemap.p.map
494 self.nodemap = self.nodemap.p.map
492
495
493 def tip(self):
496 def tip(self):
494 return self.node(len(self.index) - 2)
497 return self.node(len(self.index) - 2)
495 def __len__(self):
498 def __len__(self):
496 return len(self.index) - 1
499 return len(self.index) - 1
497 def __iter__(self):
500 def __iter__(self):
498 for i in xrange(len(self)):
501 for i in xrange(len(self)):
499 yield i
502 yield i
500 def rev(self, node):
503 def rev(self, node):
501 try:
504 try:
502 return self.nodemap[node]
505 return self.nodemap[node]
503 except KeyError:
506 except KeyError:
504 raise LookupError(node, self.indexfile, _('no node'))
507 raise LookupError(node, self.indexfile, _('no node'))
505 def node(self, rev):
508 def node(self, rev):
506 return self.index[rev][7]
509 return self.index[rev][7]
507 def linkrev(self, rev):
510 def linkrev(self, rev):
508 return self.index[rev][4]
511 return self.index[rev][4]
509 def parents(self, node):
512 def parents(self, node):
510 i = self.index
513 i = self.index
511 d = i[self.rev(node)]
514 d = i[self.rev(node)]
512 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
515 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
513 def parentrevs(self, rev):
516 def parentrevs(self, rev):
514 return self.index[rev][5:7]
517 return self.index[rev][5:7]
515 def start(self, rev):
518 def start(self, rev):
516 return int(self.index[rev][0] >> 16)
519 return int(self.index[rev][0] >> 16)
517 def end(self, rev):
520 def end(self, rev):
518 return self.start(rev) + self.length(rev)
521 return self.start(rev) + self.length(rev)
519 def length(self, rev):
522 def length(self, rev):
520 return self.index[rev][1]
523 return self.index[rev][1]
521 def base(self, rev):
524 def base(self, rev):
522 return self.index[rev][3]
525 return self.index[rev][3]
523
526
524 def size(self, rev):
527 def size(self, rev):
525 """return the length of the uncompressed text for a given revision"""
528 """return the length of the uncompressed text for a given revision"""
526 l = self.index[rev][2]
529 l = self.index[rev][2]
527 if l >= 0:
530 if l >= 0:
528 return l
531 return l
529
532
530 t = self.revision(self.node(rev))
533 t = self.revision(self.node(rev))
531 return len(t)
534 return len(t)
532
535
533 # alternate implementation, The advantage to this code is it
536 # alternate implementation, The advantage to this code is it
534 # will be faster for a single revision. But, the results are not
537 # will be faster for a single revision. But, the results are not
535 # cached, so finding the size of every revision will be slower.
538 # cached, so finding the size of every revision will be slower.
536 """
539 """
537 if self.cache and self.cache[1] == rev:
540 if self.cache and self.cache[1] == rev:
538 return len(self.cache[2])
541 return len(self.cache[2])
539
542
540 base = self.base(rev)
543 base = self.base(rev)
541 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
544 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
542 base = self.cache[1]
545 base = self.cache[1]
543 text = self.cache[2]
546 text = self.cache[2]
544 else:
547 else:
545 text = self.revision(self.node(base))
548 text = self.revision(self.node(base))
546
549
547 l = len(text)
550 l = len(text)
548 for x in xrange(base + 1, rev + 1):
551 for x in xrange(base + 1, rev + 1):
549 l = mdiff.patchedsize(l, self.chunk(x))
552 l = mdiff.patchedsize(l, self.chunk(x))
550 return l
553 return l
551 """
554 """
552
555
553 def reachable(self, node, stop=None):
556 def reachable(self, node, stop=None):
554 """return a hash of all nodes ancestral to a given node, including
557 """return a hash of all nodes ancestral to a given node, including
555 the node itself, stopping when stop is matched"""
558 the node itself, stopping when stop is matched"""
556 reachable = {}
559 reachable = {}
557 visit = [node]
560 visit = [node]
558 reachable[node] = 1
561 reachable[node] = 1
559 if stop:
562 if stop:
560 stopn = self.rev(stop)
563 stopn = self.rev(stop)
561 else:
564 else:
562 stopn = 0
565 stopn = 0
563 while visit:
566 while visit:
564 n = visit.pop(0)
567 n = visit.pop(0)
565 if n == stop:
568 if n == stop:
566 continue
569 continue
567 if n == nullid:
570 if n == nullid:
568 continue
571 continue
569 for p in self.parents(n):
572 for p in self.parents(n):
570 if self.rev(p) < stopn:
573 if self.rev(p) < stopn:
571 continue
574 continue
572 if p not in reachable:
575 if p not in reachable:
573 reachable[p] = 1
576 reachable[p] = 1
574 visit.append(p)
577 visit.append(p)
575 return reachable
578 return reachable
576
579
577 def ancestors(self, *revs):
580 def ancestors(self, *revs):
578 'Generate the ancestors of revs using a breadth-first visit'
581 'Generate the ancestors of revs using a breadth-first visit'
579 visit = list(revs)
582 visit = list(revs)
580 seen = set([nullrev])
583 seen = set([nullrev])
581 while visit:
584 while visit:
582 for parent in self.parentrevs(visit.pop(0)):
585 for parent in self.parentrevs(visit.pop(0)):
583 if parent not in seen:
586 if parent not in seen:
584 visit.append(parent)
587 visit.append(parent)
585 seen.add(parent)
588 seen.add(parent)
586 yield parent
589 yield parent
587
590
588 def descendants(self, *revs):
591 def descendants(self, *revs):
589 'Generate the descendants of revs in topological order'
592 'Generate the descendants of revs in topological order'
590 seen = set(revs)
593 seen = set(revs)
591 for i in xrange(min(revs) + 1, len(self)):
594 for i in xrange(min(revs) + 1, len(self)):
592 for x in self.parentrevs(i):
595 for x in self.parentrevs(i):
593 if x != nullrev and x in seen:
596 if x != nullrev and x in seen:
594 seen.add(i)
597 seen.add(i)
595 yield i
598 yield i
596 break
599 break
597
600
598 def findmissing(self, common=None, heads=None):
601 def findmissing(self, common=None, heads=None):
599 '''
602 '''
600 returns the topologically sorted list of nodes from the set:
603 returns the topologically sorted list of nodes from the set:
601 missing = (ancestors(heads) \ ancestors(common))
604 missing = (ancestors(heads) \ ancestors(common))
602
605
603 where ancestors() is the set of ancestors from heads, heads included
606 where ancestors() is the set of ancestors from heads, heads included
604
607
605 if heads is None, the heads of the revlog are used
608 if heads is None, the heads of the revlog are used
606 if common is None, nullid is assumed to be a common node
609 if common is None, nullid is assumed to be a common node
607 '''
610 '''
608 if common is None:
611 if common is None:
609 common = [nullid]
612 common = [nullid]
610 if heads is None:
613 if heads is None:
611 heads = self.heads()
614 heads = self.heads()
612
615
613 common = [self.rev(n) for n in common]
616 common = [self.rev(n) for n in common]
614 heads = [self.rev(n) for n in heads]
617 heads = [self.rev(n) for n in heads]
615
618
616 # we want the ancestors, but inclusive
619 # we want the ancestors, but inclusive
617 has = set(self.ancestors(*common))
620 has = set(self.ancestors(*common))
618 has.add(nullrev)
621 has.add(nullrev)
619 has.update(common)
622 has.update(common)
620
623
621 # take all ancestors from heads that aren't in has
624 # take all ancestors from heads that aren't in has
622 missing = {}
625 missing = {}
623 visit = [r for r in heads if r not in has]
626 visit = [r for r in heads if r not in has]
624 while visit:
627 while visit:
625 r = visit.pop(0)
628 r = visit.pop(0)
626 if r in missing:
629 if r in missing:
627 continue
630 continue
628 else:
631 else:
629 missing[r] = None
632 missing[r] = None
630 for p in self.parentrevs(r):
633 for p in self.parentrevs(r):
631 if p not in has:
634 if p not in has:
632 visit.append(p)
635 visit.append(p)
633 missing = missing.keys()
636 missing = missing.keys()
634 missing.sort()
637 missing.sort()
635 return [self.node(r) for r in missing]
638 return [self.node(r) for r in missing]
636
639
637 def nodesbetween(self, roots=None, heads=None):
640 def nodesbetween(self, roots=None, heads=None):
638 """Return a tuple containing three elements. Elements 1 and 2 contain
641 """Return a tuple containing three elements. Elements 1 and 2 contain
639 a final list bases and heads after all the unreachable ones have been
642 a final list bases and heads after all the unreachable ones have been
640 pruned. Element 0 contains a topologically sorted list of all
643 pruned. Element 0 contains a topologically sorted list of all
641
644
642 nodes that satisfy these constraints:
645 nodes that satisfy these constraints:
643 1. All nodes must be descended from a node in roots (the nodes on
646 1. All nodes must be descended from a node in roots (the nodes on
644 roots are considered descended from themselves).
647 roots are considered descended from themselves).
645 2. All nodes must also be ancestors of a node in heads (the nodes in
648 2. All nodes must also be ancestors of a node in heads (the nodes in
646 heads are considered to be their own ancestors).
649 heads are considered to be their own ancestors).
647
650
648 If roots is unspecified, nullid is assumed as the only root.
651 If roots is unspecified, nullid is assumed as the only root.
649 If heads is unspecified, it is taken to be the output of the
652 If heads is unspecified, it is taken to be the output of the
650 heads method (i.e. a list of all nodes in the repository that
653 heads method (i.e. a list of all nodes in the repository that
651 have no children)."""
654 have no children)."""
652 nonodes = ([], [], [])
655 nonodes = ([], [], [])
653 if roots is not None:
656 if roots is not None:
654 roots = list(roots)
657 roots = list(roots)
655 if not roots:
658 if not roots:
656 return nonodes
659 return nonodes
657 lowestrev = min([self.rev(n) for n in roots])
660 lowestrev = min([self.rev(n) for n in roots])
658 else:
661 else:
659 roots = [nullid] # Everybody's a descendent of nullid
662 roots = [nullid] # Everybody's a descendent of nullid
660 lowestrev = nullrev
663 lowestrev = nullrev
661 if (lowestrev == nullrev) and (heads is None):
664 if (lowestrev == nullrev) and (heads is None):
662 # We want _all_ the nodes!
665 # We want _all_ the nodes!
663 return ([self.node(r) for r in self], [nullid], list(self.heads()))
666 return ([self.node(r) for r in self], [nullid], list(self.heads()))
664 if heads is None:
667 if heads is None:
665 # All nodes are ancestors, so the latest ancestor is the last
668 # All nodes are ancestors, so the latest ancestor is the last
666 # node.
669 # node.
667 highestrev = len(self) - 1
670 highestrev = len(self) - 1
668 # Set ancestors to None to signal that every node is an ancestor.
671 # Set ancestors to None to signal that every node is an ancestor.
669 ancestors = None
672 ancestors = None
670 # Set heads to an empty dictionary for later discovery of heads
673 # Set heads to an empty dictionary for later discovery of heads
671 heads = {}
674 heads = {}
672 else:
675 else:
673 heads = list(heads)
676 heads = list(heads)
674 if not heads:
677 if not heads:
675 return nonodes
678 return nonodes
676 ancestors = {}
679 ancestors = {}
677 # Turn heads into a dictionary so we can remove 'fake' heads.
680 # Turn heads into a dictionary so we can remove 'fake' heads.
678 # Also, later we will be using it to filter out the heads we can't
681 # Also, later we will be using it to filter out the heads we can't
679 # find from roots.
682 # find from roots.
680 heads = dict.fromkeys(heads, 0)
683 heads = dict.fromkeys(heads, 0)
681 # Start at the top and keep marking parents until we're done.
684 # Start at the top and keep marking parents until we're done.
682 nodestotag = set(heads)
685 nodestotag = set(heads)
683 # Remember where the top was so we can use it as a limit later.
686 # Remember where the top was so we can use it as a limit later.
684 highestrev = max([self.rev(n) for n in nodestotag])
687 highestrev = max([self.rev(n) for n in nodestotag])
685 while nodestotag:
688 while nodestotag:
686 # grab a node to tag
689 # grab a node to tag
687 n = nodestotag.pop()
690 n = nodestotag.pop()
688 # Never tag nullid
691 # Never tag nullid
689 if n == nullid:
692 if n == nullid:
690 continue
693 continue
691 # A node's revision number represents its place in a
694 # A node's revision number represents its place in a
692 # topologically sorted list of nodes.
695 # topologically sorted list of nodes.
693 r = self.rev(n)
696 r = self.rev(n)
694 if r >= lowestrev:
697 if r >= lowestrev:
695 if n not in ancestors:
698 if n not in ancestors:
696 # If we are possibly a descendent of one of the roots
699 # If we are possibly a descendent of one of the roots
697 # and we haven't already been marked as an ancestor
700 # and we haven't already been marked as an ancestor
698 ancestors[n] = 1 # Mark as ancestor
701 ancestors[n] = 1 # Mark as ancestor
699 # Add non-nullid parents to list of nodes to tag.
702 # Add non-nullid parents to list of nodes to tag.
700 nodestotag.update([p for p in self.parents(n) if
703 nodestotag.update([p for p in self.parents(n) if
701 p != nullid])
704 p != nullid])
702 elif n in heads: # We've seen it before, is it a fake head?
705 elif n in heads: # We've seen it before, is it a fake head?
703 # So it is, real heads should not be the ancestors of
706 # So it is, real heads should not be the ancestors of
704 # any other heads.
707 # any other heads.
705 heads.pop(n)
708 heads.pop(n)
706 if not ancestors:
709 if not ancestors:
707 return nonodes
710 return nonodes
708 # Now that we have our set of ancestors, we want to remove any
711 # Now that we have our set of ancestors, we want to remove any
709 # roots that are not ancestors.
712 # roots that are not ancestors.
710
713
711 # If one of the roots was nullid, everything is included anyway.
714 # If one of the roots was nullid, everything is included anyway.
712 if lowestrev > nullrev:
715 if lowestrev > nullrev:
713 # But, since we weren't, let's recompute the lowest rev to not
716 # But, since we weren't, let's recompute the lowest rev to not
714 # include roots that aren't ancestors.
717 # include roots that aren't ancestors.
715
718
716 # Filter out roots that aren't ancestors of heads
719 # Filter out roots that aren't ancestors of heads
717 roots = [n for n in roots if n in ancestors]
720 roots = [n for n in roots if n in ancestors]
718 # Recompute the lowest revision
721 # Recompute the lowest revision
719 if roots:
722 if roots:
720 lowestrev = min([self.rev(n) for n in roots])
723 lowestrev = min([self.rev(n) for n in roots])
721 else:
724 else:
722 # No more roots? Return empty list
725 # No more roots? Return empty list
723 return nonodes
726 return nonodes
724 else:
727 else:
725 # We are descending from nullid, and don't need to care about
728 # We are descending from nullid, and don't need to care about
726 # any other roots.
729 # any other roots.
727 lowestrev = nullrev
730 lowestrev = nullrev
728 roots = [nullid]
731 roots = [nullid]
729 # Transform our roots list into a set.
732 # Transform our roots list into a set.
730 descendents = set(roots)
733 descendents = set(roots)
731 # Also, keep the original roots so we can filter out roots that aren't
734 # Also, keep the original roots so we can filter out roots that aren't
732 # 'real' roots (i.e. are descended from other roots).
735 # 'real' roots (i.e. are descended from other roots).
733 roots = descendents.copy()
736 roots = descendents.copy()
734 # Our topologically sorted list of output nodes.
737 # Our topologically sorted list of output nodes.
735 orderedout = []
738 orderedout = []
736 # Don't start at nullid since we don't want nullid in our output list,
739 # Don't start at nullid since we don't want nullid in our output list,
737 # and if nullid shows up in descedents, empty parents will look like
740 # and if nullid shows up in descedents, empty parents will look like
738 # they're descendents.
741 # they're descendents.
739 for r in xrange(max(lowestrev, 0), highestrev + 1):
742 for r in xrange(max(lowestrev, 0), highestrev + 1):
740 n = self.node(r)
743 n = self.node(r)
741 isdescendent = False
744 isdescendent = False
742 if lowestrev == nullrev: # Everybody is a descendent of nullid
745 if lowestrev == nullrev: # Everybody is a descendent of nullid
743 isdescendent = True
746 isdescendent = True
744 elif n in descendents:
747 elif n in descendents:
745 # n is already a descendent
748 # n is already a descendent
746 isdescendent = True
749 isdescendent = True
747 # This check only needs to be done here because all the roots
750 # This check only needs to be done here because all the roots
748 # will start being marked is descendents before the loop.
751 # will start being marked is descendents before the loop.
749 if n in roots:
752 if n in roots:
750 # If n was a root, check if it's a 'real' root.
753 # If n was a root, check if it's a 'real' root.
751 p = tuple(self.parents(n))
754 p = tuple(self.parents(n))
752 # If any of its parents are descendents, it's not a root.
755 # If any of its parents are descendents, it's not a root.
753 if (p[0] in descendents) or (p[1] in descendents):
756 if (p[0] in descendents) or (p[1] in descendents):
754 roots.remove(n)
757 roots.remove(n)
755 else:
758 else:
756 p = tuple(self.parents(n))
759 p = tuple(self.parents(n))
757 # A node is a descendent if either of its parents are
760 # A node is a descendent if either of its parents are
758 # descendents. (We seeded the dependents list with the roots
761 # descendents. (We seeded the dependents list with the roots
759 # up there, remember?)
762 # up there, remember?)
760 if (p[0] in descendents) or (p[1] in descendents):
763 if (p[0] in descendents) or (p[1] in descendents):
761 descendents.add(n)
764 descendents.add(n)
762 isdescendent = True
765 isdescendent = True
763 if isdescendent and ((ancestors is None) or (n in ancestors)):
766 if isdescendent and ((ancestors is None) or (n in ancestors)):
764 # Only include nodes that are both descendents and ancestors.
767 # Only include nodes that are both descendents and ancestors.
765 orderedout.append(n)
768 orderedout.append(n)
766 if (ancestors is not None) and (n in heads):
769 if (ancestors is not None) and (n in heads):
767 # We're trying to figure out which heads are reachable
770 # We're trying to figure out which heads are reachable
768 # from roots.
771 # from roots.
769 # Mark this head as having been reached
772 # Mark this head as having been reached
770 heads[n] = 1
773 heads[n] = 1
771 elif ancestors is None:
774 elif ancestors is None:
772 # Otherwise, we're trying to discover the heads.
775 # Otherwise, we're trying to discover the heads.
773 # Assume this is a head because if it isn't, the next step
776 # Assume this is a head because if it isn't, the next step
774 # will eventually remove it.
777 # will eventually remove it.
775 heads[n] = 1
778 heads[n] = 1
776 # But, obviously its parents aren't.
779 # But, obviously its parents aren't.
777 for p in self.parents(n):
780 for p in self.parents(n):
778 heads.pop(p, None)
781 heads.pop(p, None)
779 heads = [n for n in heads.iterkeys() if heads[n] != 0]
782 heads = [n for n in heads.iterkeys() if heads[n] != 0]
780 roots = list(roots)
783 roots = list(roots)
781 assert orderedout
784 assert orderedout
782 assert roots
785 assert roots
783 assert heads
786 assert heads
784 return (orderedout, roots, heads)
787 return (orderedout, roots, heads)
785
788
786 def heads(self, start=None, stop=None):
789 def heads(self, start=None, stop=None):
787 """return the list of all nodes that have no children
790 """return the list of all nodes that have no children
788
791
789 if start is specified, only heads that are descendants of
792 if start is specified, only heads that are descendants of
790 start will be returned
793 start will be returned
791 if stop is specified, it will consider all the revs from stop
794 if stop is specified, it will consider all the revs from stop
792 as if they had no children
795 as if they had no children
793 """
796 """
794 if start is None and stop is None:
797 if start is None and stop is None:
795 count = len(self)
798 count = len(self)
796 if not count:
799 if not count:
797 return [nullid]
800 return [nullid]
798 ishead = [1] * (count + 1)
801 ishead = [1] * (count + 1)
799 index = self.index
802 index = self.index
800 for r in xrange(count):
803 for r in xrange(count):
801 e = index[r]
804 e = index[r]
802 ishead[e[5]] = ishead[e[6]] = 0
805 ishead[e[5]] = ishead[e[6]] = 0
803 return [self.node(r) for r in xrange(count) if ishead[r]]
806 return [self.node(r) for r in xrange(count) if ishead[r]]
804
807
805 if start is None:
808 if start is None:
806 start = nullid
809 start = nullid
807 if stop is None:
810 if stop is None:
808 stop = []
811 stop = []
809 stoprevs = set([self.rev(n) for n in stop])
812 stoprevs = set([self.rev(n) for n in stop])
810 startrev = self.rev(start)
813 startrev = self.rev(start)
811 reachable = {startrev: 1}
814 reachable = {startrev: 1}
812 heads = {startrev: 1}
815 heads = {startrev: 1}
813
816
814 parentrevs = self.parentrevs
817 parentrevs = self.parentrevs
815 for r in xrange(startrev + 1, len(self)):
818 for r in xrange(startrev + 1, len(self)):
816 for p in parentrevs(r):
819 for p in parentrevs(r):
817 if p in reachable:
820 if p in reachable:
818 if r not in stoprevs:
821 if r not in stoprevs:
819 reachable[r] = 1
822 reachable[r] = 1
820 heads[r] = 1
823 heads[r] = 1
821 if p in heads and p not in stoprevs:
824 if p in heads and p not in stoprevs:
822 del heads[p]
825 del heads[p]
823
826
824 return [self.node(r) for r in heads]
827 return [self.node(r) for r in heads]
825
828
826 def children(self, node):
829 def children(self, node):
827 """find the children of a given node"""
830 """find the children of a given node"""
828 c = []
831 c = []
829 p = self.rev(node)
832 p = self.rev(node)
830 for r in range(p + 1, len(self)):
833 for r in range(p + 1, len(self)):
831 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
834 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
832 if prevs:
835 if prevs:
833 for pr in prevs:
836 for pr in prevs:
834 if pr == p:
837 if pr == p:
835 c.append(self.node(r))
838 c.append(self.node(r))
836 elif p == nullrev:
839 elif p == nullrev:
837 c.append(self.node(r))
840 c.append(self.node(r))
838 return c
841 return c
839
842
840 def _match(self, id):
843 def _match(self, id):
841 if isinstance(id, (long, int)):
844 if isinstance(id, (long, int)):
842 # rev
845 # rev
843 return self.node(id)
846 return self.node(id)
844 if len(id) == 20:
847 if len(id) == 20:
845 # possibly a binary node
848 # possibly a binary node
846 # odds of a binary node being all hex in ASCII are 1 in 10**25
849 # odds of a binary node being all hex in ASCII are 1 in 10**25
847 try:
850 try:
848 node = id
851 node = id
849 self.rev(node) # quick search the index
852 self.rev(node) # quick search the index
850 return node
853 return node
851 except LookupError:
854 except LookupError:
852 pass # may be partial hex id
855 pass # may be partial hex id
853 try:
856 try:
854 # str(rev)
857 # str(rev)
855 rev = int(id)
858 rev = int(id)
856 if str(rev) != id:
859 if str(rev) != id:
857 raise ValueError
860 raise ValueError
858 if rev < 0:
861 if rev < 0:
859 rev = len(self) + rev
862 rev = len(self) + rev
860 if rev < 0 or rev >= len(self):
863 if rev < 0 or rev >= len(self):
861 raise ValueError
864 raise ValueError
862 return self.node(rev)
865 return self.node(rev)
863 except (ValueError, OverflowError):
866 except (ValueError, OverflowError):
864 pass
867 pass
865 if len(id) == 40:
868 if len(id) == 40:
866 try:
869 try:
867 # a full hex nodeid?
870 # a full hex nodeid?
868 node = bin(id)
871 node = bin(id)
869 self.rev(node)
872 self.rev(node)
870 return node
873 return node
871 except (TypeError, LookupError):
874 except (TypeError, LookupError):
872 pass
875 pass
873
876
874 def _partialmatch(self, id):
877 def _partialmatch(self, id):
875 if len(id) < 40:
878 if len(id) < 40:
876 try:
879 try:
877 # hex(node)[:...]
880 # hex(node)[:...]
878 l = len(id) / 2 # grab an even number of digits
881 l = len(id) / 2 # grab an even number of digits
879 bin_id = bin(id[:l*2])
882 bin_id = bin(id[:l*2])
880 nl = [n for n in self.nodemap if n[:l] == bin_id]
883 nl = [n for n in self.nodemap if n[:l] == bin_id]
881 nl = [n for n in nl if hex(n).startswith(id)]
884 nl = [n for n in nl if hex(n).startswith(id)]
882 if len(nl) > 0:
885 if len(nl) > 0:
883 if len(nl) == 1:
886 if len(nl) == 1:
884 return nl[0]
887 return nl[0]
885 raise LookupError(id, self.indexfile,
888 raise LookupError(id, self.indexfile,
886 _('ambiguous identifier'))
889 _('ambiguous identifier'))
887 return None
890 return None
888 except TypeError:
891 except TypeError:
889 pass
892 pass
890
893
891 def lookup(self, id):
894 def lookup(self, id):
892 """locate a node based on:
895 """locate a node based on:
893 - revision number or str(revision number)
896 - revision number or str(revision number)
894 - nodeid or subset of hex nodeid
897 - nodeid or subset of hex nodeid
895 """
898 """
896 n = self._match(id)
899 n = self._match(id)
897 if n is not None:
900 if n is not None:
898 return n
901 return n
899 n = self._partialmatch(id)
902 n = self._partialmatch(id)
900 if n:
903 if n:
901 return n
904 return n
902
905
903 raise LookupError(id, self.indexfile, _('no match found'))
906 raise LookupError(id, self.indexfile, _('no match found'))
904
907
905 def cmp(self, node, text):
908 def cmp(self, node, text):
906 """compare text with a given file revision"""
909 """compare text with a given file revision"""
907 p1, p2 = self.parents(node)
910 p1, p2 = self.parents(node)
908 return hash(text, p1, p2) != node
911 return hash(text, p1, p2) != node
909
912
910 def chunk(self, rev, df=None):
913 def chunk(self, rev, df=None):
911 def loadcache(df):
914 def loadcache(df):
912 if not df:
915 if not df:
913 if self._inline:
916 if self._inline:
914 df = self.opener(self.indexfile)
917 df = self.opener(self.indexfile)
915 else:
918 else:
916 df = self.opener(self.datafile)
919 df = self.opener(self.datafile)
917 df.seek(start)
920 df.seek(start)
918 self._chunkcache = (start, df.read(cache_length))
921 self._chunkcache = (start, df.read(cache_length))
919
922
920 start, length = self.start(rev), self.length(rev)
923 start, length = self.start(rev), self.length(rev)
921 if self._inline:
924 if self._inline:
922 start += (rev + 1) * self._io.size
925 start += (rev + 1) * self._io.size
923 end = start + length
926 end = start + length
924
927
925 offset = 0
928 offset = 0
926 if not self._chunkcache:
929 if not self._chunkcache:
927 cache_length = max(65536, length)
930 cache_length = max(65536, length)
928 loadcache(df)
931 loadcache(df)
929 else:
932 else:
930 cache_start = self._chunkcache[0]
933 cache_start = self._chunkcache[0]
931 cache_length = len(self._chunkcache[1])
934 cache_length = len(self._chunkcache[1])
932 cache_end = cache_start + cache_length
935 cache_end = cache_start + cache_length
933 if start >= cache_start and end <= cache_end:
936 if start >= cache_start and end <= cache_end:
934 # it is cached
937 # it is cached
935 offset = start - cache_start
938 offset = start - cache_start
936 else:
939 else:
937 cache_length = max(65536, length)
940 cache_length = max(65536, length)
938 loadcache(df)
941 loadcache(df)
939
942
940 # avoid copying large chunks
943 # avoid copying large chunks
941 c = self._chunkcache[1]
944 c = self._chunkcache[1]
942 if cache_length != length:
945 if cache_length != length:
943 c = c[offset:offset + length]
946 c = c[offset:offset + length]
944
947
945 return decompress(c)
948 return decompress(c)
946
949
947 def revdiff(self, rev1, rev2):
950 def revdiff(self, rev1, rev2):
948 """return or calculate a delta between two revisions"""
951 """return or calculate a delta between two revisions"""
949 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
952 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
950 return self.chunk(rev2)
953 return self.chunk(rev2)
951
954
952 return mdiff.textdiff(self.revision(self.node(rev1)),
955 return mdiff.textdiff(self.revision(self.node(rev1)),
953 self.revision(self.node(rev2)))
956 self.revision(self.node(rev2)))
954
957
    def revision(self, node):
        """return an uncompressed revision of a given node"""
        if node == nullid:
            return ""
        if self._cache and self._cache[0] == node:
            return str(self._cache[2])

        # look up what we need to read
        text = None
        rev = self.rev(node)
        base = self.base(rev)

        # check rev flags
        if self.index[rev][0] & 0xFFFF:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.index[rev][0] & 0xFFFF))

        df = None

        # do we have useful data cached?
        if self._cache and self._cache[1] >= base and self._cache[1] < rev:
            base = self._cache[1]
            text = str(self._cache[2])
            self._loadindex(base, rev + 1)
            if not self._inline and rev > base + 1:
                df = self.opener(self.datafile)
        else:
            self._loadindex(base, rev + 1)
            if not self._inline and rev > base:
                df = self.opener(self.datafile)
            text = self.chunk(base, df=df)

        bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
        text = mdiff.patches(text, bins)
        p1, p2 = self.parents(node)
        if node != hash(text, p1, p2):
            raise RevlogError(_("integrity check failed on %s:%d")
                              % (self.datafile, rev))

        self._cache = (node, rev, text)
        return text

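    # Reconstruction sketch (hedged pseudo-trace): for rev = 7 with
    # base = 4, revision() fetches the full snapshot stored at rev 4
    # and then applies the deltas of revs 5, 6 and 7 in order:
    #
    #   text = self.chunk(4)        # full text at the chain base
    #   text = mdiff.patches(text, [self.chunk(5), self.chunk(6),
    #                               self.chunk(7)])
    #
    # The final node-hash comparison catches corruption in either the
    # snapshot or any delta along the chain.
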
    def checkinlinesize(self, tr, fp=None):
        if not self._inline:
            return
        if not fp:
            fp = self.opener(self.indexfile, 'r')
            fp.seek(0, 2)
        size = fp.tell()
        if size < 131072:
            return
        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)
        df = self.opener(self.datafile, 'w')
        try:
            calc = self._io.size
            for r in self:
                start = self.start(r) + (r + 1) * calc
                length = self.length(r)
                fp.seek(start)
                d = fp.read(length)
                df.write(d)
        finally:
            df.close()

        fp.close()
        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~REVLOGNGINLINEDATA
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call rename, the temp file will never replace the
        # real index
        fp.rename()

        tr.replace(self.indexfile, trindex * calc)
        self._chunkcache = None

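    # In short: once an inline revlog's .i file grows past 128 KiB
    # (131072 bytes), the interleaved data chunks are copied out into a
    # separate .d file, the index is rewritten without the
    # REVLOGNGINLINEDATA flag through an atomic temp file, and the
    # chunk cache is dropped because every data offset has changed.
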
    def addrevision(self, text, transaction, link, p1, p2, d=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        d - an optional precomputed delta
        """
        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        try:
            return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

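    # addrevision() only manages file handles; the real work happens in
    # _addrevision() below. For an inline revlog no data handle is
    # opened, since index entries and data chunks share the .i file.
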
    def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
        node = hash(text, p1, p2)
        if node in self.nodemap:
            return node

        curr = len(self)
        prev = curr - 1
        base = self.base(prev)
        offset = self.end(prev)

        if curr:
            if not d:
                ptext = self.revision(self.node(prev))
                d = mdiff.textdiff(ptext, text)
            data = compress(d)
            l = len(data[1]) + len(data[0])
            dist = l + offset - self.start(base)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if not curr or dist > len(text) * 2:
            data = compress(text)
            l = len(data[1]) + len(data[0])
            base = curr

        e = (offset_type(offset, 0), l, len(text),
             base, link, self.rev(p1), self.rev(p2), node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

        self._cache = (node, curr, text)
        return node

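    # Worked example of the snapshot heuristic above (illustrative
    # numbers only): for a 1000-byte text, if the span from the chain's
    # base snapshot to the end of this compressed delta comes to
    # dist = 2500 bytes, then dist > len(text) * 2 = 2000, so a full
    # compressed snapshot is written instead and base resets to the
    # current revision. This bounds reconstruction cost: rebuilding any
    # revision never reads much more than twice its plain-text size.
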
    def ancestor(self, a, b):
        """calculate the least common ancestor of nodes a and b"""

        def parents(rev):
            return [p for p in self.parentrevs(rev) if p != nullrev]

        c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
        if c is None:
            return nullid

        return self.node(c)

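    # Example (hedged): in the graph below, ancestor(c, d) resolves to
    # b, the least common ancestor, rather than a:
    #
    #   a --- b --- c
    #          \
    #           d
    #
    # The nullrev filter in parents() keeps the walk from treating the
    # null revision as a shared ancestor of unrelated roots.
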
    def group(self, nodelist, lookup, infocollect=None):
        """calculate a delta group

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
        have this parent, as it has all history before these
        changesets. The parent used is parent[0].
        """
        revs = [self.rev(n) for n in nodelist]

        # if we don't have any revisions touched by these changesets, bail
        if not revs:
            yield changegroup.closechunk()
            return

        # add the parent of the first rev
        p = self.parents(self.node(revs[0]))[0]
        revs.insert(0, self.rev(p))

        # build deltas
        for d in xrange(0, len(revs) - 1):
            a, b = revs[d], revs[d + 1]
            nb = self.node(b)

            if infocollect is not None:
                infocollect(nb)

            p = self.parents(nb)
            meta = nb + p[0] + p[1] + lookup(nb)
            if a == -1:
                d = self.revision(nb)
                meta += mdiff.trivialdiffheader(len(d))
            else:
                d = self.revdiff(a, b)
            yield changegroup.chunkheader(len(meta) + len(d))
            yield meta
            if len(d) > 2**20:
                pos = 0
                while pos < len(d):
                    pos2 = pos + 2**18
                    yield d[pos:pos2]
                    pos = pos2
            else:
                yield d

        yield changegroup.closechunk()

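    # Wire layout of one delta chunk, as emitted above (hedged summary;
    # see the matching unpack in addgroup() below):
    #
    #   <4-byte length header>     changegroup.chunkheader()
    #   <node><p1><p2><linknode>   80 bytes of metadata, 20 bytes each
    #   <delta payload>            streamed in 2**18-byte slices when
    #                              larger than 2**20 bytes
    #
    # A zero-length chunk (changegroup.closechunk()) ends the group.
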
    def addgroup(self, revs, linkmapper, transaction):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The
        first delta is against its parent, which should be in our
        log; the rest are against the previous delta.
        """

        # track the base of the current delta log
        r = len(self)
        t = r - 1
        node = None

        base = prev = nullrev
        start = end = textlen = 0
        if r:
            end = self.end(t)

        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        try:
            # loop through our set of deltas
            chain = None
            for chunk in revs:
                node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue
                delta = buffer(chunk, 80)
                del chunk

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile, _('unknown parent'))

                if not chain:
                    # retrieve the parent revision of the delta chain
                    chain = p1
                    if chain not in self.nodemap:
                        raise LookupError(chain, self.indexfile, _('unknown base'))

                # full versions are inserted when the needed deltas become
                # comparable to the uncompressed text or when the previous
                # version is not the one we have a delta against. We use
                # the size of the previous full rev as a proxy for the
                # current size.

                if chain == prev:
                    cdelta = compress(delta)
                    cdeltalen = len(cdelta[0]) + len(cdelta[1])
                    textlen = mdiff.patchedsize(textlen, delta)

                if chain != prev or (end - start + cdeltalen) > textlen * 2:
                    # flush our writes here so we can read it in revision
                    if dfh:
                        dfh.flush()
                    ifh.flush()
                    text = self.revision(chain)
                    if len(text) == 0:
                        # skip over trivial delta header
                        text = buffer(delta, 12)
                    else:
                        text = mdiff.patches(text, [delta])
                    del delta
                    chk = self._addrevision(text, transaction, link, p1, p2, None,
                                            ifh, dfh)
                    if not dfh and not self._inline:
                        # addrevision switched from inline to conventional
                        # reopen the index
                        dfh = self.opener(self.datafile, "a")
                        ifh = self.opener(self.indexfile, "a")
                    if chk != node:
                        raise RevlogError(_("consistency error adding group"))
                    textlen = len(text)
                else:
                    e = (offset_type(end, 0), cdeltalen, textlen, base,
                         link, self.rev(p1), self.rev(p2), node)
                    self.index.insert(-1, e)
                    self.nodemap[node] = r
                    entry = self._io.packentry(e, self.node, self.version, r)
                    if self._inline:
                        ifh.write(entry)
                        ifh.write(cdelta[0])
                        ifh.write(cdelta[1])
                        self.checkinlinesize(transaction, ifh)
                        if not self._inline:
                            dfh = self.opener(self.datafile, "a")
                            ifh = self.opener(self.indexfile, "a")
                    else:
                        dfh.write(cdelta[0])
                        dfh.write(cdelta[1])
                        ifh.write(entry)

                t, r, chain, prev = r, r + 1, node, node
                base = self.base(t)
                start = self.start(base)
                end = self.end(t)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return node

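    # The fast path above (the else branch) appends the received delta
    # directly after compressing it; the slow path reconstructs the
    # full text first, either because the delta's base is not the
    # previous revision or because the chain has grown past twice the
    # estimated text size, mirroring the heuristic in _addrevision().
    # mdiff.patchedsize() lets that length test run without actually
    # applying the delta.
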
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it will re-add them after this truncation.
        """
        if len(self) == 0:
            return

        if isinstance(self.index, lazyindex):
            self._loadindexmap()

        for rev in self:
            if self.index[rev][4] >= minlink:
                break
        else:
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chunkcache = None
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

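    # Note the for/else idiom above: the else branch runs only when no
    # revision reached minlink, i.e. there is nothing to strip. The
    # final `del self.index[rev:-1]` stops short of the last entry,
    # which appears to preserve the trailing sentinel slot that
    # _addrevision() inserts before with index.insert(-1, e).
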
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            s = self._io.size
            i = max(0, actual / s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

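    # Return value sketch: dd is the count of unexpected trailing bytes
    # in the data file and di the count in the index file; a healthy
    # revlog yields (0, 0). Missing files count as clean (ENOENT is
    # swallowed), while any other IOError propagates to the caller.
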
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res