revlog: use index to find index size
Matt Mackall
r8315:c8493310 default
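
In brief: checkinlinesize decides when an inline revlog (revision data
interleaved with the index in the .i file) has grown large enough to be
split out into separate index and data files. It previously opened the
index file and seeked to the end to measure its size; since every
revision's data offset and compressed length are already recorded in the
index, the same threshold can be computed as self.start(-2) +
self.length(-2) (rev -2 because revlog keeps a magic null entry at -1),
and the file only needs to be opened once a split is actually required.
A minimal sketch of the idea, using a hypothetical fakelog stand-in
rather than revlog's real API:

    # Sketch: derive the size of the stored data from the index alone,
    # instead of seeking to the end of the file. fakelog and its entries
    # are illustrative assumptions, not part of this changeset.
    class fakelog(object):
        def __init__(self, entries):
            # each entry: (data offset, compressed length); revlog's
            # magic null revision at -1 is modeled as a (0, 0) sentinel
            self.entries = entries + [(0, 0)]
        def start(self, rev):
            return self.entries[rev][0]
        def length(self, rev):
            return self.entries[rev][1]

    log = fakelog([(0, 100), (100, 40), (140, 25)])
    # the last real revision is at -2; its end offset bounds the data size
    assert log.start(-2) + log.length(-2) == 165

Because inline revlogs interleave index records with the data, this
figure slightly undercounts the actual file size, which appears
acceptable for the 131072-byte split threshold.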
@@ -1,1373 +1,1372 @@
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2, incorporated herein by reference.
6 # GNU General Public License version 2, incorporated herein by reference.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from node import bin, hex, nullid, nullrev, short #@UnusedImport
15 from node import bin, hex, nullid, nullrev, short #@UnusedImport
16 from i18n import _
16 from i18n import _
17 import changegroup, ancestor, mdiff, parsers, error, util
17 import changegroup, ancestor, mdiff, parsers, error, util
18 import struct, zlib, errno
18 import struct, zlib, errno
19
19
20 _pack = struct.pack
20 _pack = struct.pack
21 _unpack = struct.unpack
21 _unpack = struct.unpack
22 _compress = zlib.compress
22 _compress = zlib.compress
23 _decompress = zlib.decompress
23 _decompress = zlib.decompress
24 _sha = util.sha1
24 _sha = util.sha1
25
25
26 # revlog flags
26 # revlog flags
27 REVLOGV0 = 0
27 REVLOGV0 = 0
28 REVLOGNG = 1
28 REVLOGNG = 1
29 REVLOGNGINLINEDATA = (1 << 16)
29 REVLOGNGINLINEDATA = (1 << 16)
30 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
30 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
31 REVLOG_DEFAULT_FORMAT = REVLOGNG
31 REVLOG_DEFAULT_FORMAT = REVLOGNG
32 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
32 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
33
33
34 _prereadsize = 1048576
34 _prereadsize = 1048576
35
35
36 RevlogError = error.RevlogError
36 RevlogError = error.RevlogError
37 LookupError = error.LookupError
37 LookupError = error.LookupError
38
38
39 def getoffset(q):
39 def getoffset(q):
40 return int(q >> 16)
40 return int(q >> 16)
41
41
42 def gettype(q):
42 def gettype(q):
43 return int(q & 0xFFFF)
43 return int(q & 0xFFFF)
44
44
45 def offset_type(offset, type):
45 def offset_type(offset, type):
46 return long(long(offset) << 16 | type)
46 return long(long(offset) << 16 | type)
47
47
48 nullhash = _sha(nullid)
48 nullhash = _sha(nullid)
49
49
50 def hash(text, p1, p2):
50 def hash(text, p1, p2):
51 """generate a hash from the given text and its parent hashes
51 """generate a hash from the given text and its parent hashes
52
52
53 This hash combines both the current file contents and its history
53 This hash combines both the current file contents and its history
54 in a manner that makes it easy to distinguish nodes with the same
54 in a manner that makes it easy to distinguish nodes with the same
55 content in the revision graph.
55 content in the revision graph.
56 """
56 """
57 # As of now, if one of the parent node is null, p2 is null
57 # As of now, if one of the parent node is null, p2 is null
58 if p2 == nullid:
58 if p2 == nullid:
59 # deep copy of a hash is faster than creating one
59 # deep copy of a hash is faster than creating one
60 s = nullhash.copy()
60 s = nullhash.copy()
61 s.update(p1)
61 s.update(p1)
62 else:
62 else:
63 # none of the parent nodes are nullid
63 # none of the parent nodes are nullid
64 l = [p1, p2]
64 l = [p1, p2]
65 l.sort()
65 l.sort()
66 s = _sha(l[0])
66 s = _sha(l[0])
67 s.update(l[1])
67 s.update(l[1])
68 s.update(text)
68 s.update(text)
69 return s.digest()
69 return s.digest()
70
70
71 def compress(text):
71 def compress(text):
72 """ generate a possibly-compressed representation of text """
72 """ generate a possibly-compressed representation of text """
73 if not text:
73 if not text:
74 return ("", text)
74 return ("", text)
75 l = len(text)
75 l = len(text)
76 bin = None
76 bin = None
77 if l < 44:
77 if l < 44:
78 pass
78 pass
79 elif l > 1000000:
79 elif l > 1000000:
80 # zlib makes an internal copy, thus doubling memory usage for
80 # zlib makes an internal copy, thus doubling memory usage for
81 # large files, so lets do this in pieces
81 # large files, so lets do this in pieces
82 z = zlib.compressobj()
82 z = zlib.compressobj()
83 p = []
83 p = []
84 pos = 0
84 pos = 0
85 while pos < l:
85 while pos < l:
86 pos2 = pos + 2**20
86 pos2 = pos + 2**20
87 p.append(z.compress(text[pos:pos2]))
87 p.append(z.compress(text[pos:pos2]))
88 pos = pos2
88 pos = pos2
89 p.append(z.flush())
89 p.append(z.flush())
90 if sum(map(len, p)) < l:
90 if sum(map(len, p)) < l:
91 bin = "".join(p)
91 bin = "".join(p)
92 else:
92 else:
93 bin = _compress(text)
93 bin = _compress(text)
94 if bin is None or len(bin) > l:
94 if bin is None or len(bin) > l:
95 if text[0] == '\0':
95 if text[0] == '\0':
96 return ("", text)
96 return ("", text)
97 return ('u', text)
97 return ('u', text)
98 return ("", bin)
98 return ("", bin)
99
99
100 def decompress(bin):
100 def decompress(bin):
101 """ decompress the given input """
101 """ decompress the given input """
102 if not bin:
102 if not bin:
103 return bin
103 return bin
104 t = bin[0]
104 t = bin[0]
105 if t == '\0':
105 if t == '\0':
106 return bin
106 return bin
107 if t == 'x':
107 if t == 'x':
108 return _decompress(bin)
108 return _decompress(bin)
109 if t == 'u':
109 if t == 'u':
110 return bin[1:]
110 return bin[1:]
111 raise RevlogError(_("unknown compression type %r") % t)
111 raise RevlogError(_("unknown compression type %r") % t)
112
112
113 class lazyparser(object):
113 class lazyparser(object):
114 """
114 """
115 this class avoids the need to parse the entirety of large indices
115 this class avoids the need to parse the entirety of large indices
116 """
116 """
117
117
118 # lazyparser is not safe to use on windows if win32 extensions not
118 # lazyparser is not safe to use on windows if win32 extensions not
119 # available. it keeps file handle open, which make it not possible
119 # available. it keeps file handle open, which make it not possible
120 # to break hardlinks on local cloned repos.
120 # to break hardlinks on local cloned repos.
121
121
122 def __init__(self, dataf, size):
122 def __init__(self, dataf, size):
123 self.dataf = dataf
123 self.dataf = dataf
124 self.s = struct.calcsize(indexformatng)
124 self.s = struct.calcsize(indexformatng)
125 self.datasize = size
125 self.datasize = size
126 self.l = size/self.s
126 self.l = size/self.s
127 self.index = [None] * self.l
127 self.index = [None] * self.l
128 self.map = {nullid: nullrev}
128 self.map = {nullid: nullrev}
129 self.allmap = 0
129 self.allmap = 0
130 self.all = 0
130 self.all = 0
131 self.mapfind_count = 0
131 self.mapfind_count = 0
132
132
133 def loadmap(self):
133 def loadmap(self):
134 """
134 """
135 during a commit, we need to make sure the rev being added is
135 during a commit, we need to make sure the rev being added is
136 not a duplicate. This requires loading the entire index,
136 not a duplicate. This requires loading the entire index,
137 which is fairly slow. loadmap can load up just the node map,
137 which is fairly slow. loadmap can load up just the node map,
138 which takes much less time.
138 which takes much less time.
139 """
139 """
140 if self.allmap:
140 if self.allmap:
141 return
141 return
142 end = self.datasize
142 end = self.datasize
143 self.allmap = 1
143 self.allmap = 1
144 cur = 0
144 cur = 0
145 count = 0
145 count = 0
146 blocksize = self.s * 256
146 blocksize = self.s * 256
147 self.dataf.seek(0)
147 self.dataf.seek(0)
148 while cur < end:
148 while cur < end:
149 data = self.dataf.read(blocksize)
149 data = self.dataf.read(blocksize)
150 off = 0
150 off = 0
151 for x in xrange(256):
151 for x in xrange(256):
152 n = data[off + ngshaoffset:off + ngshaoffset + 20]
152 n = data[off + ngshaoffset:off + ngshaoffset + 20]
153 self.map[n] = count
153 self.map[n] = count
154 count += 1
154 count += 1
155 if count >= self.l:
155 if count >= self.l:
156 break
156 break
157 off += self.s
157 off += self.s
158 cur += blocksize
158 cur += blocksize
159
159
160 def loadblock(self, blockstart, blocksize, data=None):
160 def loadblock(self, blockstart, blocksize, data=None):
161 if self.all:
161 if self.all:
162 return
162 return
163 if data is None:
163 if data is None:
164 self.dataf.seek(blockstart)
164 self.dataf.seek(blockstart)
165 if blockstart + blocksize > self.datasize:
165 if blockstart + blocksize > self.datasize:
166 # the revlog may have grown since we've started running,
166 # the revlog may have grown since we've started running,
167 # but we don't have space in self.index for more entries.
167 # but we don't have space in self.index for more entries.
168 # limit blocksize so that we don't get too much data.
168 # limit blocksize so that we don't get too much data.
169 blocksize = max(self.datasize - blockstart, 0)
169 blocksize = max(self.datasize - blockstart, 0)
170 data = self.dataf.read(blocksize)
170 data = self.dataf.read(blocksize)
171 lend = len(data) / self.s
171 lend = len(data) / self.s
172 i = blockstart / self.s
172 i = blockstart / self.s
173 off = 0
173 off = 0
174 # lazyindex supports __delitem__
174 # lazyindex supports __delitem__
175 if lend > len(self.index) - i:
175 if lend > len(self.index) - i:
176 lend = len(self.index) - i
176 lend = len(self.index) - i
177 for x in xrange(lend):
177 for x in xrange(lend):
178 if self.index[i + x] == None:
178 if self.index[i + x] == None:
179 b = data[off : off + self.s]
179 b = data[off : off + self.s]
180 self.index[i + x] = b
180 self.index[i + x] = b
181 n = b[ngshaoffset:ngshaoffset + 20]
181 n = b[ngshaoffset:ngshaoffset + 20]
182 self.map[n] = i + x
182 self.map[n] = i + x
183 off += self.s
183 off += self.s
184
184
185 def findnode(self, node):
185 def findnode(self, node):
186 """search backwards through the index file for a specific node"""
186 """search backwards through the index file for a specific node"""
187 if self.allmap:
187 if self.allmap:
188 return None
188 return None
189
189
190 # hg log will cause many many searches for the manifest
190 # hg log will cause many many searches for the manifest
191 # nodes. After we get called a few times, just load the whole
191 # nodes. After we get called a few times, just load the whole
192 # thing.
192 # thing.
193 if self.mapfind_count > 8:
193 if self.mapfind_count > 8:
194 self.loadmap()
194 self.loadmap()
195 if node in self.map:
195 if node in self.map:
196 return node
196 return node
197 return None
197 return None
198 self.mapfind_count += 1
198 self.mapfind_count += 1
199 last = self.l - 1
199 last = self.l - 1
200 while self.index[last] != None:
200 while self.index[last] != None:
201 if last == 0:
201 if last == 0:
202 self.all = 1
202 self.all = 1
203 self.allmap = 1
203 self.allmap = 1
204 return None
204 return None
205 last -= 1
205 last -= 1
206 end = (last + 1) * self.s
206 end = (last + 1) * self.s
207 blocksize = self.s * 256
207 blocksize = self.s * 256
208 while end >= 0:
208 while end >= 0:
209 start = max(end - blocksize, 0)
209 start = max(end - blocksize, 0)
210 self.dataf.seek(start)
210 self.dataf.seek(start)
211 data = self.dataf.read(end - start)
211 data = self.dataf.read(end - start)
212 findend = end - start
212 findend = end - start
213 while True:
213 while True:
214 # we're searching backwards, so we have to make sure
214 # we're searching backwards, so we have to make sure
215 # we don't find a changeset where this node is a parent
215 # we don't find a changeset where this node is a parent
216 off = data.find(node, 0, findend)
216 off = data.find(node, 0, findend)
217 findend = off
217 findend = off
218 if off >= 0:
218 if off >= 0:
219 i = off / self.s
219 i = off / self.s
220 off = i * self.s
220 off = i * self.s
221 n = data[off + ngshaoffset:off + ngshaoffset + 20]
221 n = data[off + ngshaoffset:off + ngshaoffset + 20]
222 if n == node:
222 if n == node:
223 self.map[n] = i + start / self.s
223 self.map[n] = i + start / self.s
224 return node
224 return node
225 else:
225 else:
226 break
226 break
227 end -= blocksize
227 end -= blocksize
228 return None
228 return None
229
229
230 def loadindex(self, i=None, end=None):
230 def loadindex(self, i=None, end=None):
231 if self.all:
231 if self.all:
232 return
232 return
233 all = False
233 all = False
234 if i == None:
234 if i == None:
235 blockstart = 0
235 blockstart = 0
236 blocksize = (65536 / self.s) * self.s
236 blocksize = (65536 / self.s) * self.s
237 end = self.datasize
237 end = self.datasize
238 all = True
238 all = True
239 else:
239 else:
240 if end:
240 if end:
241 blockstart = i * self.s
241 blockstart = i * self.s
242 end = end * self.s
242 end = end * self.s
243 blocksize = end - blockstart
243 blocksize = end - blockstart
244 else:
244 else:
245 blockstart = (i & ~1023) * self.s
245 blockstart = (i & ~1023) * self.s
246 blocksize = self.s * 1024
246 blocksize = self.s * 1024
247 end = blockstart + blocksize
247 end = blockstart + blocksize
248 while blockstart < end:
248 while blockstart < end:
249 self.loadblock(blockstart, blocksize)
249 self.loadblock(blockstart, blocksize)
250 blockstart += blocksize
250 blockstart += blocksize
251 if all:
251 if all:
252 self.all = True
252 self.all = True
253
253
254 class lazyindex(object):
254 class lazyindex(object):
255 """a lazy version of the index array"""
255 """a lazy version of the index array"""
256 def __init__(self, parser):
256 def __init__(self, parser):
257 self.p = parser
257 self.p = parser
258 def __len__(self):
258 def __len__(self):
259 return len(self.p.index)
259 return len(self.p.index)
260 def load(self, pos):
260 def load(self, pos):
261 if pos < 0:
261 if pos < 0:
262 pos += len(self.p.index)
262 pos += len(self.p.index)
263 self.p.loadindex(pos)
263 self.p.loadindex(pos)
264 return self.p.index[pos]
264 return self.p.index[pos]
265 def __getitem__(self, pos):
265 def __getitem__(self, pos):
266 return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
266 return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
267 def __setitem__(self, pos, item):
267 def __setitem__(self, pos, item):
268 self.p.index[pos] = _pack(indexformatng, *item)
268 self.p.index[pos] = _pack(indexformatng, *item)
269 def __delitem__(self, pos):
269 def __delitem__(self, pos):
270 del self.p.index[pos]
270 del self.p.index[pos]
271 def insert(self, pos, e):
271 def insert(self, pos, e):
272 self.p.index.insert(pos, _pack(indexformatng, *e))
272 self.p.index.insert(pos, _pack(indexformatng, *e))
273 def append(self, e):
273 def append(self, e):
274 self.p.index.append(_pack(indexformatng, *e))
274 self.p.index.append(_pack(indexformatng, *e))
275
275
276 class lazymap(object):
276 class lazymap(object):
277 """a lazy version of the node map"""
277 """a lazy version of the node map"""
278 def __init__(self, parser):
278 def __init__(self, parser):
279 self.p = parser
279 self.p = parser
280 def load(self, key):
280 def load(self, key):
281 n = self.p.findnode(key)
281 n = self.p.findnode(key)
282 if n == None:
282 if n == None:
283 raise KeyError(key)
283 raise KeyError(key)
284 def __contains__(self, key):
284 def __contains__(self, key):
285 if key in self.p.map:
285 if key in self.p.map:
286 return True
286 return True
287 self.p.loadmap()
287 self.p.loadmap()
288 return key in self.p.map
288 return key in self.p.map
289 def __iter__(self):
289 def __iter__(self):
290 yield nullid
290 yield nullid
291 for i in xrange(self.p.l):
291 for i in xrange(self.p.l):
292 ret = self.p.index[i]
292 ret = self.p.index[i]
293 if not ret:
293 if not ret:
294 self.p.loadindex(i)
294 self.p.loadindex(i)
295 ret = self.p.index[i]
295 ret = self.p.index[i]
296 if isinstance(ret, str):
296 if isinstance(ret, str):
297 ret = _unpack(indexformatng, ret)
297 ret = _unpack(indexformatng, ret)
298 yield ret[7]
298 yield ret[7]
299 def __getitem__(self, key):
299 def __getitem__(self, key):
300 try:
300 try:
301 return self.p.map[key]
301 return self.p.map[key]
302 except KeyError:
302 except KeyError:
303 try:
303 try:
304 self.load(key)
304 self.load(key)
305 return self.p.map[key]
305 return self.p.map[key]
306 except KeyError:
306 except KeyError:
307 raise KeyError("node " + hex(key))
307 raise KeyError("node " + hex(key))
308 def __setitem__(self, key, val):
308 def __setitem__(self, key, val):
309 self.p.map[key] = val
309 self.p.map[key] = val
310 def __delitem__(self, key):
310 def __delitem__(self, key):
311 del self.p.map[key]
311 del self.p.map[key]
312
312
313 indexformatv0 = ">4l20s20s20s"
313 indexformatv0 = ">4l20s20s20s"
314 v0shaoffset = 56
314 v0shaoffset = 56
315
315
316 class revlogoldio(object):
316 class revlogoldio(object):
317 def __init__(self):
317 def __init__(self):
318 self.size = struct.calcsize(indexformatv0)
318 self.size = struct.calcsize(indexformatv0)
319
319
320 def parseindex(self, fp, data, inline):
320 def parseindex(self, fp, data, inline):
321 s = self.size
321 s = self.size
322 index = []
322 index = []
323 nodemap = {nullid: nullrev}
323 nodemap = {nullid: nullrev}
324 n = off = 0
324 n = off = 0
325 if len(data) < _prereadsize:
325 if len(data) < _prereadsize:
326 data += fp.read() # read the rest
326 data += fp.read() # read the rest
327 l = len(data)
327 l = len(data)
328 while off + s <= l:
328 while off + s <= l:
329 cur = data[off:off + s]
329 cur = data[off:off + s]
330 off += s
330 off += s
331 e = _unpack(indexformatv0, cur)
331 e = _unpack(indexformatv0, cur)
332 # transform to revlogv1 format
332 # transform to revlogv1 format
333 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
333 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
334 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
334 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
335 index.append(e2)
335 index.append(e2)
336 nodemap[e[6]] = n
336 nodemap[e[6]] = n
337 n += 1
337 n += 1
338
338
339 return index, nodemap, None
339 return index, nodemap, None
340
340
341 def packentry(self, entry, node, version, rev):
341 def packentry(self, entry, node, version, rev):
342 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
342 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
343 node(entry[5]), node(entry[6]), entry[7])
343 node(entry[5]), node(entry[6]), entry[7])
344 return _pack(indexformatv0, *e2)
344 return _pack(indexformatv0, *e2)
345
345
346 # index ng:
346 # index ng:
347 # 6 bytes offset
347 # 6 bytes offset
348 # 2 bytes flags
348 # 2 bytes flags
349 # 4 bytes compressed length
349 # 4 bytes compressed length
350 # 4 bytes uncompressed length
350 # 4 bytes uncompressed length
351 # 4 bytes: base rev
351 # 4 bytes: base rev
352 # 4 bytes link rev
352 # 4 bytes link rev
353 # 4 bytes parent 1 rev
353 # 4 bytes parent 1 rev
354 # 4 bytes parent 2 rev
354 # 4 bytes parent 2 rev
355 # 32 bytes: nodeid
355 # 32 bytes: nodeid
356 indexformatng = ">Qiiiiii20s12x"
356 indexformatng = ">Qiiiiii20s12x"
357 ngshaoffset = 32
357 ngshaoffset = 32
358 versionformat = ">I"
358 versionformat = ">I"
359
359
360 class revlogio(object):
360 class revlogio(object):
361 def __init__(self):
361 def __init__(self):
362 self.size = struct.calcsize(indexformatng)
362 self.size = struct.calcsize(indexformatng)
363
363
364 def parseindex(self, fp, data, inline):
364 def parseindex(self, fp, data, inline):
365 try:
365 try:
366 size = len(data)
366 size = len(data)
367 if size == _prereadsize:
367 if size == _prereadsize:
368 size = util.fstat(fp).st_size
368 size = util.fstat(fp).st_size
369 except AttributeError:
369 except AttributeError:
370 size = 0
370 size = 0
371
371
372 if util.openhardlinks() and not inline and size > _prereadsize:
372 if util.openhardlinks() and not inline and size > _prereadsize:
373 # big index, let's parse it on demand
373 # big index, let's parse it on demand
374 parser = lazyparser(fp, size)
374 parser = lazyparser(fp, size)
375 index = lazyindex(parser)
375 index = lazyindex(parser)
376 nodemap = lazymap(parser)
376 nodemap = lazymap(parser)
377 e = list(index[0])
377 e = list(index[0])
378 type = gettype(e[0])
378 type = gettype(e[0])
379 e[0] = offset_type(0, type)
379 e[0] = offset_type(0, type)
380 index[0] = e
380 index[0] = e
381 return index, nodemap, None
381 return index, nodemap, None
382
382
383 # call the C implementation to parse the index data
383 # call the C implementation to parse the index data
384 index, nodemap, cache = parsers.parse_index(data, inline)
384 index, nodemap, cache = parsers.parse_index(data, inline)
385 return index, nodemap, cache
385 return index, nodemap, cache
386
386
387 def packentry(self, entry, node, version, rev):
387 def packentry(self, entry, node, version, rev):
388 p = _pack(indexformatng, *entry)
388 p = _pack(indexformatng, *entry)
389 if rev == 0:
389 if rev == 0:
390 p = _pack(versionformat, version) + p[4:]
390 p = _pack(versionformat, version) + p[4:]
391 return p
391 return p
392
392
393 class revlog(object):
393 class revlog(object):
394 """
394 """
395 the underlying revision storage object
395 the underlying revision storage object
396
396
397 A revlog consists of two parts, an index and the revision data.
397 A revlog consists of two parts, an index and the revision data.
398
398
399 The index is a file with a fixed record size containing
399 The index is a file with a fixed record size containing
400 information on each revision, including its nodeid (hash), the
400 information on each revision, including its nodeid (hash), the
401 nodeids of its parents, the position and offset of its data within
401 nodeids of its parents, the position and offset of its data within
402 the data file, and the revision it's based on. Finally, each entry
402 the data file, and the revision it's based on. Finally, each entry
403 contains a linkrev entry that can serve as a pointer to external
403 contains a linkrev entry that can serve as a pointer to external
404 data.
404 data.
405
405
406 The revision data itself is a linear collection of data chunks.
406 The revision data itself is a linear collection of data chunks.
407 Each chunk represents a revision and is usually represented as a
407 Each chunk represents a revision and is usually represented as a
408 delta against the previous chunk. To bound lookup time, runs of
408 delta against the previous chunk. To bound lookup time, runs of
409 deltas are limited to about 2 times the length of the original
409 deltas are limited to about 2 times the length of the original
410 version data. This makes retrieval of a version proportional to
410 version data. This makes retrieval of a version proportional to
411 its size, or O(1) relative to the number of revisions.
411 its size, or O(1) relative to the number of revisions.
412
412
413 Both pieces of the revlog are written to in an append-only
413 Both pieces of the revlog are written to in an append-only
414 fashion, which means we never need to rewrite a file to insert or
414 fashion, which means we never need to rewrite a file to insert or
415 remove data, and can use some simple techniques to avoid the need
415 remove data, and can use some simple techniques to avoid the need
416 for locking while reading.
416 for locking while reading.
417 """
417 """
418 def __init__(self, opener, indexfile):
418 def __init__(self, opener, indexfile):
419 """
419 """
420 create a revlog object
420 create a revlog object
421
421
422 opener is a function that abstracts the file opening operation
422 opener is a function that abstracts the file opening operation
423 and can be used to implement COW semantics or the like.
423 and can be used to implement COW semantics or the like.
424 """
424 """
425 self.indexfile = indexfile
425 self.indexfile = indexfile
426 self.datafile = indexfile[:-2] + ".d"
426 self.datafile = indexfile[:-2] + ".d"
427 self.opener = opener
427 self.opener = opener
428 self._cache = None
428 self._cache = None
429 self._chunkcache = None
429 self._chunkcache = None
430 self.nodemap = {nullid: nullrev}
430 self.nodemap = {nullid: nullrev}
431 self.index = []
431 self.index = []
432
432
433 v = REVLOG_DEFAULT_VERSION
433 v = REVLOG_DEFAULT_VERSION
434 if hasattr(opener, "defversion"):
434 if hasattr(opener, "defversion"):
435 v = opener.defversion
435 v = opener.defversion
436 if v & REVLOGNG:
436 if v & REVLOGNG:
437 v |= REVLOGNGINLINEDATA
437 v |= REVLOGNGINLINEDATA
438
438
439 i = ''
439 i = ''
440 try:
440 try:
441 f = self.opener(self.indexfile)
441 f = self.opener(self.indexfile)
442 i = f.read(_prereadsize)
442 i = f.read(_prereadsize)
443 if len(i) > 0:
443 if len(i) > 0:
444 v = struct.unpack(versionformat, i[:4])[0]
444 v = struct.unpack(versionformat, i[:4])[0]
445 except IOError, inst:
445 except IOError, inst:
446 if inst.errno != errno.ENOENT:
446 if inst.errno != errno.ENOENT:
447 raise
447 raise
448
448
449 self.version = v
449 self.version = v
450 self._inline = v & REVLOGNGINLINEDATA
450 self._inline = v & REVLOGNGINLINEDATA
451 flags = v & ~0xFFFF
451 flags = v & ~0xFFFF
452 fmt = v & 0xFFFF
452 fmt = v & 0xFFFF
453 if fmt == REVLOGV0 and flags:
453 if fmt == REVLOGV0 and flags:
454 raise RevlogError(_("index %s unknown flags %#04x for format v0")
454 raise RevlogError(_("index %s unknown flags %#04x for format v0")
455 % (self.indexfile, flags >> 16))
455 % (self.indexfile, flags >> 16))
456 elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
456 elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
457 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
457 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
458 % (self.indexfile, flags >> 16))
458 % (self.indexfile, flags >> 16))
459 elif fmt > REVLOGNG:
459 elif fmt > REVLOGNG:
460 raise RevlogError(_("index %s unknown format %d")
460 raise RevlogError(_("index %s unknown format %d")
461 % (self.indexfile, fmt))
461 % (self.indexfile, fmt))
462
462
463 self._io = revlogio()
463 self._io = revlogio()
464 if self.version == REVLOGV0:
464 if self.version == REVLOGV0:
465 self._io = revlogoldio()
465 self._io = revlogoldio()
466 if i:
466 if i:
467 try:
467 try:
468 d = self._io.parseindex(f, i, self._inline)
468 d = self._io.parseindex(f, i, self._inline)
469 except (ValueError, IndexError), e:
469 except (ValueError, IndexError), e:
470 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
470 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
471 self.index, self.nodemap, self._chunkcache = d
471 self.index, self.nodemap, self._chunkcache = d
472
472
473 # add the magic null revision at -1 (if it hasn't been done already)
473 # add the magic null revision at -1 (if it hasn't been done already)
474 if (self.index == [] or isinstance(self.index, lazyindex) or
474 if (self.index == [] or isinstance(self.index, lazyindex) or
475 self.index[-1][7] != nullid) :
475 self.index[-1][7] != nullid) :
476 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
476 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
477
477
478 def _loadindex(self, start, end):
478 def _loadindex(self, start, end):
479 """load a block of indexes all at once from the lazy parser"""
479 """load a block of indexes all at once from the lazy parser"""
480 if isinstance(self.index, lazyindex):
480 if isinstance(self.index, lazyindex):
481 self.index.p.loadindex(start, end)
481 self.index.p.loadindex(start, end)
482
482
483 def _loadindexmap(self):
483 def _loadindexmap(self):
484 """loads both the map and the index from the lazy parser"""
484 """loads both the map and the index from the lazy parser"""
485 if isinstance(self.index, lazyindex):
485 if isinstance(self.index, lazyindex):
486 p = self.index.p
486 p = self.index.p
487 p.loadindex()
487 p.loadindex()
488 self.nodemap = p.map
488 self.nodemap = p.map
489
489
490 def _loadmap(self):
490 def _loadmap(self):
491 """loads the map from the lazy parser"""
491 """loads the map from the lazy parser"""
492 if isinstance(self.nodemap, lazymap):
492 if isinstance(self.nodemap, lazymap):
493 self.nodemap.p.loadmap()
493 self.nodemap.p.loadmap()
494 self.nodemap = self.nodemap.p.map
494 self.nodemap = self.nodemap.p.map
495
495
496 def tip(self):
496 def tip(self):
497 return self.node(len(self.index) - 2)
497 return self.node(len(self.index) - 2)
498 def __len__(self):
498 def __len__(self):
499 return len(self.index) - 1
499 return len(self.index) - 1
500 def __iter__(self):
500 def __iter__(self):
501 for i in xrange(len(self)):
501 for i in xrange(len(self)):
502 yield i
502 yield i
503 def rev(self, node):
503 def rev(self, node):
504 try:
504 try:
505 return self.nodemap[node]
505 return self.nodemap[node]
506 except KeyError:
506 except KeyError:
507 raise LookupError(node, self.indexfile, _('no node'))
507 raise LookupError(node, self.indexfile, _('no node'))
508 def node(self, rev):
508 def node(self, rev):
509 return self.index[rev][7]
509 return self.index[rev][7]
510 def linkrev(self, rev):
510 def linkrev(self, rev):
511 return self.index[rev][4]
511 return self.index[rev][4]
512 def parents(self, node):
512 def parents(self, node):
513 i = self.index
513 i = self.index
514 d = i[self.rev(node)]
514 d = i[self.rev(node)]
515 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
515 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
516 def parentrevs(self, rev):
516 def parentrevs(self, rev):
517 return self.index[rev][5:7]
517 return self.index[rev][5:7]
518 def start(self, rev):
518 def start(self, rev):
519 return int(self.index[rev][0] >> 16)
519 return int(self.index[rev][0] >> 16)
520 def end(self, rev):
520 def end(self, rev):
521 return self.start(rev) + self.length(rev)
521 return self.start(rev) + self.length(rev)
522 def length(self, rev):
522 def length(self, rev):
523 return self.index[rev][1]
523 return self.index[rev][1]
524 def base(self, rev):
524 def base(self, rev):
525 return self.index[rev][3]
525 return self.index[rev][3]
526
526
527 def size(self, rev):
527 def size(self, rev):
528 """return the length of the uncompressed text for a given revision"""
528 """return the length of the uncompressed text for a given revision"""
529 l = self.index[rev][2]
529 l = self.index[rev][2]
530 if l >= 0:
530 if l >= 0:
531 return l
531 return l
532
532
533 t = self.revision(self.node(rev))
533 t = self.revision(self.node(rev))
534 return len(t)
534 return len(t)
535
535
536 # alternate implementation, The advantage to this code is it
536 # alternate implementation, The advantage to this code is it
537 # will be faster for a single revision. But, the results are not
537 # will be faster for a single revision. But, the results are not
538 # cached, so finding the size of every revision will be slower.
538 # cached, so finding the size of every revision will be slower.
539 """
539 """
540 if self.cache and self.cache[1] == rev:
540 if self.cache and self.cache[1] == rev:
541 return len(self.cache[2])
541 return len(self.cache[2])
542
542
543 base = self.base(rev)
543 base = self.base(rev)
544 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
544 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
545 base = self.cache[1]
545 base = self.cache[1]
546 text = self.cache[2]
546 text = self.cache[2]
547 else:
547 else:
548 text = self.revision(self.node(base))
548 text = self.revision(self.node(base))
549
549
550 l = len(text)
550 l = len(text)
551 for x in xrange(base + 1, rev + 1):
551 for x in xrange(base + 1, rev + 1):
552 l = mdiff.patchedsize(l, self.chunk(x))
552 l = mdiff.patchedsize(l, self.chunk(x))
553 return l
553 return l
554 """
554 """
555
555
556 def reachable(self, node, stop=None):
556 def reachable(self, node, stop=None):
557 """return a hash of all nodes ancestral to a given node, including
557 """return a hash of all nodes ancestral to a given node, including
558 the node itself, stopping when stop is matched"""
558 the node itself, stopping when stop is matched"""
559 reachable = {}
559 reachable = {}
560 visit = [node]
560 visit = [node]
561 reachable[node] = 1
561 reachable[node] = 1
562 if stop:
562 if stop:
563 stopn = self.rev(stop)
563 stopn = self.rev(stop)
564 else:
564 else:
565 stopn = 0
565 stopn = 0
566 while visit:
566 while visit:
567 n = visit.pop(0)
567 n = visit.pop(0)
568 if n == stop:
568 if n == stop:
569 continue
569 continue
570 if n == nullid:
570 if n == nullid:
571 continue
571 continue
572 for p in self.parents(n):
572 for p in self.parents(n):
573 if self.rev(p) < stopn:
573 if self.rev(p) < stopn:
574 continue
574 continue
575 if p not in reachable:
575 if p not in reachable:
576 reachable[p] = 1
576 reachable[p] = 1
577 visit.append(p)
577 visit.append(p)
578 return reachable
578 return reachable
579
579
580 def ancestors(self, *revs):
580 def ancestors(self, *revs):
581 'Generate the ancestors of revs using a breadth-first visit'
581 'Generate the ancestors of revs using a breadth-first visit'
582 visit = list(revs)
582 visit = list(revs)
583 seen = set([nullrev])
583 seen = set([nullrev])
584 while visit:
584 while visit:
585 for parent in self.parentrevs(visit.pop(0)):
585 for parent in self.parentrevs(visit.pop(0)):
586 if parent not in seen:
586 if parent not in seen:
587 visit.append(parent)
587 visit.append(parent)
588 seen.add(parent)
588 seen.add(parent)
589 yield parent
589 yield parent
590
590
591 def descendants(self, *revs):
591 def descendants(self, *revs):
592 'Generate the descendants of revs in topological order'
592 'Generate the descendants of revs in topological order'
593 seen = set(revs)
593 seen = set(revs)
594 for i in xrange(min(revs) + 1, len(self)):
594 for i in xrange(min(revs) + 1, len(self)):
595 for x in self.parentrevs(i):
595 for x in self.parentrevs(i):
596 if x != nullrev and x in seen:
596 if x != nullrev and x in seen:
597 seen.add(i)
597 seen.add(i)
598 yield i
598 yield i
599 break
599 break
600
600
601 def findmissing(self, common=None, heads=None):
601 def findmissing(self, common=None, heads=None):
602 '''
602 '''
603 returns the topologically sorted list of nodes from the set:
603 returns the topologically sorted list of nodes from the set:
604 missing = (ancestors(heads) \ ancestors(common))
604 missing = (ancestors(heads) \ ancestors(common))
605
605
606 where ancestors() is the set of ancestors from heads, heads included
606 where ancestors() is the set of ancestors from heads, heads included
607
607
608 if heads is None, the heads of the revlog are used
608 if heads is None, the heads of the revlog are used
609 if common is None, nullid is assumed to be a common node
609 if common is None, nullid is assumed to be a common node
610 '''
610 '''
611 if common is None:
611 if common is None:
612 common = [nullid]
612 common = [nullid]
613 if heads is None:
613 if heads is None:
614 heads = self.heads()
614 heads = self.heads()
615
615
616 common = [self.rev(n) for n in common]
616 common = [self.rev(n) for n in common]
617 heads = [self.rev(n) for n in heads]
617 heads = [self.rev(n) for n in heads]
618
618
619 # we want the ancestors, but inclusive
619 # we want the ancestors, but inclusive
620 has = set(self.ancestors(*common))
620 has = set(self.ancestors(*common))
621 has.add(nullrev)
621 has.add(nullrev)
622 has.update(common)
622 has.update(common)
623
623
624 # take all ancestors from heads that aren't in has
624 # take all ancestors from heads that aren't in has
625 missing = {}
625 missing = {}
626 visit = [r for r in heads if r not in has]
626 visit = [r for r in heads if r not in has]
627 while visit:
627 while visit:
628 r = visit.pop(0)
628 r = visit.pop(0)
629 if r in missing:
629 if r in missing:
630 continue
630 continue
631 else:
631 else:
632 missing[r] = None
632 missing[r] = None
633 for p in self.parentrevs(r):
633 for p in self.parentrevs(r):
634 if p not in has:
634 if p not in has:
635 visit.append(p)
635 visit.append(p)
636 missing = missing.keys()
636 missing = missing.keys()
637 missing.sort()
637 missing.sort()
638 return [self.node(r) for r in missing]
638 return [self.node(r) for r in missing]
639
639
640 def nodesbetween(self, roots=None, heads=None):
640 def nodesbetween(self, roots=None, heads=None):
641 """Return a tuple containing three elements. Elements 1 and 2 contain
641 """Return a tuple containing three elements. Elements 1 and 2 contain
642 a final list bases and heads after all the unreachable ones have been
642 a final list bases and heads after all the unreachable ones have been
643 pruned. Element 0 contains a topologically sorted list of all
643 pruned. Element 0 contains a topologically sorted list of all
644
644
645 nodes that satisfy these constraints:
645 nodes that satisfy these constraints:
646 1. All nodes must be descended from a node in roots (the nodes on
646 1. All nodes must be descended from a node in roots (the nodes on
647 roots are considered descended from themselves).
647 roots are considered descended from themselves).
648 2. All nodes must also be ancestors of a node in heads (the nodes in
648 2. All nodes must also be ancestors of a node in heads (the nodes in
649 heads are considered to be their own ancestors).
649 heads are considered to be their own ancestors).
650
650
651 If roots is unspecified, nullid is assumed as the only root.
651 If roots is unspecified, nullid is assumed as the only root.
652 If heads is unspecified, it is taken to be the output of the
652 If heads is unspecified, it is taken to be the output of the
653 heads method (i.e. a list of all nodes in the repository that
653 heads method (i.e. a list of all nodes in the repository that
654 have no children)."""
654 have no children)."""
655 nonodes = ([], [], [])
655 nonodes = ([], [], [])
656 if roots is not None:
656 if roots is not None:
657 roots = list(roots)
657 roots = list(roots)
658 if not roots:
658 if not roots:
659 return nonodes
659 return nonodes
660 lowestrev = min([self.rev(n) for n in roots])
660 lowestrev = min([self.rev(n) for n in roots])
661 else:
661 else:
662 roots = [nullid] # Everybody's a descendent of nullid
662 roots = [nullid] # Everybody's a descendent of nullid
663 lowestrev = nullrev
663 lowestrev = nullrev
664 if (lowestrev == nullrev) and (heads is None):
664 if (lowestrev == nullrev) and (heads is None):
665 # We want _all_ the nodes!
665 # We want _all_ the nodes!
666 return ([self.node(r) for r in self], [nullid], list(self.heads()))
666 return ([self.node(r) for r in self], [nullid], list(self.heads()))
667 if heads is None:
667 if heads is None:
668 # All nodes are ancestors, so the latest ancestor is the last
668 # All nodes are ancestors, so the latest ancestor is the last
669 # node.
669 # node.
670 highestrev = len(self) - 1
670 highestrev = len(self) - 1
671 # Set ancestors to None to signal that every node is an ancestor.
671 # Set ancestors to None to signal that every node is an ancestor.
672 ancestors = None
672 ancestors = None
673 # Set heads to an empty dictionary for later discovery of heads
673 # Set heads to an empty dictionary for later discovery of heads
674 heads = {}
674 heads = {}
675 else:
675 else:
676 heads = list(heads)
676 heads = list(heads)
677 if not heads:
677 if not heads:
678 return nonodes
678 return nonodes
679 ancestors = {}
679 ancestors = {}
680 # Turn heads into a dictionary so we can remove 'fake' heads.
680 # Turn heads into a dictionary so we can remove 'fake' heads.
681 # Also, later we will be using it to filter out the heads we can't
681 # Also, later we will be using it to filter out the heads we can't
682 # find from roots.
682 # find from roots.
683 heads = dict.fromkeys(heads, 0)
683 heads = dict.fromkeys(heads, 0)
684 # Start at the top and keep marking parents until we're done.
684 # Start at the top and keep marking parents until we're done.
685 nodestotag = set(heads)
685 nodestotag = set(heads)
686 # Remember where the top was so we can use it as a limit later.
686 # Remember where the top was so we can use it as a limit later.
687 highestrev = max([self.rev(n) for n in nodestotag])
687 highestrev = max([self.rev(n) for n in nodestotag])
688 while nodestotag:
688 while nodestotag:
689 # grab a node to tag
689 # grab a node to tag
690 n = nodestotag.pop()
690 n = nodestotag.pop()
691 # Never tag nullid
691 # Never tag nullid
692 if n == nullid:
692 if n == nullid:
693 continue
693 continue
694 # A node's revision number represents its place in a
694 # A node's revision number represents its place in a
695 # topologically sorted list of nodes.
695 # topologically sorted list of nodes.
696 r = self.rev(n)
696 r = self.rev(n)
697 if r >= lowestrev:
697 if r >= lowestrev:
698 if n not in ancestors:
698 if n not in ancestors:
699 # If we are possibly a descendent of one of the roots
699 # If we are possibly a descendent of one of the roots
700 # and we haven't already been marked as an ancestor
700 # and we haven't already been marked as an ancestor
701 ancestors[n] = 1 # Mark as ancestor
701 ancestors[n] = 1 # Mark as ancestor
702 # Add non-nullid parents to list of nodes to tag.
702 # Add non-nullid parents to list of nodes to tag.
703 nodestotag.update([p for p in self.parents(n) if
703 nodestotag.update([p for p in self.parents(n) if
704 p != nullid])
704 p != nullid])
705 elif n in heads: # We've seen it before, is it a fake head?
705 elif n in heads: # We've seen it before, is it a fake head?
706 # So it is, real heads should not be the ancestors of
706 # So it is, real heads should not be the ancestors of
707 # any other heads.
707 # any other heads.
708 heads.pop(n)
708 heads.pop(n)
709 if not ancestors:
709 if not ancestors:
710 return nonodes
710 return nonodes
711 # Now that we have our set of ancestors, we want to remove any
711 # Now that we have our set of ancestors, we want to remove any
712 # roots that are not ancestors.
712 # roots that are not ancestors.
713
713
714 # If one of the roots was nullid, everything is included anyway.
714 # If one of the roots was nullid, everything is included anyway.
715 if lowestrev > nullrev:
715 if lowestrev > nullrev:
716 # But, since we weren't, let's recompute the lowest rev to not
716 # But, since we weren't, let's recompute the lowest rev to not
717 # include roots that aren't ancestors.
717 # include roots that aren't ancestors.
718
718
719 # Filter out roots that aren't ancestors of heads
719 # Filter out roots that aren't ancestors of heads
720 roots = [n for n in roots if n in ancestors]
720 roots = [n for n in roots if n in ancestors]
721 # Recompute the lowest revision
721 # Recompute the lowest revision
722 if roots:
722 if roots:
723 lowestrev = min([self.rev(n) for n in roots])
723 lowestrev = min([self.rev(n) for n in roots])
724 else:
724 else:
725 # No more roots? Return empty list
725 # No more roots? Return empty list
726 return nonodes
726 return nonodes
727 else:
727 else:
728 # We are descending from nullid, and don't need to care about
728 # We are descending from nullid, and don't need to care about
729 # any other roots.
729 # any other roots.
730 lowestrev = nullrev
730 lowestrev = nullrev
731 roots = [nullid]
731 roots = [nullid]
732 # Transform our roots list into a set.
732 # Transform our roots list into a set.
733 descendents = set(roots)
733 descendents = set(roots)
734 # Also, keep the original roots so we can filter out roots that aren't
734 # Also, keep the original roots so we can filter out roots that aren't
735 # 'real' roots (i.e. are descended from other roots).
735 # 'real' roots (i.e. are descended from other roots).
736 roots = descendents.copy()
736 roots = descendents.copy()
737 # Our topologically sorted list of output nodes.
737 # Our topologically sorted list of output nodes.
738 orderedout = []
738 orderedout = []
739 # Don't start at nullid since we don't want nullid in our output list,
739 # Don't start at nullid since we don't want nullid in our output list,
740 # and if nullid shows up in descedents, empty parents will look like
740 # and if nullid shows up in descedents, empty parents will look like
741 # they're descendents.
741 # they're descendents.
742 for r in xrange(max(lowestrev, 0), highestrev + 1):
742 for r in xrange(max(lowestrev, 0), highestrev + 1):
743 n = self.node(r)
743 n = self.node(r)
744 isdescendent = False
744 isdescendent = False
745 if lowestrev == nullrev: # Everybody is a descendent of nullid
745 if lowestrev == nullrev: # Everybody is a descendent of nullid
746 isdescendent = True
746 isdescendent = True
747 elif n in descendents:
747 elif n in descendents:
748 # n is already a descendent
748 # n is already a descendent
749 isdescendent = True
749 isdescendent = True
750 # This check only needs to be done here because all the roots
750 # This check only needs to be done here because all the roots
751 # will start being marked is descendents before the loop.
751 # will start being marked is descendents before the loop.
752 if n in roots:
752 if n in roots:
753 # If n was a root, check if it's a 'real' root.
753 # If n was a root, check if it's a 'real' root.
754 p = tuple(self.parents(n))
754 p = tuple(self.parents(n))
755 # If any of its parents are descendents, it's not a root.
755 # If any of its parents are descendents, it's not a root.
756 if (p[0] in descendents) or (p[1] in descendents):
756 if (p[0] in descendents) or (p[1] in descendents):
757 roots.remove(n)
757 roots.remove(n)
758 else:
758 else:
759 p = tuple(self.parents(n))
759 p = tuple(self.parents(n))
760 # A node is a descendent if either of its parents are
760 # A node is a descendent if either of its parents are
761 # descendents. (We seeded the dependents list with the roots
761 # descendents. (We seeded the dependents list with the roots
762 # up there, remember?)
762 # up there, remember?)
763 if (p[0] in descendents) or (p[1] in descendents):
763 if (p[0] in descendents) or (p[1] in descendents):
764 descendents.add(n)
764 descendents.add(n)
765 isdescendent = True
765 isdescendent = True
766 if isdescendent and ((ancestors is None) or (n in ancestors)):
766 if isdescendent and ((ancestors is None) or (n in ancestors)):
767 # Only include nodes that are both descendents and ancestors.
767 # Only include nodes that are both descendents and ancestors.
768 orderedout.append(n)
768 orderedout.append(n)
769 if (ancestors is not None) and (n in heads):
769 if (ancestors is not None) and (n in heads):
770 # We're trying to figure out which heads are reachable
770 # We're trying to figure out which heads are reachable
771 # from roots.
771 # from roots.
772 # Mark this head as having been reached
772 # Mark this head as having been reached
773 heads[n] = 1
773 heads[n] = 1
774 elif ancestors is None:
774 elif ancestors is None:
775 # Otherwise, we're trying to discover the heads.
775 # Otherwise, we're trying to discover the heads.
776 # Assume this is a head because if it isn't, the next step
776 # Assume this is a head because if it isn't, the next step
777 # will eventually remove it.
777 # will eventually remove it.
778 heads[n] = 1
778 heads[n] = 1
779 # But, obviously its parents aren't.
779 # But, obviously its parents aren't.
780 for p in self.parents(n):
780 for p in self.parents(n):
781 heads.pop(p, None)
781 heads.pop(p, None)
782 heads = [n for n in heads.iterkeys() if heads[n] != 0]
782 heads = [n for n in heads.iterkeys() if heads[n] != 0]
783 roots = list(roots)
783 roots = list(roots)
784 assert orderedout
784 assert orderedout
785 assert roots
785 assert roots
786 assert heads
786 assert heads
787 return (orderedout, roots, heads)
787 return (orderedout, roots, heads)
788
788
789 def heads(self, start=None, stop=None):
789 def heads(self, start=None, stop=None):
790 """return the list of all nodes that have no children
790 """return the list of all nodes that have no children
791
791
792 if start is specified, only heads that are descendants of
792 if start is specified, only heads that are descendants of
793 start will be returned
793 start will be returned
794 if stop is specified, it will consider all the revs from stop
794 if stop is specified, it will consider all the revs from stop
795 as if they had no children
795 as if they had no children
796 """
796 """
797 if start is None and stop is None:
797 if start is None and stop is None:
798 count = len(self)
798 count = len(self)
799 if not count:
799 if not count:
800 return [nullid]
800 return [nullid]
801 ishead = [1] * (count + 1)
801 ishead = [1] * (count + 1)
802 index = self.index
802 index = self.index
803 for r in xrange(count):
803 for r in xrange(count):
804 e = index[r]
804 e = index[r]
805 ishead[e[5]] = ishead[e[6]] = 0
805 ishead[e[5]] = ishead[e[6]] = 0
806 return [self.node(r) for r in xrange(count) if ishead[r]]
806 return [self.node(r) for r in xrange(count) if ishead[r]]
807
807
808 if start is None:
808 if start is None:
809 start = nullid
809 start = nullid
810 if stop is None:
810 if stop is None:
811 stop = []
811 stop = []
812 stoprevs = set([self.rev(n) for n in stop])
812 stoprevs = set([self.rev(n) for n in stop])
813 startrev = self.rev(start)
813 startrev = self.rev(start)
814 reachable = {startrev: 1}
814 reachable = {startrev: 1}
815 heads = {startrev: 1}
815 heads = {startrev: 1}
816
816
817 parentrevs = self.parentrevs
817 parentrevs = self.parentrevs
818 for r in xrange(startrev + 1, len(self)):
818 for r in xrange(startrev + 1, len(self)):
819 for p in parentrevs(r):
819 for p in parentrevs(r):
820 if p in reachable:
820 if p in reachable:
821 if r not in stoprevs:
821 if r not in stoprevs:
822 reachable[r] = 1
822 reachable[r] = 1
823 heads[r] = 1
823 heads[r] = 1
824 if p in heads and p not in stoprevs:
824 if p in heads and p not in stoprevs:
825 del heads[p]
825 del heads[p]
826
826
827 return [self.node(r) for r in heads]
827 return [self.node(r) for r in heads]
828
828
829 def children(self, node):
829 def children(self, node):
830 """find the children of a given node"""
830 """find the children of a given node"""
831 c = []
831 c = []
832 p = self.rev(node)
832 p = self.rev(node)
833 for r in range(p + 1, len(self)):
833 for r in range(p + 1, len(self)):
834 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
834 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
835 if prevs:
835 if prevs:
836 for pr in prevs:
836 for pr in prevs:
837 if pr == p:
837 if pr == p:
838 c.append(self.node(r))
838 c.append(self.node(r))
839 elif p == nullrev:
839 elif p == nullrev:
840 c.append(self.node(r))
840 c.append(self.node(r))
841 return c
841 return c
842
842
843 def _match(self, id):
843 def _match(self, id):
844 if isinstance(id, (long, int)):
844 if isinstance(id, (long, int)):
845 # rev
845 # rev
846 return self.node(id)
846 return self.node(id)
847 if len(id) == 20:
847 if len(id) == 20:
848 # possibly a binary node
848 # possibly a binary node
849 # odds of a binary node being all hex in ASCII are 1 in 10**25
849 # odds of a binary node being all hex in ASCII are 1 in 10**25
850 try:
850 try:
851 node = id
851 node = id
852 self.rev(node) # quick search the index
852 self.rev(node) # quick search the index
853 return node
853 return node
854 except LookupError:
854 except LookupError:
855 pass # may be partial hex id
855 pass # may be partial hex id
856 try:
856 try:
857 # str(rev)
857 # str(rev)
858 rev = int(id)
858 rev = int(id)
859 if str(rev) != id:
859 if str(rev) != id:
860 raise ValueError
860 raise ValueError
861 if rev < 0:
861 if rev < 0:
862 rev = len(self) + rev
862 rev = len(self) + rev
863 if rev < 0 or rev >= len(self):
863 if rev < 0 or rev >= len(self):
864 raise ValueError
864 raise ValueError
865 return self.node(rev)
865 return self.node(rev)
866 except (ValueError, OverflowError):
866 except (ValueError, OverflowError):
867 pass
867 pass
868 if len(id) == 40:
868 if len(id) == 40:
869 try:
869 try:
870 # a full hex nodeid?
870 # a full hex nodeid?
871 node = bin(id)
871 node = bin(id)
872 self.rev(node)
872 self.rev(node)
873 return node
873 return node
874 except (TypeError, LookupError):
874 except (TypeError, LookupError):
875 pass
875 pass
876
876
877 def _partialmatch(self, id):
877 def _partialmatch(self, id):
878 if len(id) < 40:
878 if len(id) < 40:
879 try:
879 try:
880 # hex(node)[:...]
880 # hex(node)[:...]
881 l = len(id) / 2 # grab an even number of digits
881 l = len(id) / 2 # grab an even number of digits
882 bin_id = bin(id[:l*2])
882 bin_id = bin(id[:l*2])
883 nl = [n for n in self.nodemap if n[:l] == bin_id]
883 nl = [n for n in self.nodemap if n[:l] == bin_id]
884 nl = [n for n in nl if hex(n).startswith(id)]
884 nl = [n for n in nl if hex(n).startswith(id)]
885 if len(nl) > 0:
885 if len(nl) > 0:
886 if len(nl) == 1:
886 if len(nl) == 1:
887 return nl[0]
887 return nl[0]
888 raise LookupError(id, self.indexfile,
888 raise LookupError(id, self.indexfile,
889 _('ambiguous identifier'))
889 _('ambiguous identifier'))
890 return None
890 return None
891 except TypeError:
891 except TypeError:
892 pass
892 pass
893
893
894 def lookup(self, id):
894 def lookup(self, id):
895 """locate a node based on:
895 """locate a node based on:
896 - revision number or str(revision number)
896 - revision number or str(revision number)
897 - nodeid or subset of hex nodeid
897 - nodeid or subset of hex nodeid
898 """
898 """
899 n = self._match(id)
899 n = self._match(id)
900 if n is not None:
900 if n is not None:
901 return n
901 return n
902 n = self._partialmatch(id)
902 n = self._partialmatch(id)
903 if n:
903 if n:
904 return n
904 return n
905
905
906 raise LookupError(id, self.indexfile, _('no match found'))
906 raise LookupError(id, self.indexfile, _('no match found'))
907
907
908 def cmp(self, node, text):
908 def cmp(self, node, text):
909 """compare text with a given file revision"""
909 """compare text with a given file revision"""
910 p1, p2 = self.parents(node)
910 p1, p2 = self.parents(node)
911 return hash(text, p1, p2) != node
911 return hash(text, p1, p2) != node
912
912
913 def chunk(self, rev, df=None):
913 def chunk(self, rev, df=None):
914 def loadcache(df):
914 def loadcache(df):
915 if not df:
915 if not df:
916 if self._inline:
916 if self._inline:
917 df = self.opener(self.indexfile)
917 df = self.opener(self.indexfile)
918 else:
918 else:
919 df = self.opener(self.datafile)
919 df = self.opener(self.datafile)
920 df.seek(start)
920 df.seek(start)
921 self._chunkcache = (start, df.read(cache_length))
921 self._chunkcache = (start, df.read(cache_length))
922
922
923 start, length = self.start(rev), self.length(rev)
923 start, length = self.start(rev), self.length(rev)
924 if self._inline:
924 if self._inline:
925 start += (rev + 1) * self._io.size
925 start += (rev + 1) * self._io.size
926 end = start + length
926 end = start + length
927
927
928 offset = 0
928 offset = 0
929 if not self._chunkcache:
929 if not self._chunkcache:
930 cache_length = max(65536, length)
930 cache_length = max(65536, length)
931 loadcache(df)
931 loadcache(df)
932 else:
932 else:
933 cache_start = self._chunkcache[0]
933 cache_start = self._chunkcache[0]
934 cache_length = len(self._chunkcache[1])
934 cache_length = len(self._chunkcache[1])
935 cache_end = cache_start + cache_length
935 cache_end = cache_start + cache_length
936 if start >= cache_start and end <= cache_end:
936 if start >= cache_start and end <= cache_end:
937 # it is cached
937 # it is cached
938 offset = start - cache_start
938 offset = start - cache_start
939 else:
939 else:
940 cache_length = max(65536, length)
940 cache_length = max(65536, length)
941 loadcache(df)
941 loadcache(df)
942
942
943 # avoid copying large chunks
943 # avoid copying large chunks
944 c = self._chunkcache[1]
944 c = self._chunkcache[1]
945 if cache_length != length:
945 if cache_length != length:
946 c = c[offset:offset + length]
946 c = c[offset:offset + length]
947
947
948 return decompress(c)
948 return decompress(c)
949
949
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
            return self.chunk(rev2)

        return mdiff.textdiff(self.revision(self.node(rev1)),
                              self.revision(self.node(rev2)))

    def revision(self, node):
        """return an uncompressed revision of a given node"""
        if node == nullid:
            return ""
        if self._cache and self._cache[0] == node:
            return str(self._cache[2])

        # look up what we need to read
        text = None
        rev = self.rev(node)
        base = self.base(rev)

        # check rev flags
        if self.index[rev][0] & 0xFFFF:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.index[rev][0] & 0xFFFF))

        df = None

        # do we have useful data cached?
        if self._cache and self._cache[1] >= base and self._cache[1] < rev:
            base = self._cache[1]
            text = str(self._cache[2])
            self._loadindex(base, rev + 1)
            if not self._inline and rev > base + 1:
                df = self.opener(self.datafile)
        else:
            self._loadindex(base, rev + 1)
            if not self._inline and rev > base:
                df = self.opener(self.datafile)
            text = self.chunk(base, df=df)

        bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
        text = mdiff.patches(text, bins)
        p1, p2 = self.parents(node)
        if node != hash(text, p1, p2):
            raise RevlogError(_("integrity check failed on %s:%d")
                              % (self.datafile, rev))

        self._cache = (node, rev, text)
        return text

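Stripped of caching and I/O, the core of revision() is a walk up a delta chain: start from the full text at the chain's base and apply each stored delta in order. A toy model with a plain dict standing in for the store (not Mercurial's API):

def rebuild(store, base, rev, apply_patch):
    # store[base] holds a full text; later entries hold deltas
    text = store[base]
    for r in range(base + 1, rev + 1):
        text = apply_patch(text, store[r])
    return text
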
    def checkinlinesize(self, tr, fp=None):
        if not self._inline or (self.start(-2) + self.length(-2)) < 131072:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo == None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)

        if not fp:
            fp = self.opener(self.indexfile, 'r')

        df = self.opener(self.datafile, 'w')
        try:
            calc = self._io.size
            for r in self:
                start = self.start(r) + (r + 1) * calc
                length = self.length(r)
                fp.seek(start)
                d = fp.read(length)
                df.write(d)
        finally:
            df.close()

        fp.close()
        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call rename, the temp file will never replace the
        # real index
        fp.rename()

        tr.replace(self.indexfile, trindex * calc)
        self._chunkcache = None

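The rewritten guard is the point of this changeset: instead of opening the index file and seeking to the end to learn its size, the amount of stored data is read off the in-memory index. Since entries are inserted before the trailing nullid sentinel, rev -2 is the last real revision, so its end offset gives the total data bytes with no file access. A sketch, with rl standing in for an inline revlog:

# old: fp.seek(0, 2); size = fp.tell()   (a real file seek)
# new: derive the size from the in-memory index instead
data_bytes = rl.start(-2) + rl.length(-2)
if data_bytes < 131072:  # still under the 128 KiB threshold
    pass                 # keep the revlog inline
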
    def addrevision(self, text, transaction, link, p1, p2, d=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        d - an optional precomputed delta
        """
        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        try:
            return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

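A hypothetical call site, assuming a live transaction tr, a link revision linkrev, and a parent node p1 already present in the log:

node = rl.addrevision("new file contents\n", tr, linkrev, p1, nullid)
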
    def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
        node = hash(text, p1, p2)
        if node in self.nodemap:
            return node

        curr = len(self)
        prev = curr - 1
        base = self.base(prev)
        offset = self.end(prev)

        if curr:
            if not d:
                ptext = self.revision(self.node(prev))
                d = mdiff.textdiff(ptext, text)
            data = compress(d)
            l = len(data[1]) + len(data[0])
            dist = l + offset - self.start(base)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if not curr or dist > len(text) * 2:
            data = compress(text)
            l = len(data[1]) + len(data[0])
            base = curr

        e = (offset_type(offset, 0), l, len(text),
             base, link, self.rev(p1), self.rev(p2), node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

        self._cache = (node, curr, text)
        return node

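The dist > len(text) * 2 cutoff is what bounds delta chains: once the bytes needed to reconstruct a revision exceed twice its uncompressed size, a fresh full version is stored and the chain restarts. A worked example with made-up sizes:

text_len = 1000   # uncompressed size of the new revision
dist = 2500       # bytes from the chain base through this delta
store_full = dist > text_len * 2   # 2500 > 2000: store a full text
# so a read never applies much more than ~2x the text's size in deltas
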
    def ancestor(self, a, b):
        """calculate the least common ancestor of nodes a and b"""

        def parents(rev):
            return [p for p in self.parentrevs(rev) if p != nullrev]

        c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
        if c is None:
            return nullid

        return self.node(c)

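The helper hands ancestor.ancestor() a parents function over revision numbers with nullrev filtered out. A toy parents function over an explicit DAG shows the expected contract (graph and revs are invented):

dag = {0: [], 1: [0], 2: [0], 3: [1, 2]}
pfunc = lambda rev: dag[rev]
# ancestor.ancestor(1, 2, pfunc) should yield 0 for this graph
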
    def group(self, nodelist, lookup, infocollect=None):
        """calculate a delta group

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
        have this parent, as it has all history before these
        changesets. parent is parent[0].
        """
        revs = [self.rev(n) for n in nodelist]

        # if we don't have any revisions touched by these changesets, bail
        if not revs:
            yield changegroup.closechunk()
            return

        # add the parent of the first rev
        p = self.parents(self.node(revs[0]))[0]
        revs.insert(0, self.rev(p))

        # build deltas
        for d in xrange(0, len(revs) - 1):
            a, b = revs[d], revs[d + 1]
            nb = self.node(b)

            if infocollect is not None:
                infocollect(nb)

            p = self.parents(nb)
            meta = nb + p[0] + p[1] + lookup(nb)
            if a == -1:
                d = self.revision(nb)
                meta += mdiff.trivialdiffheader(len(d))
            else:
                d = self.revdiff(a, b)
            yield changegroup.chunkheader(len(meta) + len(d))
            yield meta
            if len(d) > 2**20:
                pos = 0
                while pos < len(d):
                    pos2 = pos + 2 ** 18
                    yield d[pos:pos2]
                    pos = pos2
            else:
                yield d

        yield changegroup.closechunk()

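On the wire, each delta travels as a length-prefixed chunk: a 4-byte big-endian length (which, in this era's changegroup format, counts itself), 80 bytes of metadata (node, p1, p2, link node), then the delta, with payloads over 1 MiB sliced into 256 KiB pieces. A sketch of the framing:

import struct

def frame(meta, delta):
    # mirrors changegroup.chunkheader: length includes its own 4 bytes
    payload = meta + delta
    yield struct.pack(">l", len(payload) + 4)
    yield payload

close_chunk = struct.pack(">l", 0)  # zero length ends the group
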
    def addgroup(self, revs, linkmapper, transaction):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The
        first delta is against its parent, which should already be in
        our log; the rest are against the previous delta.
        """

        # track the base of the current delta log
        r = len(self)
        t = r - 1
        node = None

        base = prev = nullrev
        start = end = textlen = 0
        if r:
            end = self.end(t)

        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        try:
            # loop through our set of deltas
            chain = None
            for chunk in revs:
                node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue
                delta = buffer(chunk, 80)
                del chunk

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile, _('unknown parent'))

                if not chain:
                    # retrieve the parent revision of the delta chain
                    chain = p1
                    if chain not in self.nodemap:
                        raise LookupError(chain, self.indexfile, _('unknown base'))

                # full versions are inserted when the needed deltas become
                # comparable to the uncompressed text or when the previous
                # version is not the one we have a delta against. We use
                # the size of the previous full rev as a proxy for the
                # current size.

                if chain == prev:
                    cdelta = compress(delta)
                    cdeltalen = len(cdelta[0]) + len(cdelta[1])
                    textlen = mdiff.patchedsize(textlen, delta)

                if chain != prev or (end - start + cdeltalen) > textlen * 2:
                    # flush our writes here so we can read it in revision
                    if dfh:
                        dfh.flush()
                    ifh.flush()
                    text = self.revision(chain)
                    if len(text) == 0:
                        # skip over trivial delta header
                        text = buffer(delta, 12)
                    else:
                        text = mdiff.patches(text, [delta])
                    del delta
                    chk = self._addrevision(text, transaction, link, p1, p2, None,
                                            ifh, dfh)
                    if not dfh and not self._inline:
                        # addrevision switched from inline to conventional
                        # reopen the index
                        dfh = self.opener(self.datafile, "a")
                        ifh = self.opener(self.indexfile, "a")
                    if chk != node:
                        raise RevlogError(_("consistency error adding group"))
                    textlen = len(text)
                else:
                    e = (offset_type(end, 0), cdeltalen, textlen, base,
                         link, self.rev(p1), self.rev(p2), node)
                    self.index.insert(-1, e)
                    self.nodemap[node] = r
                    entry = self._io.packentry(e, self.node, self.version, r)
                    if self._inline:
                        ifh.write(entry)
                        ifh.write(cdelta[0])
                        ifh.write(cdelta[1])
                        self.checkinlinesize(transaction, ifh)
                        if not self._inline:
                            dfh = self.opener(self.datafile, "a")
                            ifh = self.opener(self.indexfile, "a")
                    else:
                        dfh.write(cdelta[0])
                        dfh.write(cdelta[1])
                        ifh.write(entry)

                t, r, chain, prev = r, r + 1, node, node
                base = self.base(t)
                start = self.start(base)
                end = self.end(t)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return node

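The 80-byte header parsed at the top of the loop is simply four 20-byte binary nodeids back to back. A self-contained sketch with invented values:

import struct

header = "N" * 20 + "P" * 20 + "Q" * 20 + "C" * 20  # made-up ids
node, p1, p2, cs = struct.unpack("20s20s20s20s", header)
assert len(node) == 20  # the delta itself starts at offset 80
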
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it will re-add them after this truncation.
        """
        if len(self) == 0:
            return

        if isinstance(self.index, lazyindex):
            self._loadindexmap()

        for rev in self:
            if self.index[rev][4] >= minlink:
                break
        else:
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chunkcache = None
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

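The truncation offsets differ between layouts, as a worked example with invented numbers shows. Say the first revision to strip is rev 5, its data starts at logical offset 4096, and index entries are 64 bytes:

rev, data_start, esize = 5, 4096, 64
separate_index_end = rev * esize             # .i truncated to 320
inline_index_end = data_start + rev * esize  # interleaved .i: 4416
# with a separate .d file, that file is truncated at data_start
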
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            s = self._io.size
            i = max(0, actual / s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

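Both return values measure bytes beyond what the index accounts for, so a healthy revlog reports (0, 0). A hypothetical check, with rl standing in for any open revlog:

dd, di = rl.checksize()
if dd or di:
    print "excess bytes in data: %d, in index: %d" % (dd, di)
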
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res