revlog: make triple-quoted string a real comment
Martin Geisler
r8658:782a85ee default

@@ -1,1376 +1,1376 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2, incorporated herein by reference.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

# import stuff from node for others to import from revlog
from node import bin, hex, nullid, nullrev, short #@UnusedImport
from i18n import _
import changegroup, ancestor, mdiff, parsers, error, util
import struct, zlib, errno

_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress
_sha = util.sha1

# revlog flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS

_prereadsize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    return long(long(offset) << 16 | type)

nullhash = _sha(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        l = [p1, p2]
        l.sort()
        s = _sha(l[0])
        s.update(l[1])
    s.update(text)
    return s.digest()
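
# Illustration (not part of the original file): nullid is twenty '\0'
# bytes and therefore always sorts first, so the p2 == nullid fast path
# above produces the same digest as the general case, namely
# sha1(min(p1, p2) + max(p1, p2) + text).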

def compress(text):
    """ generate a possibly-compressed representation of text """
    if not text:
        return ("", text)
    l = len(text)
    bin = None
    if l < 44:
        pass
    elif l > 1000000:
        # zlib makes an internal copy, thus doubling memory usage for
        # large files, so let's do this in pieces
        z = zlib.compressobj()
        p = []
        pos = 0
        while pos < l:
            pos2 = pos + 2**20
            p.append(z.compress(text[pos:pos2]))
            pos = pos2
        p.append(z.flush())
        if sum(map(len, p)) < l:
            bin = "".join(p)
    else:
        bin = _compress(text)
    if bin is None or len(bin) > l:
        if text[0] == '\0':
            return ("", text)
        return ('u', text)
    return ("", bin)

def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    t = bin[0]
    if t == '\0':
        return bin
    if t == 'x':
        return _decompress(bin)
    if t == 'u':
        return bin[1:]
    raise RevlogError(_("unknown compression type %r") % t)
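
# Illustration (not part of the original file): the first byte of a stored
# chunk doubles as a type tag. zlib output happens to begin with 'x'
# (0x78), so compressed chunks need no extra marker; 'u' prefixes text kept
# uncompressed, and a leading '\0' (or an empty chunk) means the data is
# stored untouched.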

class lazyparser(object):
    """
    this class avoids the need to parse the entirety of large indices
    """

    # lazyparser is not safe to use on windows if win32 extensions are not
    # available. it keeps the file handle open, which makes it impossible
    # to break hardlinks on local cloned repos.

    def __init__(self, dataf):
        try:
            size = util.fstat(dataf).st_size
        except AttributeError:
            size = 0
        self.dataf = dataf
        self.s = struct.calcsize(indexformatng)
        self.datasize = size
        self.l = size/self.s
        self.index = [None] * self.l
        self.map = {nullid: nullrev}
        self.allmap = 0
        self.all = 0
        self.mapfind_count = 0

    def loadmap(self):
        """
        during a commit, we need to make sure the rev being added is
        not a duplicate. This requires loading the entire index,
        which is fairly slow. loadmap can load up just the node map,
        which takes much less time.
        """
        if self.allmap:
            return
        end = self.datasize
        self.allmap = 1
        cur = 0
        count = 0
        blocksize = self.s * 256
        self.dataf.seek(0)
        while cur < end:
            data = self.dataf.read(blocksize)
            off = 0
            for x in xrange(256):
                n = data[off + ngshaoffset:off + ngshaoffset + 20]
                self.map[n] = count
                count += 1
                if count >= self.l:
                    break
                off += self.s
            cur += blocksize

    def loadblock(self, blockstart, blocksize, data=None):
        if self.all:
            return
        if data is None:
            self.dataf.seek(blockstart)
            if blockstart + blocksize > self.datasize:
                # the revlog may have grown since we've started running,
                # but we don't have space in self.index for more entries.
                # limit blocksize so that we don't get too much data.
                blocksize = max(self.datasize - blockstart, 0)
            data = self.dataf.read(blocksize)
        lend = len(data) / self.s
        i = blockstart / self.s
        off = 0
        # lazyindex supports __delitem__
        if lend > len(self.index) - i:
            lend = len(self.index) - i
        for x in xrange(lend):
            if self.index[i + x] is None:
                b = data[off : off + self.s]
                self.index[i + x] = b
                n = b[ngshaoffset:ngshaoffset + 20]
                self.map[n] = i + x
            off += self.s

    def findnode(self, node):
        """search backwards through the index file for a specific node"""
        if self.allmap:
            return None

        # hg log will cause many many searches for the manifest
        # nodes. After we get called a few times, just load the whole
        # thing.
        if self.mapfind_count > 8:
            self.loadmap()
            if node in self.map:
                return node
            return None
        self.mapfind_count += 1
        last = self.l - 1
        while self.index[last] != None:
            if last == 0:
                self.all = 1
                self.allmap = 1
                return None
            last -= 1
        end = (last + 1) * self.s
        blocksize = self.s * 256
        while end >= 0:
            start = max(end - blocksize, 0)
            self.dataf.seek(start)
            data = self.dataf.read(end - start)
            findend = end - start
            while True:
                # we're searching backwards, so we have to make sure
                # we don't find a changeset where this node is a parent
                off = data.find(node, 0, findend)
                findend = off
                if off >= 0:
                    i = off / self.s
                    off = i * self.s
                    n = data[off + ngshaoffset:off + ngshaoffset + 20]
                    if n == node:
                        self.map[n] = i + start / self.s
                        return node
                else:
                    break
            end -= blocksize
        return None

    def loadindex(self, i=None, end=None):
        if self.all:
            return
        all = False
        if i is None:
            blockstart = 0
            blocksize = (65536 / self.s) * self.s
            end = self.datasize
            all = True
        else:
            if end:
                blockstart = i * self.s
                end = end * self.s
                blocksize = end - blockstart
            else:
                blockstart = (i & ~1023) * self.s
                blocksize = self.s * 1024
                end = blockstart + blocksize
        while blockstart < end:
            self.loadblock(blockstart, blocksize)
            blockstart += blocksize
        if all:
            self.all = True

class lazyindex(object):
    """a lazy version of the index array"""
    def __init__(self, parser):
        self.p = parser
    def __len__(self):
        return len(self.p.index)
    def load(self, pos):
        if pos < 0:
            pos += len(self.p.index)
        self.p.loadindex(pos)
        return self.p.index[pos]
    def __getitem__(self, pos):
        return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
    def __setitem__(self, pos, item):
        self.p.index[pos] = _pack(indexformatng, *item)
    def __delitem__(self, pos):
        del self.p.index[pos]
    def insert(self, pos, e):
        self.p.index.insert(pos, _pack(indexformatng, *e))
    def append(self, e):
        self.p.index.append(_pack(indexformatng, *e))

class lazymap(object):
    """a lazy version of the node map"""
    def __init__(self, parser):
        self.p = parser
    def load(self, key):
        n = self.p.findnode(key)
        if n is None:
            raise KeyError(key)
    def __contains__(self, key):
        if key in self.p.map:
            return True
        self.p.loadmap()
        return key in self.p.map
    def __iter__(self):
        yield nullid
        for i in xrange(self.p.l):
            ret = self.p.index[i]
            if not ret:
                self.p.loadindex(i)
                ret = self.p.index[i]
            if isinstance(ret, str):
                ret = _unpack(indexformatng, ret)
            yield ret[7]
    def __getitem__(self, key):
        try:
            return self.p.map[key]
        except KeyError:
            try:
                self.load(key)
                return self.p.map[key]
            except KeyError:
                raise KeyError("node " + hex(key))
    def __setitem__(self, key, val):
        self.p.map[key] = val
    def __delitem__(self, key):
        del self.p.map[key]

indexformatv0 = ">4l20s20s20s"
v0shaoffset = 56

class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, fp, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        if len(data) == _prereadsize:
            data += fp.read() # read the rest
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
ngshaoffset = 32
versionformat = ">I"
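
# Illustration (not part of the original file): each index-ng record is
# struct.calcsize(">Qiiiiii20s12x") == 64 bytes and unpacks to the 8-tuple
# (offset and flags, compressed length, uncompressed length, base rev,
# link rev, p1 rev, p2 rev, nodeid) that the revlog accessors below index
# into; getoffset() and gettype() split the leading quadword.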

class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, fp, data, inline):
        if len(data) == _prereadsize:
            if util.openhardlinks() and not inline:
                # big index, let's parse it on demand
                parser = lazyparser(fp)
                index = lazyindex(parser)
                nodemap = lazymap(parser)
                e = list(index[0])
                type = gettype(e[0])
                e[0] = offset_type(0, type)
                index[0] = e
                return index, nodemap, None
            else:
                data += fp.read()

        # call the C implementation to parse the index data
        index, nodemap, cache = parsers.parse_index(data, inline)
        return index, nodemap, cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.
    """
    def __init__(self, opener, indexfile):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        self._cache = None
        self._chunkcache = (0, '')
        self.nodemap = {nullid: nullrev}
        self.index = []

        v = REVLOG_DEFAULT_VERSION
        if hasattr(opener, "defversion"):
            v = opener.defversion
        if v & REVLOGNG:
            v |= REVLOGNGINLINEDATA

        i = ''
        try:
            f = self.opener(self.indexfile)
            i = f.read(_prereadsize)
            if len(i) > 0:
                v = struct.unpack(versionformat, i[:4])[0]
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        if i:
            try:
                d = self._io.parseindex(f, i, self._inline)
            except (ValueError, IndexError), e:
                raise RevlogError(_("index %s is corrupted") % (self.indexfile))
            self.index, self.nodemap, self._chunkcache = d
            if not self._chunkcache:
                self._chunkclear()

        # add the magic null revision at -1 (if it hasn't been done already)
        if (self.index == [] or isinstance(self.index, lazyindex) or
            self.index[-1][7] != nullid) :
            self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))

    def _loadindex(self, start, end):
        """load a block of indexes all at once from the lazy parser"""
        if isinstance(self.index, lazyindex):
            self.index.p.loadindex(start, end)

    def _loadindexmap(self):
        """loads both the map and the index from the lazy parser"""
        if isinstance(self.index, lazyindex):
            p = self.index.p
            p.loadindex()
            self.nodemap = p.map

    def _loadmap(self):
        """loads the map from the lazy parser"""
        if isinstance(self.nodemap, lazymap):
            self.nodemap.p.loadmap()
            self.nodemap = self.nodemap.p.map

    def tip(self):
        return self.node(len(self.index) - 2)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        for i in xrange(len(self)):
            yield i
    def rev(self, node):
        try:
            return self.nodemap[node]
        except KeyError:
            raise LookupError(node, self.indexfile, _('no node'))
    def node(self, rev):
        return self.index[rev][7]
    def linkrev(self, rev):
        return self.index[rev][4]
    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
    def parentrevs(self, rev):
        return self.index[rev][5:7]
    def start(self, rev):
        return int(self.index[rev][0] >> 16)
    def end(self, rev):
        return self.start(rev) + self.length(rev)
    def length(self, rev):
        return self.index[rev][1]
    def base(self, rev):
        return self.index[rev][3]

    def size(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(self.node(rev))
        return len(t)

-        # alternate implementation, The advantage to this code is it
-        # will be faster for a single revision. But, the results are not
-        # cached, so finding the size of every revision will be slower.
-        """
-        if self.cache and self.cache[1] == rev:
-            return len(self.cache[2])
-
-        base = self.base(rev)
-        if self.cache and self.cache[1] >= base and self.cache[1] < rev:
-            base = self.cache[1]
-            text = self.cache[2]
-        else:
-            text = self.revision(self.node(base))
-
-        l = len(text)
-        for x in xrange(base + 1, rev + 1):
-            l = mdiff.patchedsize(l, self._chunk(x))
-        return l
-        """
+        # Alternate implementation. The advantage to this code is it
+        # will be faster for a single revision. However, the results
+        # are not cached, so finding the size of every revision will
+        # be slower.
+        #
+        # if self.cache and self.cache[1] == rev:
+        #     return len(self.cache[2])
+        #
+        # base = self.base(rev)
+        # if self.cache and self.cache[1] >= base and self.cache[1] < rev:
+        #     base = self.cache[1]
+        #     text = self.cache[2]
+        # else:
+        #     text = self.revision(self.node(base))
+        #
+        # l = len(text)
+        # for x in xrange(base + 1, rev + 1):
+        #     l = mdiff.patchedsize(l, self._chunk(x))
+        # return l

    def reachable(self, node, stop=None):
        """return the set of all nodes ancestral to a given node, including
        the node itself, stopping when stop is matched"""
        reachable = set((node,))
        visit = [node]
        if stop:
            stopn = self.rev(stop)
        else:
            stopn = 0
        while visit:
            n = visit.pop(0)
            if n == stop:
                continue
            if n == nullid:
                continue
            for p in self.parents(n):
                if self.rev(p) < stopn:
                    continue
                if p not in reachable:
                    reachable.add(p)
                    visit.append(p)
        return reachable

    def ancestors(self, *revs):
        'Generate the ancestors of revs using a breadth-first visit'
        visit = list(revs)
        seen = set([nullrev])
        while visit:
            for parent in self.parentrevs(visit.pop(0)):
                if parent not in seen:
                    visit.append(parent)
                    seen.add(parent)
                    yield parent
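
    # Illustration (not part of the original file): seeding seen with
    # nullrev keeps the virtual null revision out of the results, so for
    # a hypothetical revlog instance rl, list(rl.ancestors(5)) yields each
    # real ancestor of rev 5 exactly once, nearest generations first.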

    def descendants(self, *revs):
        'Generate the descendants of revs in topological order'
        seen = set(revs)
        for i in xrange(min(revs) + 1, len(self)):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findmissing(self, common=None, heads=None):
        '''
        returns the topologically sorted list of nodes from the set:
        missing = (ancestors(heads) \ ancestors(common))

        where ancestors() is the set of ancestors from heads, heads included

        if heads is None, the heads of the revlog are used
        if common is None, nullid is assumed to be a common node
        '''
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        has = set(self.ancestors(*common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = [r for r in heads if r not in has]
        while visit:
            r = visit.pop(0)
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return [self.node(r) for r in missing]

    def nodesbetween(self, roots=None, heads=None):
        """Return a tuple containing three elements. Elements 1 and 2 contain
        a final list of bases and heads after all the unreachable ones have
        been pruned. Element 0 contains a topologically sorted list of all

        nodes that satisfy these constraints:
        1. All nodes must be descended from a node in roots (the nodes on
           roots are considered descended from themselves).
        2. All nodes must also be ancestors of a node in heads (the nodes in
           heads are considered to be their own ancestors).

        If roots is unspecified, nullid is assumed as the only root.
        If heads is unspecified, it is taken to be the output of the
        heads method (i.e. a list of all nodes in the repository that
        have no children)."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendent of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, 0)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendent of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [n for n in roots if n in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(n) for n in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendents = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendents.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendents, empty parents will look like
        # they're descendents.
        for r in xrange(max(lowestrev, 0), highestrev + 1):
            n = self.node(r)
            isdescendent = False
            if lowestrev == nullrev: # Everybody is a descendent of nullid
                isdescendent = True
            elif n in descendents:
                # n is already a descendent
                isdescendent = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendents before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendents, it's not a root.
                    if (p[0] in descendents) or (p[1] in descendents):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendent if either of its parents are
                # descendents. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendents) or (p[1] in descendents):
                    descendents.add(n)
                    isdescendent = True
            if isdescendent and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendents and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = 1
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = 1
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [n for n in heads.iterkeys() if heads[n] != 0]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            count = len(self)
            if not count:
                return [nullid]
            ishead = [1] * (count + 1)
            index = self.index
            for r in xrange(count):
                e = index[r]
                ishead[e[5]] = ishead[e[6]] = 0
            return [self.node(r) for r in xrange(count) if ishead[r]]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = set((startrev,))
        heads = set((startrev,))

        parentrevs = self.parentrevs
        for r in xrange(startrev + 1, len(self)):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                    heads.add(r)
                if p in heads and p not in stoprevs:
                    heads.remove(p)

        return [self.node(r) for r in heads]
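
    # Illustration (not part of the original file): both branches of heads()
    # rest on the same idea: a revision stops being a head as soon as some
    # later revision names it as a parent, so in a purely linear history
    # only the tip survives.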

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in range(p + 1, len(self)):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def _match(self, id):
        if isinstance(id, (long, int)):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) / 2 # grab an even number of digits
                bin_id = bin(id[:l*2])
                nl = [n for n in self.nodemap if n[:l] == bin_id]
                nl = [n for n in nl if hex(n).startswith(id)]
                if len(nl) > 0:
                    if len(nl) == 1:
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

    def cmp(self, node, text):
        """compare text with a given file revision"""
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node
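
    # Illustration (not part of the original file): cmp() returns True when
    # text differs from the stored revision. Hashing the candidate text with
    # the node's recorded parents settles the question without reconstructing
    # the stored text via revision().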

    def _addchunk(self, offset, data):
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _prereadsize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _loadchunk(self, offset, length):
        if self._inline:
            df = self.opener(self.indexfile)
        else:
            df = self.opener(self.datafile)

        readahead = max(65536, length)
        df.seek(offset)
        d = df.read(readahead)
        self._addchunk(offset, d)
        if readahead > length:
            return d[:length]
        return d

    def _getchunk(self, offset, length):
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return d[cachestart:cacheend]

        return self._loadchunk(offset, length)

    def _chunkraw(self, startrev, endrev):
        start = self.start(startrev)
        length = self.end(endrev) - start
        if self._inline:
            start += (startrev + 1) * self._io.size
        return self._getchunk(start, length)
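
    # Illustration (not part of the original file): in an inline revlog the
    # data chunks live in the .i file, interleaved with the 64-byte index
    # records, so the chunk for startrev actually begins
    # (startrev + 1) * self._io.size bytes past the offset recorded in the
    # index.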
956
956
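    # Offset arithmetic for inline revlogs, with illustrative numbers:
    # index entries and data chunks are interleaved in one file, so the
    # data for startrev lies after startrev + 1 index entries plus all
    # earlier data.  With self._io.size == 64 and self.start(3) == 1000,
    # the raw chunk for rev 3 begins at 1000 + (3 + 1) * 64 = 1256.
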
    def _chunk(self, rev):
        return decompress(self._chunkraw(rev, rev))

    def _chunkclear(self):
        self._chunkcache = (0, '')

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
            return self._chunk(rev2)

        return mdiff.textdiff(self.revision(self.node(rev1)),
                              self.revision(self.node(rev2)))

    def revision(self, node):
        """return an uncompressed revision of a given node"""
        if node == nullid:
            return ""
        if self._cache and self._cache[0] == node:
            return str(self._cache[2])

        # look up what we need to read
        text = None
        rev = self.rev(node)
        base = self.base(rev)

        # check rev flags
        if self.index[rev][0] & 0xFFFF:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.index[rev][0] & 0xFFFF))

        # do we have useful data cached?
        if self._cache and self._cache[1] >= base and self._cache[1] < rev:
            base = self._cache[1]
            text = str(self._cache[2])

        self._loadindex(base, rev + 1)
        self._chunkraw(base, rev)
        if text is None:
            text = self._chunk(base)

        bins = [self._chunk(r) for r in xrange(base + 1, rev + 1)]
        text = mdiff.patches(text, bins)
        p1, p2 = self.parents(node)
        if node != hash(text, p1, p2):
            raise RevlogError(_("integrity check failed on %s:%d")
                              % (self.indexfile, rev))

        self._cache = (node, rev, text)
        return text

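    # Reconstruction sketch, equivalent in spirit to the code above when
    # nothing useful is cached: the full text of rev is the snapshot at
    # base patched with every delta up to and including rev:
    #
    #   text = self._chunk(base)              # full snapshot
    #   for r in xrange(base + 1, rev + 1):
    #       text = mdiff.patches(text, [self._chunk(r)])
    #
    # The real code applies all deltas in a single mdiff.patches call,
    # which avoids materializing each intermediate revision.
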
    def checkinlinesize(self, tr, fp=None):
        """split an inline revlog into separate index and data files
        once its data has grown past roughly 128KiB; a no-op otherwise"""
        if not self._inline or (self.start(-2) + self.length(-2)) < 131072:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._chunkraw(r, r))
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call rename, the temp file will never replace the
        # real index
        fp.rename()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, d=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        d - an optional precomputed delta
        """
        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        try:
            return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

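    # Usage sketch (hypothetical caller): appending a revision on top of
    # the current tip, letting the delta be computed internally:
    #
    #   p1, p2 = rlog.node(len(rlog) - 1), nullid
    #   node = rlog.addrevision(text, tr, linkrev, p1, p2)
    #
    # `rlog`, `tr` (an open transaction) and `linkrev` are assumed to be
    # provided by the caller; the nodeid of the new revision is returned.
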
    def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
        node = hash(text, p1, p2)
        if node in self.nodemap:
            return node

        curr = len(self)
        prev = curr - 1
        base = self.base(prev)
        offset = self.end(prev)

        if curr:
            if not d:
                ptext = self.revision(self.node(prev))
                d = mdiff.textdiff(ptext, text)
            data = compress(d)
            l = len(data[1]) + len(data[0])
            dist = l + offset - self.start(base)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if not curr or dist > len(text) * 2:
            data = compress(text)
            l = len(data[1]) + len(data[0])
            base = curr

        e = (offset_type(offset, 0), l, len(text),
             base, link, self.rev(p1), self.rev(p2), node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

        self._cache = (node, curr, text)
        return node

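    # The snapshot heuristic above, with illustrative numbers: if the
    # new text is 1,000 bytes and dist (the compressed bytes from the
    # start of the current base through this delta) exceeds 2,000, the
    # text is stored as a fresh full version (base = curr) instead of
    # extending the chain, so reconstructing any revision never reads
    # much more than twice its uncompressed size.
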
    def ancestor(self, a, b):
        """calculate the least common ancestor of nodes a and b"""

        def parents(rev):
            return [p for p in self.parentrevs(rev) if p != nullrev]

        c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
        if c is None:
            return nullid

        return self.node(c)

    def group(self, nodelist, lookup, infocollect=None):
        """calculate a delta group

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes.  The first delta is
        parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
        have this parent, as it has all history before these
        changesets.  The delta parent used is always parents[0].
        """

        revs = [self.rev(n) for n in nodelist]

        # if we don't have any revisions touched by these changesets, bail
        if not revs:
            yield changegroup.closechunk()
            return

        # add the parent of the first rev
        p = self.parentrevs(revs[0])[0]
        revs.insert(0, p)

        # build deltas
        for d in xrange(len(revs) - 1):
            a, b = revs[d], revs[d + 1]
            nb = self.node(b)

            if infocollect is not None:
                infocollect(nb)

            p = self.parents(nb)
            meta = nb + p[0] + p[1] + lookup(nb)
            if a == -1:
                d = self.revision(nb)
                meta += mdiff.trivialdiffheader(len(d))
            else:
                d = self.revdiff(a, b)
            yield changegroup.chunkheader(len(meta) + len(d))
            yield meta
            if len(d) > 2**20:
                # slice deltas larger than 1MiB into 256KiB pieces
                pos = 0
                while pos < len(d):
                    pos2 = pos + 2 ** 18
                    yield d[pos:pos2]
                    pos = pos2
            else:
                yield d

        yield changegroup.closechunk()

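    # Wire layout emitted per revision, as produced by the loop above:
    #
    #   chunkheader(len(meta) + len(d))   length prefix
    #   meta   node + p1 + p2 + lookup(node), 4 x 20 = 80 bytes
    #   d      delta against the previous rev in the group, or a full
    #          text behind a trivial diff header when a == -1
    #
    # and a final closechunk() terminates the stream.
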
    def addgroup(self, revs, linkmapper, transaction):
        """
        add a delta group

        Given a set of deltas, add them to the revision log.  The
        first delta is against its parent, which should be in our
        log; the rest are against the previous delta.
        """

        # track the base of the current delta log
        r = len(self)
        t = r - 1
        node = None

        base = prev = nullrev
        start = end = textlen = 0
        if r:
            end = self.end(t)

        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        try:
            # loop through our set of deltas
            chain = None
            for chunk in revs:
                node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue
                delta = buffer(chunk, 80)
                del chunk

                for p in (p1, p2):
                    if p not in self.nodemap:
                        raise LookupError(p, self.indexfile, _('unknown parent'))

                if not chain:
                    # retrieve the parent revision of the delta chain
                    chain = p1
                    if chain not in self.nodemap:
                        raise LookupError(chain, self.indexfile, _('unknown base'))

                # full versions are inserted when the needed deltas become
                # comparable to the uncompressed text or when the previous
                # version is not the one we have a delta against. We use
                # the size of the previous full rev as a proxy for the
                # current size.

                if chain == prev:
                    cdelta = compress(delta)
                    cdeltalen = len(cdelta[0]) + len(cdelta[1])
                    textlen = mdiff.patchedsize(textlen, delta)

                if chain != prev or (end - start + cdeltalen) > textlen * 2:
                    # flush our writes here so we can read it in revision
                    if dfh:
                        dfh.flush()
                    ifh.flush()
                    text = self.revision(chain)
                    if len(text) == 0:
                        # skip over trivial delta header
                        text = buffer(delta, 12)
                    else:
                        text = mdiff.patches(text, [delta])
                    del delta
                    chk = self._addrevision(text, transaction, link, p1, p2, None,
                                            ifh, dfh)
                    if not dfh and not self._inline:
                        # addrevision switched from inline to conventional;
                        # reopen the index and data files
                        dfh = self.opener(self.datafile, "a")
                        ifh = self.opener(self.indexfile, "a")
                    if chk != node:
                        raise RevlogError(_("consistency error adding group"))
                    textlen = len(text)
                else:
                    e = (offset_type(end, 0), cdeltalen, textlen, base,
                         link, self.rev(p1), self.rev(p2), node)
                    self.index.insert(-1, e)
                    self.nodemap[node] = r
                    entry = self._io.packentry(e, self.node, self.version, r)
                    if self._inline:
                        ifh.write(entry)
                        ifh.write(cdelta[0])
                        ifh.write(cdelta[1])
                        self.checkinlinesize(transaction, ifh)
                        if not self._inline:
                            dfh = self.opener(self.datafile, "a")
                            ifh = self.opener(self.indexfile, "a")
                    else:
                        dfh.write(cdelta[0])
                        dfh.write(cdelta[1])
                        ifh.write(entry)

                t, r, chain, prev = r, r + 1, node, node
                base = self.base(t)
                start = self.start(base)
                end = self.end(t)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return node

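    # Usage sketch (hypothetical caller): consuming chunks produced by
    # group() on the sending side:
    #
    #   node = rlog.addgroup(chunks, lambda cs: cl.rev(cs), tr)
    #
    # where `chunks` iterates over the received delta chunks, `cl` is
    # assumed to be the changelog (so the lambda maps a changeset node
    # to its linkrev) and `tr` is an open transaction; the nodeid of the
    # last revision added is returned.
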
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        if isinstance(self.index, lazyindex):
            self._loadindexmap()

        for rev in self:
            if self.index[rev][4] >= minlink:
                break
        else:
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

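    # Truncation arithmetic with illustrative numbers: stripping from
    # rev 10 in a non-inline revlog with self._io.size == 64 truncates
    # the data file at self.start(10) and the index at 10 * 64 bytes;
    # in an inline revlog both live in one file, which is truncated at
    # self.start(10) + 10 * 64.
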
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            s = self._io.size
            i = max(0, actual / s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

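    # Interpreting the result: (0, 0) means both files match the sizes
    # implied by the index.  A positive dd means the data file carries
    # trailing bytes beyond the last indexed chunk, a negative dd that
    # it is truncated; di reports the analogous mismatch for the index
    # file (for inline revlogs, both kinds of damage show up in di).
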
    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res