revlog: factor out _maxinline global
Greg Ward
r10900:cf016c98 default

@@ -1,1396 +1,1398 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

# import stuff from node for others to import from revlog
from node import bin, hex, nullid, nullrev, short #@UnusedImport
from i18n import _
import changegroup, ancestor, mdiff, parsers, error, util
import struct, zlib, errno

_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress
_sha = util.sha1

# revlog flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
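
# Illustration of the version field layout (comments only, not part of
# this change): the low 16 bits carry the format, everything above them
# carries feature flags, so the default header value decomposes as
#
#     v = REVLOG_DEFAULT_VERSION    # 0x10001
#     v & 0xFFFF                    # 1       -> REVLOGNG
#     v & ~0xFFFF                   # 0x10000 -> REVLOGNGINLINEDATA
#
# which is exactly how revlog.__init__ below splits it into fmt and flags.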

_prereadsize = 1048576

+_maxinline = 131072 # max size of revlog with inline data
+
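# Illustration (comments only, not part of this change): 131072 bytes is
# 128 KiB. The global supports checks of the rough form
#
#     if self._inline and self.end(len(self) - 1) > _maxinline:
#         ...   # migrate from inline layout to split .i/.d files
#
# the exact check lives in revlog code beyond this hunk.
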
RevlogError = error.RevlogError
LookupError = error.LookupError

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    return long(long(offset) << 16 | type)
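
# Quick check of the 48/16-bit packing used by the index entries
# (comments only): the byte offset lives above bit 16 and the 16-bit
# type flags below it, so the two accessors invert offset_type exactly.
#
#     q = offset_type(12345, 7)
#     assert getoffset(q) == 12345 and gettype(q) == 7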

nullhash = _sha(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        l = [p1, p2]
        l.sort()
        s = _sha(l[0])
        s.update(l[1])
    s.update(text)
    return s.digest()
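
# Equivalent closed form, shown for clarity (comments only, stdlib sha1):
# with one null parent the digest is sha1(nullid + p1 + text), and with
# two real parents it is sha1(min(p1, p2) + max(p1, p2) + text); sorting
# the parents makes the hash independent of their order.
#
#     import hashlib
#     assert hash(text, p1, nullid) == \
#            hashlib.sha1(nullid + p1 + text).digest()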

def compress(text):
    """ generate a possibly-compressed representation of text """
    if not text:
        return ("", text)
    l = len(text)
    bin = None
    if l < 44:
        pass
    elif l > 1000000:
        # zlib makes an internal copy, thus doubling memory usage for
        # large files, so let's do this in pieces
        z = zlib.compressobj()
        p = []
        pos = 0
        while pos < l:
            pos2 = pos + 2**20
            p.append(z.compress(text[pos:pos2]))
            pos = pos2
        p.append(z.flush())
        if sum(map(len, p)) < l:
            bin = "".join(p)
    else:
        bin = _compress(text)
    if bin is None or len(bin) > l:
        if text[0] == '\0':
            return ("", text)
        return ('u', text)
    return ("", bin)

def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    t = bin[0]
    if t == '\0':
        return bin
    if t == 'x':
        return _decompress(bin)
    if t == 'u':
        return bin[1:]
    raise RevlogError(_("unknown compression type %r") % t)
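
# Round-trip invariant (comments only): callers store the two pieces
# returned by compress() concatenated, and decompress() dispatches on the
# first byte, '\0' for bare text, 'x' for a zlib stream (zlib output
# always starts with 0x78), 'u' for an uncompressed marker, so for any
# string t:
#
#     h, d = compress(t)
#     assert decompress(h + d) == t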

class lazyparser(object):
    """
    this class avoids the need to parse the entirety of large indices
    """

    # lazyparser is not safe to use on windows if win32 extensions not
    # available. it keeps the file handle open, which makes it impossible
    # to break hardlinks on local cloned repos.

    def __init__(self, dataf):
        try:
            size = util.fstat(dataf).st_size
        except AttributeError:
            size = 0
        self.dataf = dataf
        self.s = struct.calcsize(indexformatng)
        self.datasize = size
        self.l = size / self.s
        self.index = [None] * self.l
        self.map = {nullid: nullrev}
        self.allmap = 0
        self.all = 0
        self.mapfind_count = 0

    def loadmap(self):
        """
        during a commit, we need to make sure the rev being added is
        not a duplicate. This requires loading the entire index,
        which is fairly slow. loadmap can load up just the node map,
        which takes much less time.
        """
        if self.allmap:
            return
        end = self.datasize
        self.allmap = 1
        cur = 0
        count = 0
        blocksize = self.s * 256
        self.dataf.seek(0)
        while cur < end:
            data = self.dataf.read(blocksize)
            off = 0
            for x in xrange(256):
                n = data[off + ngshaoffset:off + ngshaoffset + 20]
                self.map[n] = count
                count += 1
                if count >= self.l:
                    break
                off += self.s
            cur += blocksize

    def loadblock(self, blockstart, blocksize, data=None):
        if self.all:
            return
        if data is None:
            self.dataf.seek(blockstart)
            if blockstart + blocksize > self.datasize:
                # the revlog may have grown since we've started running,
                # but we don't have space in self.index for more entries.
                # limit blocksize so that we don't get too much data.
                blocksize = max(self.datasize - blockstart, 0)
            data = self.dataf.read(blocksize)
        lend = len(data) / self.s
        i = blockstart / self.s
        off = 0
        # lazyindex supports __delitem__
        if lend > len(self.index) - i:
            lend = len(self.index) - i
        for x in xrange(lend):
            if self.index[i + x] is None:
                b = data[off : off + self.s]
                self.index[i + x] = b
                n = b[ngshaoffset:ngshaoffset + 20]
                self.map[n] = i + x
            off += self.s

    def findnode(self, node):
        """search backwards through the index file for a specific node"""
        if self.allmap:
            return None

        # hg log will cause many many searches for the manifest
        # nodes. After we get called a few times, just load the whole
        # thing.
        if self.mapfind_count > 8:
            self.loadmap()
            if node in self.map:
                return node
            return None
        self.mapfind_count += 1
        last = self.l - 1
        while self.index[last] != None:
            if last == 0:
                self.all = 1
                self.allmap = 1
                return None
            last -= 1
        end = (last + 1) * self.s
        blocksize = self.s * 256
        while end >= 0:
            start = max(end - blocksize, 0)
            self.dataf.seek(start)
            data = self.dataf.read(end - start)
            findend = end - start
            while True:
                # we're searching backwards, so we have to make sure
                # we don't find a changeset where this node is a parent
                off = data.find(node, 0, findend)
                findend = off
                if off >= 0:
                    i = off / self.s
                    off = i * self.s
                    n = data[off + ngshaoffset:off + ngshaoffset + 20]
                    if n == node:
                        self.map[n] = i + start / self.s
                        return node
                else:
                    break
            end -= blocksize
        return None

    def loadindex(self, i=None, end=None):
        if self.all:
            return
        all = False
        if i is None:
            blockstart = 0
            blocksize = (65536 / self.s) * self.s
            end = self.datasize
            all = True
        else:
            if end:
                blockstart = i * self.s
                end = end * self.s
                blocksize = end - blockstart
            else:
                blockstart = (i & ~1023) * self.s
                blocksize = self.s * 1024
                end = blockstart + blocksize
        while blockstart < end:
            self.loadblock(blockstart, blocksize)
            blockstart += blocksize
        if all:
            self.all = True

class lazyindex(object):
    """a lazy version of the index array"""
    def __init__(self, parser):
        self.p = parser
    def __len__(self):
        return len(self.p.index)
    def load(self, pos):
        if pos < 0:
            pos += len(self.p.index)
        self.p.loadindex(pos)
        return self.p.index[pos]
    def __getitem__(self, pos):
        return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
    def __setitem__(self, pos, item):
        self.p.index[pos] = _pack(indexformatng, *item)
    def __delitem__(self, pos):
        del self.p.index[pos]
    def insert(self, pos, e):
        self.p.index.insert(pos, _pack(indexformatng, *e))
    def append(self, e):
        self.p.index.append(_pack(indexformatng, *e))

class lazymap(object):
    """a lazy version of the node map"""
    def __init__(self, parser):
        self.p = parser
    def load(self, key):
        n = self.p.findnode(key)
        if n is None:
            raise KeyError(key)
    def __contains__(self, key):
        if key in self.p.map:
            return True
        self.p.loadmap()
        return key in self.p.map
    def __iter__(self):
        yield nullid
        for i in xrange(self.p.l):
            ret = self.p.index[i]
            if not ret:
                self.p.loadindex(i)
                ret = self.p.index[i]
            if isinstance(ret, str):
                ret = _unpack(indexformatng, ret)
            yield ret[7]
    def __getitem__(self, key):
        try:
            return self.p.map[key]
        except KeyError:
            try:
                self.load(key)
                return self.p.map[key]
            except KeyError:
                raise KeyError("node " + hex(key))
    def __setitem__(self, key, val):
        self.p.map[key] = val
    def __delitem__(self, key):
        del self.p.map[key]

indexformatv0 = ">4l20s20s20s"
v0shaoffset = 56

class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, fp, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        if len(data) == _prereadsize:
            data += fp.read() # read the rest
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_("index entry flags need RevlogNG"))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
ngshaoffset = 32
versionformat = ">I"
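
# Sanity check of the layout documented above (comments only): a RevlogNG
# entry is 64 bytes, and the nodeid starts right after the 8-byte
# offset/flags word and the six 4-byte integers, which is where
# ngshaoffset points.
#
#     import struct
#     assert struct.calcsize(indexformatng) == 64
#     assert 8 + 6 * 4 == ngshaoffset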

class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, fp, data, inline):
        if len(data) == _prereadsize:
            if util.openhardlinks() and not inline:
                # big index, let's parse it on demand
                parser = lazyparser(fp)
                index = lazyindex(parser)
                nodemap = lazymap(parser)
                e = list(index[0])
                type = gettype(e[0])
                e[0] = offset_type(0, type)
                index[0] = e
                return index, nodemap, None
            else:
                data += fp.read()

        # call the C implementation to parse the index data
        index, nodemap, cache = parsers.parse_index(data, inline)
        return index, nodemap, cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.
    """
    def __init__(self, opener, indexfile):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        self._cache = None
        self._chunkcache = (0, '')
        self.nodemap = {nullid: nullrev}
        self.index = []

        v = REVLOG_DEFAULT_VERSION
        if hasattr(opener, 'options') and 'defversion' in opener.options:
            v = opener.options['defversion']
            if v & REVLOGNG:
                v |= REVLOGNGINLINEDATA

        i = ''
        try:
            f = self.opener(self.indexfile)
            i = f.read(_prereadsize)
            if len(i) > 0:
                v = struct.unpack(versionformat, i[:4])[0]
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        if i:
            try:
                d = self._io.parseindex(f, i, self._inline)
            except (ValueError, IndexError):
                raise RevlogError(_("index %s is corrupted") % (self.indexfile))
            self.index, self.nodemap, self._chunkcache = d
            if not self._chunkcache:
                self._chunkclear()

        # add the magic null revision at -1 (if it hasn't been done already)
        if (self.index == [] or isinstance(self.index, lazyindex) or
            self.index[-1][7] != nullid):
            self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))

    def _loadindex(self, start, end):
        """load a block of indexes all at once from the lazy parser"""
        if isinstance(self.index, lazyindex):
            self.index.p.loadindex(start, end)

    def _loadindexmap(self):
        """loads both the map and the index from the lazy parser"""
        if isinstance(self.index, lazyindex):
            p = self.index.p
            p.loadindex()
            self.nodemap = p.map

    def _loadmap(self):
        """loads the map from the lazy parser"""
        if isinstance(self.nodemap, lazymap):
            self.nodemap.p.loadmap()
            self.nodemap = self.nodemap.p.map

    def tip(self):
        return self.node(len(self.index) - 2)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        for i in xrange(len(self)):
            yield i
    def rev(self, node):
        try:
            return self.nodemap[node]
        except KeyError:
            raise LookupError(node, self.indexfile, _('no node'))
    def node(self, rev):
        return self.index[rev][7]
    def linkrev(self, rev):
        return self.index[rev][4]
    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
    def parentrevs(self, rev):
        return self.index[rev][5:7]
    def start(self, rev):
        return int(self.index[rev][0] >> 16)
    def end(self, rev):
        return self.start(rev) + self.length(rev)
    def length(self, rev):
        return self.index[rev][1]
    def base(self, rev):
        return self.index[rev][3]
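
    # For reference, the accessors above index into the 8-tuple that
    # parseindex builds per revision:
    #
    #     (offset_flags, comp_len, uncomp_len, base, linkrev, p1, p2, node)
    #
    # so start() shifts entry[0] right by 16 bits to drop the type flags,
    # length() reads the compressed length, base() the delta-chain base,
    # and parentrevs() the two parent revision numbers.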

    def size(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(self.node(rev))
        return len(t)

    def reachable(self, node, stop=None):
        """return the set of all nodes ancestral to a given node, including
        the node itself, stopping when stop is matched"""
        reachable = set((node,))
        visit = [node]
        if stop:
            stopn = self.rev(stop)
        else:
            stopn = 0
        while visit:
            n = visit.pop(0)
            if n == stop:
                continue
            if n == nullid:
                continue
            for p in self.parents(n):
                if self.rev(p) < stopn:
                    continue
                if p not in reachable:
                    reachable.add(p)
                    visit.append(p)
        return reachable

    def ancestors(self, *revs):
        """Generate the ancestors of 'revs' in reverse topological order.

        Yield a sequence of revision numbers starting with the parents
        of each revision in revs, i.e., each revision is *not* considered
        an ancestor of itself. Results are in breadth-first order:
        parents of each rev in revs, then parents of those, etc. Result
        does not include the null revision."""
        visit = list(revs)
        seen = set([nullrev])
        while visit:
            for parent in self.parentrevs(visit.pop(0)):
                if parent not in seen:
                    visit.append(parent)
                    seen.add(parent)
                    yield parent

    def descendants(self, *revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        seen = set(revs)
        for i in xrange(min(revs) + 1, len(self)):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        has = set(self.ancestors(*common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = [r for r in heads if r not in has]
        while visit:
            r = visit.pop(0)
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return [self.node(r) for r in missing]
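
    # Worked example on a hypothetical five-revision graph (comments
    # only): with edges 0 <- 1 <- 2 and 1 <- 3 <- 4,
    # findmissing(common=[node 2], heads=[node 4]) computes
    # has = {null, 0, 1, 2}, walks 4 -> 3 and stops at 1, so it returns
    # the nodes of [3, 4], already sorted by revision number.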

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendent of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, 0)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendent of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [n for n in roots if n in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(n) for n in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendents = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendents.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendents, empty parents will look like
        # they're descendents.
        for r in xrange(max(lowestrev, 0), highestrev + 1):
            n = self.node(r)
            isdescendent = False
            if lowestrev == nullrev: # Everybody is a descendent of nullid
                isdescendent = True
            elif n in descendents:
                # n is already a descendent
                isdescendent = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendents before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendents, it's not a root.
                    if (p[0] in descendents) or (p[1] in descendents):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendent if either of its parents are
                # descendents. (We seeded the descendents set with the roots
                # up there, remember?)
                if (p[0] in descendents) or (p[1] in descendents):
                    descendents.add(n)
                    isdescendent = True
            if isdescendent and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendents and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = 1
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = 1
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [n for n in heads.iterkeys() if heads[n] != 0]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            count = len(self)
            if not count:
                return [nullid]
            ishead = [1] * (count + 1)
            index = self.index
            for r in xrange(count):
                e = index[r]
                ishead[e[5]] = ishead[e[6]] = 0
            return [self.node(r) for r in xrange(count) if ishead[r]]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = set((startrev,))
        heads = set((startrev,))

        parentrevs = self.parentrevs
        for r in xrange(startrev + 1, len(self)):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                    heads.add(r)
                    if p in heads and p not in stoprevs:
                        heads.remove(p)

        return [self.node(r) for r in heads]
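
    # Worked example for the start=stop=None fast path above (comments
    # only): with edges 0 <- 1 <- 2 and 1 <- 3, every revision starts out
    # as a candidate head and each index entry clears its parents' flags,
    # so ishead survives only for revisions 2 and 3.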
834 return [self.node(r) for r in heads]
833
835
834 def children(self, node):
836 def children(self, node):
835 """find the children of a given node"""
837 """find the children of a given node"""
836 c = []
838 c = []
837 p = self.rev(node)
839 p = self.rev(node)
838 for r in range(p + 1, len(self)):
840 for r in range(p + 1, len(self)):
839 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
841 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
840 if prevs:
842 if prevs:
841 for pr in prevs:
843 for pr in prevs:
842 if pr == p:
844 if pr == p:
843 c.append(self.node(r))
845 c.append(self.node(r))
844 elif p == nullrev:
846 elif p == nullrev:
845 c.append(self.node(r))
847 c.append(self.node(r))
846 return c
848 return c
847
849
848 def descendant(self, start, end):
850 def descendant(self, start, end):
849 for i in self.descendants(start):
851 for i in self.descendants(start):
850 if i == end:
852 if i == end:
851 return True
853 return True
852 elif i > end:
854 elif i > end:
853 break
855 break
854 return False
856 return False
855
857
856 def ancestor(self, a, b):
858 def ancestor(self, a, b):
857 """calculate the least common ancestor of nodes a and b"""
859 """calculate the least common ancestor of nodes a and b"""
858
860
859 # fast path, check if it is a descendant
861 # fast path, check if it is a descendant
860 a, b = self.rev(a), self.rev(b)
862 a, b = self.rev(a), self.rev(b)
861 start, end = sorted((a, b))
863 start, end = sorted((a, b))
862 if self.descendant(start, end):
864 if self.descendant(start, end):
863 return self.node(start)
865 return self.node(start)
864
866
865 def parents(rev):
867 def parents(rev):
866 return [p for p in self.parentrevs(rev) if p != nullrev]
868 return [p for p in self.parentrevs(rev) if p != nullrev]
867
869
868 c = ancestor.ancestor(a, b, parents)
870 c = ancestor.ancestor(a, b, parents)
869 if c is None:
871 if c is None:
870 return nullid
872 return nullid
871
873
872 return self.node(c)
874 return self.node(c)
873
875
874 def _match(self, id):
876 def _match(self, id):
875 if isinstance(id, (long, int)):
877 if isinstance(id, (long, int)):
876 # rev
878 # rev
877 return self.node(id)
879 return self.node(id)
878 if len(id) == 20:
880 if len(id) == 20:
879 # possibly a binary node
881 # possibly a binary node
880 # odds of a binary node being all hex in ASCII are 1 in 10**25
882 # odds of a binary node being all hex in ASCII are 1 in 10**25
881 try:
883 try:
882 node = id
884 node = id
883 self.rev(node) # quick search the index
885 self.rev(node) # quick search the index
884 return node
886 return node
885 except LookupError:
887 except LookupError:
886 pass # may be partial hex id
888 pass # may be partial hex id
887 try:
889 try:
888 # str(rev)
890 # str(rev)
889 rev = int(id)
891 rev = int(id)
890 if str(rev) != id:
892 if str(rev) != id:
891 raise ValueError
893 raise ValueError
892 if rev < 0:
894 if rev < 0:
893 rev = len(self) + rev
895 rev = len(self) + rev
894 if rev < 0 or rev >= len(self):
896 if rev < 0 or rev >= len(self):
895 raise ValueError
897 raise ValueError
896 return self.node(rev)
898 return self.node(rev)
897 except (ValueError, OverflowError):
899 except (ValueError, OverflowError):
898 pass
900 pass
899 if len(id) == 40:
901 if len(id) == 40:
900 try:
902 try:
901 # a full hex nodeid?
903 # a full hex nodeid?
902 node = bin(id)
904 node = bin(id)
903 self.rev(node)
905 self.rev(node)
904 return node
906 return node
905 except (TypeError, LookupError):
907 except (TypeError, LookupError):
906 pass
908 pass
907
909
908 def _partialmatch(self, id):
910 def _partialmatch(self, id):
909 if len(id) < 40:
911 if len(id) < 40:
910 try:
912 try:
911 # hex(node)[:...]
913 # hex(node)[:...]
912 l = len(id) // 2 # grab an even number of digits
914 l = len(id) // 2 # grab an even number of digits
913 bin_id = bin(id[:l * 2])
915 bin_id = bin(id[:l * 2])
914 nl = [n for n in self.nodemap if n[:l] == bin_id]
916 nl = [n for n in self.nodemap if n[:l] == bin_id]
915 nl = [n for n in nl if hex(n).startswith(id)]
917 nl = [n for n in nl if hex(n).startswith(id)]
916 if len(nl) > 0:
918 if len(nl) > 0:
917 if len(nl) == 1:
919 if len(nl) == 1:
918 return nl[0]
920 return nl[0]
919 raise LookupError(id, self.indexfile,
921 raise LookupError(id, self.indexfile,
920 _('ambiguous identifier'))
922 _('ambiguous identifier'))
921 return None
923 return None
922 except TypeError:
924 except TypeError:
923 pass
925 pass
924
926
925 def lookup(self, id):
927 def lookup(self, id):
926 """locate a node based on:
928 """locate a node based on:
927 - revision number or str(revision number)
929 - revision number or str(revision number)
928 - nodeid or subset of hex nodeid
930 - nodeid or subset of hex nodeid
929 """
931 """
930 n = self._match(id)
932 n = self._match(id)
931 if n is not None:
933 if n is not None:
932 return n
934 return n
933 n = self._partialmatch(id)
935 n = self._partialmatch(id)
934 if n:
936 if n:
935 return n
937 return n
936
938
937 raise LookupError(id, self.indexfile, _('no match found'))
939 raise LookupError(id, self.indexfile, _('no match found'))
938
940
939 def cmp(self, node, text):
941 def cmp(self, node, text):
940 """compare text with a given file revision"""
942 """compare text with a given file revision"""
941 p1, p2 = self.parents(node)
943 p1, p2 = self.parents(node)
942 return hash(text, p1, p2) != node
944 return hash(text, p1, p2) != node
943
945
944 def _addchunk(self, offset, data):
946 def _addchunk(self, offset, data):
945 o, d = self._chunkcache
947 o, d = self._chunkcache
946 # try to add to existing cache
948 # try to add to existing cache
947 if o + len(d) == offset and len(d) + len(data) < _prereadsize:
949 if o + len(d) == offset and len(d) + len(data) < _prereadsize:
948 self._chunkcache = o, d + data
950 self._chunkcache = o, d + data
949 else:
951 else:
950 self._chunkcache = offset, data
952 self._chunkcache = offset, data
951
953
952 def _loadchunk(self, offset, length):
954 def _loadchunk(self, offset, length):
953 if self._inline:
955 if self._inline:
954 df = self.opener(self.indexfile)
956 df = self.opener(self.indexfile)
955 else:
957 else:
956 df = self.opener(self.datafile)
958 df = self.opener(self.datafile)
957
959
958 readahead = max(65536, length)
960 readahead = max(65536, length)
959 df.seek(offset)
961 df.seek(offset)
960 d = df.read(readahead)
962 d = df.read(readahead)
961 self._addchunk(offset, d)
963 self._addchunk(offset, d)
962 if readahead > length:
964 if readahead > length:
963 return d[:length]
965 return d[:length]
964 return d
966 return d
965
967
966 def _getchunk(self, offset, length):
968 def _getchunk(self, offset, length):
967 o, d = self._chunkcache
969 o, d = self._chunkcache
968 l = len(d)
970 l = len(d)
969
971
970 # is it in the cache?
972 # is it in the cache?
971 cachestart = offset - o
973 cachestart = offset - o
972 cacheend = cachestart + length
974 cacheend = cachestart + length
973 if cachestart >= 0 and cacheend <= l:
975 if cachestart >= 0 and cacheend <= l:
974 if cachestart == 0 and cacheend == l:
976 if cachestart == 0 and cacheend == l:
975 return d # avoid a copy
977 return d # avoid a copy
976 return d[cachestart:cacheend]
978 return d[cachestart:cacheend]
977
979
978 return self._loadchunk(offset, length)
980 return self._loadchunk(offset, length)
979
981
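# --- illustration only, not part of revlog.py ---
# The three methods above implement a single sliding window over the
# file: hits are sliced out of one cached buffer, misses read at least
# 64 KiB, and contiguous reads grow the window until it would exceed
# _prereadsize (1 MiB). A self-contained sketch of the same policy;
# ChunkCache is a hypothetical name.
class ChunkCache(object):
    def __init__(self, fileobj, readahead=65536, maxsize=1048576):
        self._f = fileobj
        self._readahead = readahead
        self._maxsize = maxsize
        self._cache = (0, '')          # (offset, data) window

    def read(self, offset, length):
        o, d = self._cache
        start, end = offset - o, offset - o + length
        if 0 <= start and end <= len(d):
            return d[start:end]        # cache hit: slice, no I/O
        self._f.seek(offset)
        data = self._f.read(max(self._readahead, length))
        if o + len(d) == offset and len(d) + len(data) < self._maxsize:
            self._cache = (o, d + data)   # contiguous: extend window
        else:
            self._cache = (offset, data)  # otherwise restart window
        return data[:length]
# --- end illustration ---
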
    def _chunkraw(self, startrev, endrev):
        start = self.start(startrev)
        length = self.end(endrev) - start
        if self._inline:
            start += (startrev + 1) * self._io.size
        return self._getchunk(start, length)

    def _chunk(self, rev):
        return decompress(self._chunkraw(rev, rev))

    def _chunkclear(self):
        self._chunkcache = (0, '')

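# --- illustration only, not part of revlog.py ---
# For an inline revlog, _chunkraw() must skip the index entries that
# are interleaved with the data: rev r's chunk is preceded by entries
# 0 through r, hence the (startrev + 1) * self._io.size adjustment.
# A worked example with made-up offsets; 64 bytes per entry is the
# usual RevlogNG size, but treat that number as an assumption here.
iosize = 64            # bytes per index entry (assumed)
starts = [0, 30, 75]   # start(r): cumulative compressed-chunk offsets

def inline_offset(rev):
    # entries 0..rev come before rev's data, hence the +1
    return starts[rev] + (rev + 1) * iosize

assert inline_offset(0) == 64    # entry 0 (64 bytes), then chunk 0
assert inline_offset(1) == 158   # entries 0-1 plus 30 bytes of chunk 0
# --- end illustration ---
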
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
            return self._chunk(rev2)

        return mdiff.textdiff(self.revision(self.node(rev1)),
                              self.revision(self.node(rev2)))

    def revision(self, node):
        """return an uncompressed revision of a given node"""
        if node == nullid:
            return ""
        if self._cache and self._cache[0] == node:
            return self._cache[2]

        # look up what we need to read
        text = None
        rev = self.rev(node)
        base = self.base(rev)

        # check rev flags
        if self.index[rev][0] & 0xFFFF:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.index[rev][0] & 0xFFFF))

        # do we have useful data cached?
        if self._cache and self._cache[1] >= base and self._cache[1] < rev:
            base = self._cache[1]
            text = self._cache[2]

        self._loadindex(base, rev + 1)
        self._chunkraw(base, rev)
        if text is None:
            text = self._chunk(base)

        bins = [self._chunk(r) for r in xrange(base + 1, rev + 1)]
        text = mdiff.patches(text, bins)
        p1, p2 = self.parents(node)
        if node != hash(text, p1, p2):
            raise RevlogError(_("integrity check failed on %s:%d")
                              % (self.indexfile, rev))

        self._cache = (node, rev, text)
        return text

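# --- illustration only, not part of revlog.py ---
# revision() walks a delta chain: one full snapshot at base(rev), then
# every intermediate delta applied in order. A toy version of that
# loop, with a splice-style delta standing in for the binary patches
# mdiff applies in the real code.
def apply_patch(text, delta):
    # toy delta: a (start, end, replacement) splice
    start, end, data = delta
    return text[:start] + data + text[end:]

def reconstruct(full_text, deltas):
    # mirror of revision(): snapshot at the chain base, then the
    # deltas for base+1 through the wanted rev, in order
    text = full_text
    for d in deltas:
        text = apply_patch(text, d)
    return text

assert reconstruct('hello world', [(0, 5, 'goodbye')]) == 'goodbye world'
# --- end illustration ---
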
    def checkinlinesize(self, tr, fp=None):
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._chunkraw(r, r))
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call rename, the temp file will never replace the
        # real index
        fp.rename()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

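# --- illustration only, not part of revlog.py ---
# The trigger condition reads the sizes of the last real revision:
# assuming index[-1] is the usual nullid sentinel (the insert(-1, ...)
# calls below suggest it), start(-2) + length(-2) is the total number
# of data bytes stored inline. A worked check against the 128 KiB
# threshold; should_split is an illustrative name.
MAXINLINE = 131072   # mirrors the module-level _maxinline constant

def should_split(start_last, length_last):
    # split into separate .i/.d files once inline data reaches 128 KiB
    return start_last + length_last >= MAXINLINE

assert not should_split(100000, 31071)   # 131071 bytes: stays inline
assert should_split(100000, 31072)       # 131072 bytes: migrate
# --- end illustration ---
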
    def addrevision(self, text, transaction, link, p1, p2, d=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        d - an optional precomputed delta
        """
        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        try:
            return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
        node = hash(text, p1, p2)
        if node in self.nodemap:
            return node

        curr = len(self)
        prev = curr - 1
        base = self.base(prev)
        offset = self.end(prev)

        if curr:
            if not d:
                ptext = self.revision(self.node(prev))
                d = mdiff.textdiff(ptext, text)
            data = compress(d)
            l = len(data[1]) + len(data[0])
            dist = l + offset - self.start(base)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if not curr or dist > len(text) * 2:
            data = compress(text)
            l = len(data[1]) + len(data[0])
            base = curr

        e = (offset_type(offset, 0), l, len(text),
             base, link, self.rev(p1), self.rev(p2), node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

        if type(text) == str: # only accept immutable objects
            self._cache = (node, curr, text)
        return node

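# --- illustration only, not part of revlog.py ---
# The "dist > len(text) * 2" test bounds delta chains by size rather
# than by length: once the compressed bytes between the chain base and
# the new revision outweigh two copies of the new text, a fresh
# snapshot is cheaper to read back. A worked sketch;
# store_full_snapshot is an illustrative name.
def store_full_snapshot(curr, dist, textlen):
    # rev 0 is always a full text; otherwise snapshot when the chain
    # (compressed bytes since the base, including this delta) exceeds
    # twice the new text's uncompressed size
    return not curr or dist > textlen * 2

# a 1000-byte text whose chain holds 2050 compressed bytes:
# 2050 > 2000, so a new full version is stored
assert store_full_snapshot(5, 2050, 1000)
assert not store_full_snapshot(5, 1999, 1000)
# --- end illustration ---
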
    def group(self, nodelist, lookup, infocollect=None):
        """Calculate a delta group, yielding a sequence of changegroup chunks
        (strings).

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
        have this parent, as it has all history before these
        changesets. The parent used is parents[0].
        """

        revs = [self.rev(n) for n in nodelist]

        # if we don't have any revisions touched by these changesets, bail
        if not revs:
            yield changegroup.closechunk()
            return

        # add the parent of the first rev
        p = self.parentrevs(revs[0])[0]
        revs.insert(0, p)

        # build deltas
        for d in xrange(len(revs) - 1):
            a, b = revs[d], revs[d + 1]
            nb = self.node(b)

            if infocollect is not None:
                infocollect(nb)

            p = self.parents(nb)
            meta = nb + p[0] + p[1] + lookup(nb)
            if a == -1:
                d = self.revision(nb)
                meta += mdiff.trivialdiffheader(len(d))
            else:
                d = self.revdiff(a, b)
            yield changegroup.chunkheader(len(meta) + len(d))
            yield meta
            if len(d) > 2**20:
                pos = 0
                while pos < len(d):
                    pos2 = pos + 2 ** 18
                    yield d[pos:pos2]
                    pos = pos2
            else:
                yield d

        yield changegroup.closechunk()

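# --- illustration only, not part of revlog.py ---
# Each chunk that group() yields is length-prefixed, and deltas over
# 1 MiB are re-yielded in 256 KiB slices so no single string gets
# huge. A sketch of the framing, assuming the classic changegroup
# encoding in which the 4-byte big-endian length counts itself and a
# zero length closes the group; frame() is an illustrative name.
import struct

def chunkheader(datalen):
    # assumed framing: length prefix includes its own 4 bytes
    return struct.pack(">l", datalen + 4)

def closechunk():
    return struct.pack(">l", 0)   # zero length terminates the group

def frame(meta, delta):
    # one chunk: header, 80-byte meta, then the delta; the real
    # group() only slices deltas over 1 MiB, we slice unconditionally
    yield chunkheader(len(meta) + len(delta))
    yield meta
    for pos in range(0, len(delta), 2 ** 18):
        yield delta[pos:pos + 2 ** 18]
# --- end illustration ---
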
    def addgroup(self, revs, linkmapper, transaction):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The
        first delta is against its parent, which should be in our
        log; the rest are against the previous delta.
        """

        # track the base of the current delta log
        r = len(self)
        t = r - 1
        node = None

        base = prev = nullrev
        start = end = textlen = 0
        if r:
            end = self.end(t)

        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        try:
            # loop through our set of deltas
            chain = None
            for chunk in revs:
                node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue
                delta = buffer(chunk, 80)
                del chunk

                for p in (p1, p2):
                    if not p in self.nodemap:
                        raise LookupError(p, self.indexfile, _('unknown parent'))

                if not chain:
                    # retrieve the parent revision of the delta chain
                    chain = p1
                    if not chain in self.nodemap:
                        raise LookupError(chain, self.indexfile, _('unknown base'))

                # full versions are inserted when the needed deltas become
                # comparable to the uncompressed text or when the previous
                # version is not the one we have a delta against. We use
                # the size of the previous full rev as a proxy for the
                # current size.

                if chain == prev:
                    cdelta = compress(delta)
                    cdeltalen = len(cdelta[0]) + len(cdelta[1])
                    textlen = mdiff.patchedsize(textlen, delta)

                if chain != prev or (end - start + cdeltalen) > textlen * 2:
                    # flush our writes here so we can read it in revision
                    if dfh:
                        dfh.flush()
                    ifh.flush()
                    text = self.revision(chain)
                    if len(text) == 0:
                        # skip over trivial delta header
                        text = buffer(delta, 12)
                    else:
                        text = mdiff.patches(text, [delta])
                    del delta
                    chk = self._addrevision(text, transaction, link, p1, p2, None,
                                            ifh, dfh)
                    if not dfh and not self._inline:
                        # addrevision switched from inline to conventional
                        # reopen the index
                        dfh = self.opener(self.datafile, "a")
                        ifh = self.opener(self.indexfile, "a")
                    if chk != node:
                        raise RevlogError(_("consistency error adding group"))
                    textlen = len(text)
                else:
                    e = (offset_type(end, 0), cdeltalen, textlen, base,
                         link, self.rev(p1), self.rev(p2), node)
                    self.index.insert(-1, e)
                    self.nodemap[node] = r
                    entry = self._io.packentry(e, self.node, self.version, r)
                    if self._inline:
                        ifh.write(entry)
                        ifh.write(cdelta[0])
                        ifh.write(cdelta[1])
                        self.checkinlinesize(transaction, ifh)
                        if not self._inline:
                            dfh = self.opener(self.datafile, "a")
                            ifh = self.opener(self.indexfile, "a")
                    else:
                        dfh.write(cdelta[0])
                        dfh.write(cdelta[1])
                        ifh.write(entry)

                t, r, chain, prev = r, r + 1, node, node
                base = self.base(t)
                start = self.start(base)
                end = self.end(t)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return node

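# --- illustration only, not part of revlog.py ---
# The wire format addgroup() consumes is visible in its first unpack:
# an 80-byte header of four raw sha1 ids -- node, parent 1, parent 2,
# and the linked changeset -- followed by the binary delta. A sketch;
# parse_chunk is an illustrative name.
import struct

def parse_chunk(chunk):
    node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
    delta = chunk[80:]   # the real code uses buffer() to avoid a copy
    return node, p1, p2, cs, delta
# --- end illustration ---
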
    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        if isinstance(self.index, lazyindex):
            self._loadindexmap()

        for rev in self:
            if self.index[rev][4] >= minlink:
                break
        else:
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

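# --- illustration only, not part of revlog.py ---
# The truncation arithmetic in strip() differs between layouts: a
# split revlog cuts the data file at start(rev) and the index at rev
# whole entries, while an inline revlog cuts one file at the sum of
# both. A worked sketch with made-up numbers; truncation_offsets is an
# illustrative name.
def truncation_offsets(rev, start_rev, iosize, inline):
    # where to cut each file so revisions >= rev disappear;
    # start_rev is start(rev), iosize is the per-entry index size
    if inline:
        return {'.i': start_rev + rev * iosize}
    return {'.i': rev * iosize, '.d': start_rev}

# dropping everything from rev 10 in a split revlog with 64-byte
# entries whose rev-10 data begins at byte 5000:
assert truncation_offsets(10, 5000, 64, False) == {'.i': 640, '.d': 5000}
# --- end illustration ---
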
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res