revlog: don't silently discard revlog flags on revlogv0
Benoit Boissinot - r10395:ea52a2d4 default
@@ -1,1414 +1,1416 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

# import stuff from node for others to import from revlog
from node import bin, hex, nullid, nullrev, short #@UnusedImport
from i18n import _
import changegroup, ancestor, mdiff, parsers, error, util
import struct, zlib, errno

_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress
_sha = util.sha1

# revlog flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS

_prereadsize = 1048576

RevlogError = error.RevlogError
LookupError = error.LookupError

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    return long(long(offset) << 16 | type)

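# A rough sketch of how these helpers round-trip (illustrative values only):
#
#   q = offset_type(4096, 1)   # (4096L << 16) | 1
#   getoffset(q)               # -> 4096
#   gettype(q)                 # -> 1
#
# The low 16 bits of an index entry's first field carry per-revision flags;
# the remaining bits are the data offset.
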
nullhash = _sha(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        l = [p1, p2]
        l.sort()
        s = _sha(l[0])
        s.update(l[1])
    s.update(text)
    return s.digest()

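# Two illustrative properties of hash(), assuming _sha is SHA-1: the parents
# are sorted before hashing, so the result is symmetric in p1 and p2, and a
# null parent degenerates to hashing nullid first (nullid is 20 zero bytes,
# so it always sorts first anyway):
#
#   hash(text, p1, p2) == hash(text, p2, p1)
#   hash(text, p1, nullid) == _sha(nullid + p1 + text).digest()
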
def compress(text):
    """ generate a possibly-compressed representation of text """
    if not text:
        return ("", text)
    l = len(text)
    bin = None
    if l < 44:
        pass
    elif l > 1000000:
        # zlib makes an internal copy, thus doubling memory usage for
        # large files, so let's do this in pieces
        z = zlib.compressobj()
        p = []
        pos = 0
        while pos < l:
            pos2 = pos + 2**20
            p.append(z.compress(text[pos:pos2]))
            pos = pos2
        p.append(z.flush())
        if sum(map(len, p)) < l:
            bin = "".join(p)
    else:
        bin = _compress(text)
    if bin is None or len(bin) > l:
        if text[0] == '\0':
            return ("", text)
        return ('u', text)
    return ("", bin)

def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    t = bin[0]
    if t == '\0':
        return bin
    if t == 'x':
        return _decompress(bin)
    if t == 'u':
        return bin[1:]
    raise RevlogError(_("unknown compression type %r") % t)

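# A sketch of the resulting one-byte markers (illustrative inputs only):
#
#   compress('')          -> ('', '')       empty text is stored as-is
#   compress('x' * 1000)  -> ('', <zlib>)   the 'x' marker is just the first
#                                           byte of a zlib stream
#   compress('abc')       -> ('u', 'abc')   short/incompressible: raw, 'u'-tagged
#
# decompress() dispatches on that first byte: '\0' means uncompressed data
# that happens to start with NUL, 'x' a zlib stream, 'u' raw text after the
# marker byte.
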
class lazyparser(object):
    """
    this class avoids the need to parse the entirety of large indices
    """

    # lazyparser is not safe to use on windows if win32 extensions are not
    # available. it keeps the file handle open, which makes it impossible
    # to break hardlinks on local cloned repos.

    def __init__(self, dataf):
        try:
            size = util.fstat(dataf).st_size
        except AttributeError:
            size = 0
        self.dataf = dataf
        self.s = struct.calcsize(indexformatng)
        self.datasize = size
        self.l = size / self.s
        self.index = [None] * self.l
        self.map = {nullid: nullrev}
        self.allmap = 0
        self.all = 0
        self.mapfind_count = 0

    def loadmap(self):
        """
        during a commit, we need to make sure the rev being added is
        not a duplicate. This requires loading the entire index,
        which is fairly slow. loadmap can load up just the node map,
        which takes much less time.
        """
        if self.allmap:
            return
        end = self.datasize
        self.allmap = 1
        cur = 0
        count = 0
        blocksize = self.s * 256
        self.dataf.seek(0)
        while cur < end:
            data = self.dataf.read(blocksize)
            off = 0
            for x in xrange(256):
                n = data[off + ngshaoffset:off + ngshaoffset + 20]
                self.map[n] = count
                count += 1
                if count >= self.l:
                    break
                off += self.s
            cur += blocksize

    def loadblock(self, blockstart, blocksize, data=None):
        if self.all:
            return
        if data is None:
            self.dataf.seek(blockstart)
            if blockstart + blocksize > self.datasize:
                # the revlog may have grown since we've started running,
                # but we don't have space in self.index for more entries.
                # limit blocksize so that we don't get too much data.
                blocksize = max(self.datasize - blockstart, 0)
            data = self.dataf.read(blocksize)
        lend = len(data) / self.s
        i = blockstart / self.s
        off = 0
        # lazyindex supports __delitem__
        if lend > len(self.index) - i:
            lend = len(self.index) - i
        for x in xrange(lend):
            if self.index[i + x] is None:
                b = data[off : off + self.s]
                self.index[i + x] = b
                n = b[ngshaoffset:ngshaoffset + 20]
                self.map[n] = i + x
            off += self.s

    def findnode(self, node):
        """search backwards through the index file for a specific node"""
        if self.allmap:
            return None

        # hg log will cause many many searches for the manifest
        # nodes. After we get called a few times, just load the whole
        # thing.
        if self.mapfind_count > 8:
            self.loadmap()
            if node in self.map:
                return node
            return None
        self.mapfind_count += 1
        last = self.l - 1
        while self.index[last] != None:
            if last == 0:
                self.all = 1
                self.allmap = 1
                return None
            last -= 1
        end = (last + 1) * self.s
        blocksize = self.s * 256
        while end >= 0:
            start = max(end - blocksize, 0)
            self.dataf.seek(start)
            data = self.dataf.read(end - start)
            findend = end - start
            while True:
                # we're searching backwards, so we have to make sure
                # we don't find a changeset where this node is a parent
                off = data.find(node, 0, findend)
                findend = off
                if off >= 0:
                    i = off / self.s
                    off = i * self.s
                    n = data[off + ngshaoffset:off + ngshaoffset + 20]
                    if n == node:
                        self.map[n] = i + start / self.s
                        return node
                else:
                    break
            end -= blocksize
        return None

    def loadindex(self, i=None, end=None):
        if self.all:
            return
        all = False
        if i is None:
            blockstart = 0
            blocksize = (65536 / self.s) * self.s
            end = self.datasize
            all = True
        else:
            if end:
                blockstart = i * self.s
                end = end * self.s
                blocksize = end - blockstart
            else:
                blockstart = (i & ~1023) * self.s
                blocksize = self.s * 1024
                end = blockstart + blocksize
        while blockstart < end:
            self.loadblock(blockstart, blocksize)
            blockstart += blocksize
        if all:
            self.all = True

class lazyindex(object):
    """a lazy version of the index array"""
    def __init__(self, parser):
        self.p = parser
    def __len__(self):
        return len(self.p.index)
    def load(self, pos):
        if pos < 0:
            pos += len(self.p.index)
        self.p.loadindex(pos)
        return self.p.index[pos]
    def __getitem__(self, pos):
        return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
    def __setitem__(self, pos, item):
        self.p.index[pos] = _pack(indexformatng, *item)
    def __delitem__(self, pos):
        del self.p.index[pos]
    def insert(self, pos, e):
        self.p.index.insert(pos, _pack(indexformatng, *e))
    def append(self, e):
        self.p.index.append(_pack(indexformatng, *e))

class lazymap(object):
    """a lazy version of the node map"""
    def __init__(self, parser):
        self.p = parser
    def load(self, key):
        n = self.p.findnode(key)
        if n is None:
            raise KeyError(key)
    def __contains__(self, key):
        if key in self.p.map:
            return True
        self.p.loadmap()
        return key in self.p.map
    def __iter__(self):
        yield nullid
        for i in xrange(self.p.l):
            ret = self.p.index[i]
            if not ret:
                self.p.loadindex(i)
                ret = self.p.index[i]
            if isinstance(ret, str):
                ret = _unpack(indexformatng, ret)
            yield ret[7]
    def __getitem__(self, key):
        try:
            return self.p.map[key]
        except KeyError:
            try:
                self.load(key)
                return self.p.map[key]
            except KeyError:
                raise KeyError("node " + hex(key))
    def __setitem__(self, key, val):
        self.p.map[key] = val
    def __delitem__(self, key):
        del self.p.map[key]

indexformatv0 = ">4l20s20s20s"
v0shaoffset = 56

class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, fp, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        if len(data) == _prereadsize:
            data += fp.read() # read the rest
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
+        if gettype(entry[0]):
+            raise RevlogError(_("index entry flags need RevlogNG"))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

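# A sketch of the guard added to packentry above (hypothetical entry values):
# an entry whose offset/flags word carries a nonzero type can no longer be
# silently written out as v0, because indexformatv0 has no flags field:
#
#   e = (offset_type(0, 1), 10, 20, 0, 0, -1, -1, somenode)
#   revlogoldio().packentry(e, nodefunc, REVLOGV0, 0)
#       -> raises RevlogError("index entry flags need RevlogNG")
#
# Before this check, getoffset(entry[0]) simply discarded the flag bits.
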
# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
ngshaoffset = 32
versionformat = ">I"

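# Each v1 ("ng") index record is a fixed 64 bytes (struct.calcsize of the
# format above).  A sketch of decoding one raw record, with illustrative
# names only:
#
#   e = _unpack(indexformatng, record)
#   # e == (offset_flags, comp_len, uncomp_len, base, link, p1, p2, nodeid)
#
# The "12x" pad bytes are skipped by unpack, so e[7] is exactly the 20-byte
# nodeid, which starts at byte 32 of the record (hence ngshaoffset = 32).
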
class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, fp, data, inline):
        if len(data) == _prereadsize:
            if util.openhardlinks() and not inline:
                # big index, let's parse it on demand
                parser = lazyparser(fp)
                index = lazyindex(parser)
                nodemap = lazymap(parser)
                e = list(index[0])
                type = gettype(e[0])
                e[0] = offset_type(0, type)
                index[0] = e
                return index, nodemap, None
            else:
                data += fp.read()

        # call the C implementation to parse the index data
        index, nodemap, cache = parsers.parse_index(data, inline)
        return index, nodemap, cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.
    """
    def __init__(self, opener, indexfile):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        self._cache = None
        self._chunkcache = (0, '')
        self.nodemap = {nullid: nullrev}
        self.index = []

        v = REVLOG_DEFAULT_VERSION
        if hasattr(opener, 'options') and 'defversion' in opener.options:
            v = opener.options['defversion']
            if v & REVLOGNG:
                v |= REVLOGNGINLINEDATA

        i = ''
        try:
            f = self.opener(self.indexfile)
            i = f.read(_prereadsize)
            if len(i) > 0:
                v = struct.unpack(versionformat, i[:4])[0]
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

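        # The version word just validated packs format and flags together:
        # the low 16 bits are the format, the high 16 bits are feature
        # flags.  For example, the default REVLOGNG | REVLOGNGINLINEDATA
        # is 0x00010001: fmt == 1, flags == 0x10000 (inline data).  The
        # format half selects the io class below.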
        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        if i:
            try:
                d = self._io.parseindex(f, i, self._inline)
            except (ValueError, IndexError):
                raise RevlogError(_("index %s is corrupted") % (self.indexfile))
            self.index, self.nodemap, self._chunkcache = d
            if not self._chunkcache:
                self._chunkclear()

        # add the magic null revision at -1 (if it hasn't been done already)
        if (self.index == [] or isinstance(self.index, lazyindex) or
            self.index[-1][7] != nullid):
            self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))

    def _loadindex(self, start, end):
        """load a block of indexes all at once from the lazy parser"""
        if isinstance(self.index, lazyindex):
            self.index.p.loadindex(start, end)

    def _loadindexmap(self):
        """loads both the map and the index from the lazy parser"""
        if isinstance(self.index, lazyindex):
            p = self.index.p
            p.loadindex()
            self.nodemap = p.map

    def _loadmap(self):
        """loads the map from the lazy parser"""
        if isinstance(self.nodemap, lazymap):
            self.nodemap.p.loadmap()
            self.nodemap = self.nodemap.p.map

    def tip(self):
        return self.node(len(self.index) - 2)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        for i in xrange(len(self)):
            yield i
    def rev(self, node):
        try:
            return self.nodemap[node]
        except KeyError:
            raise LookupError(node, self.indexfile, _('no node'))
    def node(self, rev):
        return self.index[rev][7]
    def linkrev(self, rev):
        return self.index[rev][4]
    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
    def parentrevs(self, rev):
        return self.index[rev][5:7]
    def start(self, rev):
        return int(self.index[rev][0] >> 16)
    def end(self, rev):
        return self.start(rev) + self.length(rev)
    def length(self, rev):
        return self.index[rev][1]
    def base(self, rev):
        return self.index[rev][3]

    def size(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(self.node(rev))
        return len(t)

        # Alternate implementation. The advantage to this code is it
        # will be faster for a single revision. However, the results
        # are not cached, so finding the size of every revision will
        # be slower.
        #
        # if self.cache and self.cache[1] == rev:
        #     return len(self.cache[2])
        #
        # base = self.base(rev)
        # if self.cache and self.cache[1] >= base and self.cache[1] < rev:
        #     base = self.cache[1]
        #     text = self.cache[2]
        # else:
        #     text = self.revision(self.node(base))
        #
        # l = len(text)
        # for x in xrange(base + 1, rev + 1):
        #     l = mdiff.patchedsize(l, self._chunk(x))
        # return l

    def reachable(self, node, stop=None):
        """return the set of all nodes ancestral to a given node, including
        the node itself, stopping when stop is matched"""
        reachable = set((node,))
        visit = [node]
        if stop:
            stopn = self.rev(stop)
        else:
            stopn = 0
        while visit:
            n = visit.pop(0)
            if n == stop:
                continue
            if n == nullid:
                continue
            for p in self.parents(n):
                if self.rev(p) < stopn:
                    continue
                if p not in reachable:
                    reachable.add(p)
                    visit.append(p)
        return reachable

    def ancestors(self, *revs):
        """Generate the ancestors of 'revs' in reverse topological order.

        Yield a sequence of revision numbers starting with the parents
        of each revision in revs, i.e., each revision is *not* considered
        an ancestor of itself. Results are in breadth-first order:
        parents of each rev in revs, then parents of those, etc. Result
        does not include the null revision."""
        visit = list(revs)
        seen = set([nullrev])
        while visit:
            for parent in self.parentrevs(visit.pop(0)):
                if parent not in seen:
                    visit.append(parent)
                    seen.add(parent)
                    yield parent

    def descendants(self, *revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        seen = set(revs)
        for i in xrange(min(revs) + 1, len(self)):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        has = set(self.ancestors(*common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = [r for r in heads if r not in has]
        while visit:
            r = visit.pop(0)
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return [self.node(r) for r in missing]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendent of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, 0)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendent of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [n for n in roots if n in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(n) for n in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendents = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendents.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendents, empty parents will look like
        # they're descendents.
        for r in xrange(max(lowestrev, 0), highestrev + 1):
            n = self.node(r)
            isdescendent = False
            if lowestrev == nullrev: # Everybody is a descendent of nullid
                isdescendent = True
            elif n in descendents:
                # n is already a descendent
                isdescendent = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendents before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendents, it's not a root.
                    if (p[0] in descendents) or (p[1] in descendents):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendent if either of its parents are
                # descendents. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendents) or (p[1] in descendents):
                    descendents.add(n)
                    isdescendent = True
            if isdescendent and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendents and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = 1
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = 1
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [n for n in heads.iterkeys() if heads[n] != 0]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

812 def heads(self, start=None, stop=None):
814 def heads(self, start=None, stop=None):
813 """return the list of all nodes that have no children
815 """return the list of all nodes that have no children
814
816
815 if start is specified, only heads that are descendants of
817 if start is specified, only heads that are descendants of
816 start will be returned
818 start will be returned
817 if stop is specified, it will consider all the revs from stop
819 if stop is specified, it will consider all the revs from stop
818 as if they had no children
820 as if they had no children
819 """
821 """
820 if start is None and stop is None:
822 if start is None and stop is None:
821 count = len(self)
823 count = len(self)
822 if not count:
824 if not count:
823 return [nullid]
825 return [nullid]
824 ishead = [1] * (count + 1)
826 ishead = [1] * (count + 1)
825 index = self.index
827 index = self.index
826 for r in xrange(count):
828 for r in xrange(count):
827 e = index[r]
829 e = index[r]
828 ishead[e[5]] = ishead[e[6]] = 0
830 ishead[e[5]] = ishead[e[6]] = 0
829 return [self.node(r) for r in xrange(count) if ishead[r]]
831 return [self.node(r) for r in xrange(count) if ishead[r]]
830
832
831 if start is None:
833 if start is None:
832 start = nullid
834 start = nullid
833 if stop is None:
835 if stop is None:
834 stop = []
836 stop = []
835 stoprevs = set([self.rev(n) for n in stop])
837 stoprevs = set([self.rev(n) for n in stop])
836 startrev = self.rev(start)
838 startrev = self.rev(start)
837 reachable = set((startrev,))
839 reachable = set((startrev,))
838 heads = set((startrev,))
840 heads = set((startrev,))
839
841
840 parentrevs = self.parentrevs
842 parentrevs = self.parentrevs
841 for r in xrange(startrev + 1, len(self)):
843 for r in xrange(startrev + 1, len(self)):
842 for p in parentrevs(r):
844 for p in parentrevs(r):
843 if p in reachable:
845 if p in reachable:
844 if r not in stoprevs:
846 if r not in stoprevs:
845 reachable.add(r)
847 reachable.add(r)
846 heads.add(r)
848 heads.add(r)
847 if p in heads and p not in stoprevs:
849 if p in heads and p not in stoprevs:
848 heads.remove(p)
850 heads.remove(p)
849
851
850 return [self.node(r) for r in heads]
852 return [self.node(r) for r in heads]
851
853
852 def children(self, node):
854 def children(self, node):
853 """find the children of a given node"""
855 """find the children of a given node"""
854 c = []
856 c = []
855 p = self.rev(node)
857 p = self.rev(node)
856 for r in range(p + 1, len(self)):
858 for r in range(p + 1, len(self)):
857 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
859 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
858 if prevs:
860 if prevs:
859 for pr in prevs:
861 for pr in prevs:
860 if pr == p:
862 if pr == p:
861 c.append(self.node(r))
863 c.append(self.node(r))
862 elif p == nullrev:
864 elif p == nullrev:
863 c.append(self.node(r))
865 c.append(self.node(r))
864 return c
866 return c
865
867
866 def _match(self, id):
868 def _match(self, id):
867 if isinstance(id, (long, int)):
869 if isinstance(id, (long, int)):
868 # rev
870 # rev
869 return self.node(id)
871 return self.node(id)
870 if len(id) == 20:
872 if len(id) == 20:
871 # possibly a binary node
873 # possibly a binary node
872 # odds of a binary node being all hex in ASCII are 1 in 10**25
874 # odds of a binary node being all hex in ASCII are 1 in 10**25
873 try:
875 try:
874 node = id
876 node = id
875 self.rev(node) # quick search the index
877 self.rev(node) # quick search the index
876 return node
878 return node
877 except LookupError:
879 except LookupError:
878 pass # may be partial hex id
880 pass # may be partial hex id
879 try:
881 try:
880 # str(rev)
882 # str(rev)
881 rev = int(id)
883 rev = int(id)
882 if str(rev) != id:
884 if str(rev) != id:
883 raise ValueError
885 raise ValueError
884 if rev < 0:
886 if rev < 0:
885 rev = len(self) + rev
887 rev = len(self) + rev
886 if rev < 0 or rev >= len(self):
888 if rev < 0 or rev >= len(self):
887 raise ValueError
889 raise ValueError
888 return self.node(rev)
890 return self.node(rev)
889 except (ValueError, OverflowError):
891 except (ValueError, OverflowError):
890 pass
892 pass
891 if len(id) == 40:
893 if len(id) == 40:
892 try:
894 try:
893 # a full hex nodeid?
895 # a full hex nodeid?
894 node = bin(id)
896 node = bin(id)
895 self.rev(node)
897 self.rev(node)
896 return node
898 return node
897 except (TypeError, LookupError):
899 except (TypeError, LookupError):
898 pass
900 pass
899
901
900 def _partialmatch(self, id):
902 def _partialmatch(self, id):
901 if len(id) < 40:
903 if len(id) < 40:
902 try:
904 try:
903 # hex(node)[:...]
905 # hex(node)[:...]
904 l = len(id) // 2 # grab an even number of digits
906 l = len(id) // 2 # grab an even number of digits
905 bin_id = bin(id[:l * 2])
907 bin_id = bin(id[:l * 2])
906 nl = [n for n in self.nodemap if n[:l] == bin_id]
908 nl = [n for n in self.nodemap if n[:l] == bin_id]
907 nl = [n for n in nl if hex(n).startswith(id)]
909 nl = [n for n in nl if hex(n).startswith(id)]
908 if len(nl) > 0:
910 if len(nl) > 0:
909 if len(nl) == 1:
911 if len(nl) == 1:
910 return nl[0]
912 return nl[0]
911 raise LookupError(id, self.indexfile,
913 raise LookupError(id, self.indexfile,
912 _('ambiguous identifier'))
914 _('ambiguous identifier'))
913 return None
915 return None
914 except TypeError:
916 except TypeError:
915 pass
917 pass
916
918
917 def lookup(self, id):
919 def lookup(self, id):
918 """locate a node based on:
920 """locate a node based on:
919 - revision number or str(revision number)
921 - revision number or str(revision number)
920 - nodeid or subset of hex nodeid
922 - nodeid or subset of hex nodeid
921 """
923 """
922 n = self._match(id)
924 n = self._match(id)
923 if n is not None:
925 if n is not None:
924 return n
926 return n
925 n = self._partialmatch(id)
927 n = self._partialmatch(id)
926 if n:
928 if n:
927 return n
929 return n
928
930
929 raise LookupError(id, self.indexfile, _('no match found'))
931 raise LookupError(id, self.indexfile, _('no match found'))
930
932
931 def cmp(self, node, text):
933 def cmp(self, node, text):
932 """compare text with a given file revision"""
934 """compare text with a given file revision"""
933 p1, p2 = self.parents(node)
935 p1, p2 = self.parents(node)
934 return hash(text, p1, p2) != node
936 return hash(text, p1, p2) != node
935
937
    def _addchunk(self, offset, data):
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _prereadsize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _loadchunk(self, offset, length):
        if self._inline:
            df = self.opener(self.indexfile)
        else:
            df = self.opener(self.datafile)

        readahead = max(65536, length)
        df.seek(offset)
        d = df.read(readahead)
        self._addchunk(offset, d)
        if readahead > length:
            return d[:length]
        return d

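    # A request is a cache hit when [offset, offset + length) falls
    # inside the cached buffer; e.g. with the cache holding 64KiB from
    # offset 4096, a read of 1KiB at offset 8192 is served as
    # d[4096:5120] without touching the file.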
    def _getchunk(self, offset, length):
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return d[cachestart:cacheend]

        return self._loadchunk(offset, length)

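    # In an inline revlog, index entries and data chunks are
    # interleaved in the .i file, so the data for startrev begins only
    # after startrev + 1 index entries -- hence the extra
    # (startrev + 1) * self._io.size offset below.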
    def _chunkraw(self, startrev, endrev):
        start = self.start(startrev)
        length = self.end(endrev) - start
        if self._inline:
            start += (startrev + 1) * self._io.size
        return self._getchunk(start, length)

    def _chunk(self, rev):
        return decompress(self._chunkraw(rev, rev))

    def _chunkclear(self):
        self._chunkcache = (0, '')

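    # Fast path: when rev2 directly follows rev1 in the same delta
    # chain, the chunk stored for rev2 already is the wanted delta;
    # otherwise one is computed from the two full texts.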
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
            return self._chunk(rev2)

        return mdiff.textdiff(self.revision(self.node(rev1)),
                              self.revision(self.node(rev2)))

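    # Reconstructing a revision walks its delta chain: start from the
    # full text stored at base(rev), then apply every stored delta up
    # to rev.  The (node, rev, text) cache lets a later call reuse an
    # intermediate text from the same chain.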
    def revision(self, node):
        """return an uncompressed revision of a given node"""
        if node == nullid:
            return ""
        if self._cache and self._cache[0] == node:
            return self._cache[2]

        # look up what we need to read
        text = None
        rev = self.rev(node)
        base = self.base(rev)

        # check rev flags
        if self.index[rev][0] & 0xFFFF:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.index[rev][0] & 0xFFFF))

        # do we have useful data cached?
        if self._cache and self._cache[1] >= base and self._cache[1] < rev:
            base = self._cache[1]
            text = self._cache[2]

        self._loadindex(base, rev + 1)
        self._chunkraw(base, rev)
        if text is None:
            text = self._chunk(base)

        bins = [self._chunk(r) for r in xrange(base + 1, rev + 1)]
        text = mdiff.patches(text, bins)
        p1, p2 = self.parents(node)
        if node != hash(text, p1, p2):
            raise RevlogError(_("integrity check failed on %s:%d")
                              % (self.indexfile, rev))

        self._cache = (node, rev, text)
        return text

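    # Inline revlogs keep index entries and data in a single .i file;
    # once the data outgrows 128KiB (131072 bytes), this rewrites the
    # revlog into the conventional split .i index / .d data layout.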
    def checkinlinesize(self, tr, fp=None):
        if not self._inline or (self.start(-2) + self.length(-2)) < 131072:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._chunkraw(r, r))
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call rename, the temp file will never replace the
        # real index
        fp.rename()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

    def addrevision(self, text, transaction, link, p1, p2, d=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        d - an optional precomputed delta
        """
        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        try:
            return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

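    # A new revision is stored as a delta against the previous one
    # unless the resulting chain, measured from its base, would span
    # more than twice the new full text; e.g. a 1000-byte text whose
    # chain would cover 2500 bytes is stored as a fresh full
    # (compressed) snapshot instead.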
    def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
        node = hash(text, p1, p2)
        if node in self.nodemap:
            return node

        curr = len(self)
        prev = curr - 1
        base = self.base(prev)
        offset = self.end(prev)

        if curr:
            if not d:
                ptext = self.revision(self.node(prev))
                d = mdiff.textdiff(ptext, text)
            data = compress(d)
            l = len(data[1]) + len(data[0])
            dist = l + offset - self.start(base)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if not curr or dist > len(text) * 2:
            data = compress(text)
            l = len(data[1]) + len(data[0])
            base = curr

        e = (offset_type(offset, 0), l, len(text),
             base, link, self.rev(p1), self.rev(p2), node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

        if type(text) == str: # only accept immutable objects
            self._cache = (node, curr, text)
        return node

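    # Revision numbers grow topologically, so any descendant of start
    # has a higher number than start; the scan can stop as soon as i
    # passes end.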
    def descendant(self, start, end):
        for i in self.descendants(start):
            if i == end:
                return True
            elif i > end:
                break
        return False

    def ancestor(self, a, b):
        """calculate the least common ancestor of nodes a and b"""

        # fast path, check if it is a descendant
        a, b = self.rev(a), self.rev(b)
        start, end = sorted((a, b))
        if self.descendant(start, end):
            return self.node(start)

        def parents(rev):
            return [p for p in self.parentrevs(rev) if p != nullrev]

        c = ancestor.ancestor(a, b, parents)
        if c is None:
            return nullid

        return self.node(c)

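    # Wire format: each chunk is a length header followed by 80 bytes
    # of metadata (node, p1, p2 and the linked changeset node, 20
    # bytes each) and then the delta itself; deltas over 1MiB are
    # yielded in 256KiB slices.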
    def group(self, nodelist, lookup, infocollect=None):
        """Calculate a delta group, yielding a sequence of changegroup chunks
        (strings).

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
        have this parent as it has all history before these
        changesets. The parent used is parent[0].
        """

        revs = [self.rev(n) for n in nodelist]

        # if we don't have any revisions touched by these changesets, bail
        if not revs:
            yield changegroup.closechunk()
            return

        # add the parent of the first rev
        p = self.parentrevs(revs[0])[0]
        revs.insert(0, p)

        # build deltas
        for d in xrange(len(revs) - 1):
            a, b = revs[d], revs[d + 1]
            nb = self.node(b)

            if infocollect is not None:
                infocollect(nb)

            p = self.parents(nb)
            meta = nb + p[0] + p[1] + lookup(nb)
            if a == -1:
                d = self.revision(nb)
                meta += mdiff.trivialdiffheader(len(d))
            else:
                d = self.revdiff(a, b)
            yield changegroup.chunkheader(len(meta) + len(d))
            yield meta
            if len(d) > 2**20:
                pos = 0
                while pos < len(d):
                    pos2 = pos + 2 ** 18
                    yield d[pos:pos2]
                    pos = pos2
            else:
                yield d

        yield changegroup.closechunk()

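    # Incoming chunks chain each delta against the previous chunk's
    # node, so while chain == prev the compressed delta can be
    # appended as-is; a broken chain, or one grown past twice the
    # text size, forces rebuilding the full text and re-adding it
    # through _addrevision.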
    def addgroup(self, revs, linkmapper, transaction):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The
        first delta is against its parent, which should be in our
        log; the rest are against the previous delta.
        """

        # track the base of the current delta log
        r = len(self)
        t = r - 1
        node = None

        base = prev = nullrev
        start = end = textlen = 0
        if r:
            end = self.end(t)

        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        try:
            # loop through our set of deltas
            chain = None
            for chunk in revs:
                node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue
                delta = buffer(chunk, 80)
                del chunk

                for p in (p1, p2):
                    if not p in self.nodemap:
                        raise LookupError(p, self.indexfile, _('unknown parent'))

                if not chain:
                    # retrieve the parent revision of the delta chain
                    chain = p1
                    if not chain in self.nodemap:
                        raise LookupError(chain, self.indexfile, _('unknown base'))

                # full versions are inserted when the needed deltas become
                # comparable to the uncompressed text or when the previous
                # version is not the one we have a delta against. We use
                # the size of the previous full rev as a proxy for the
                # current size.

                if chain == prev:
                    cdelta = compress(delta)
                    cdeltalen = len(cdelta[0]) + len(cdelta[1])
                    textlen = mdiff.patchedsize(textlen, delta)

                if chain != prev or (end - start + cdeltalen) > textlen * 2:
                    # flush our writes here so we can read it in revision
                    if dfh:
                        dfh.flush()
                    ifh.flush()
                    text = self.revision(chain)
                    if len(text) == 0:
                        # skip over trivial delta header
                        text = buffer(delta, 12)
                    else:
                        text = mdiff.patches(text, [delta])
                    del delta
                    chk = self._addrevision(text, transaction, link, p1, p2, None,
                                            ifh, dfh)
                    if not dfh and not self._inline:
                        # addrevision switched from inline to conventional
                        # reopen the index
                        dfh = self.opener(self.datafile, "a")
                        ifh = self.opener(self.indexfile, "a")
                    if chk != node:
                        raise RevlogError(_("consistency error adding group"))
                    textlen = len(text)
                else:
                    e = (offset_type(end, 0), cdeltalen, textlen, base,
                         link, self.rev(p1), self.rev(p2), node)
                    self.index.insert(-1, e)
                    self.nodemap[node] = r
                    entry = self._io.packentry(e, self.node, self.version, r)
                    if self._inline:
                        ifh.write(entry)
                        ifh.write(cdelta[0])
                        ifh.write(cdelta[1])
                        self.checkinlinesize(transaction, ifh)
                        if not self._inline:
                            dfh = self.opener(self.datafile, "a")
                            ifh = self.opener(self.indexfile, "a")
                    else:
                        dfh.write(cdelta[0])
                        dfh.write(cdelta[1])
                        ifh.write(entry)

                t, r, chain, prev = r, r + 1, node, node
                base = self.base(t)
                start = self.start(base)
                end = self.end(t)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return node

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        if isinstance(self.index, lazyindex):
            self._loadindexmap()

        for rev in self:
            if self.index[rev][4] >= minlink:
                break
        else:
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

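    # Compare the on-disk sizes of the data and index files with the
    # sizes implied by the index entries; returns (dd, di), the byte
    # discrepancies for the data and index files respectively, so
    # (0, 0) means both check out.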
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res