##// END OF EJS Templates
revlog: let nodestotag be a set instead of a list
Martin Geisler -
r8153:616f20e1 default
parent child Browse files
Show More
@@ -1,1369 +1,1369 b''
1 """
1 """
2 revlog.py - storage back-end for mercurial
2 revlog.py - storage back-end for mercurial
3
3
4 This provides efficient delta storage with O(1) retrieve and append
4 This provides efficient delta storage with O(1) retrieve and append
5 and O(changes) merge between branches
5 and O(changes) merge between branches
6
6
7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
8
8
9 This software may be used and distributed according to the terms
9 This software may be used and distributed according to the terms
10 of the GNU General Public License, incorporated herein by reference.
10 of the GNU General Public License, incorporated herein by reference.
11 """
11 """
12
12
13 # import stuff from node for others to import from revlog
13 # import stuff from node for others to import from revlog
14 from node import bin, hex, nullid, nullrev, short #@UnusedImport
14 from node import bin, hex, nullid, nullrev, short #@UnusedImport
15 from i18n import _
15 from i18n import _
16 import changegroup, errno, ancestor, mdiff, parsers
16 import changegroup, errno, ancestor, mdiff, parsers
17 import struct, util, zlib, error
17 import struct, util, zlib, error
18
18
19 _pack = struct.pack
19 _pack = struct.pack
20 _unpack = struct.unpack
20 _unpack = struct.unpack
21 _compress = zlib.compress
21 _compress = zlib.compress
22 _decompress = zlib.decompress
22 _decompress = zlib.decompress
23 _sha = util.sha1
23 _sha = util.sha1
24
24
25 # revlog flags
25 # revlog flags
26 REVLOGV0 = 0
26 REVLOGV0 = 0
27 REVLOGNG = 1
27 REVLOGNG = 1
28 REVLOGNGINLINEDATA = (1 << 16)
28 REVLOGNGINLINEDATA = (1 << 16)
29 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
29 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
30 REVLOG_DEFAULT_FORMAT = REVLOGNG
30 REVLOG_DEFAULT_FORMAT = REVLOGNG
31 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
31 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
32
32
33 RevlogError = error.RevlogError
33 RevlogError = error.RevlogError
34 LookupError = error.LookupError
34 LookupError = error.LookupError
35
35
36 def getoffset(q):
36 def getoffset(q):
37 return int(q >> 16)
37 return int(q >> 16)
38
38
39 def gettype(q):
39 def gettype(q):
40 return int(q & 0xFFFF)
40 return int(q & 0xFFFF)
41
41
42 def offset_type(offset, type):
42 def offset_type(offset, type):
43 return long(long(offset) << 16 | type)
43 return long(long(offset) << 16 | type)
44
44
45 nullhash = _sha(nullid)
45 nullhash = _sha(nullid)
46
46
47 def hash(text, p1, p2):
47 def hash(text, p1, p2):
48 """generate a hash from the given text and its parent hashes
48 """generate a hash from the given text and its parent hashes
49
49
50 This hash combines both the current file contents and its history
50 This hash combines both the current file contents and its history
51 in a manner that makes it easy to distinguish nodes with the same
51 in a manner that makes it easy to distinguish nodes with the same
52 content in the revision graph.
52 content in the revision graph.
53 """
53 """
54 # As of now, if one of the parent node is null, p2 is null
54 # As of now, if one of the parent node is null, p2 is null
55 if p2 == nullid:
55 if p2 == nullid:
56 # deep copy of a hash is faster than creating one
56 # deep copy of a hash is faster than creating one
57 s = nullhash.copy()
57 s = nullhash.copy()
58 s.update(p1)
58 s.update(p1)
59 else:
59 else:
60 # none of the parent nodes are nullid
60 # none of the parent nodes are nullid
61 l = [p1, p2]
61 l = [p1, p2]
62 l.sort()
62 l.sort()
63 s = _sha(l[0])
63 s = _sha(l[0])
64 s.update(l[1])
64 s.update(l[1])
65 s.update(text)
65 s.update(text)
66 return s.digest()
66 return s.digest()
67
67
68 def compress(text):
68 def compress(text):
69 """ generate a possibly-compressed representation of text """
69 """ generate a possibly-compressed representation of text """
70 if not text:
70 if not text:
71 return ("", text)
71 return ("", text)
72 l = len(text)
72 l = len(text)
73 bin = None
73 bin = None
74 if l < 44:
74 if l < 44:
75 pass
75 pass
76 elif l > 1000000:
76 elif l > 1000000:
77 # zlib makes an internal copy, thus doubling memory usage for
77 # zlib makes an internal copy, thus doubling memory usage for
78 # large files, so lets do this in pieces
78 # large files, so lets do this in pieces
79 z = zlib.compressobj()
79 z = zlib.compressobj()
80 p = []
80 p = []
81 pos = 0
81 pos = 0
82 while pos < l:
82 while pos < l:
83 pos2 = pos + 2**20
83 pos2 = pos + 2**20
84 p.append(z.compress(text[pos:pos2]))
84 p.append(z.compress(text[pos:pos2]))
85 pos = pos2
85 pos = pos2
86 p.append(z.flush())
86 p.append(z.flush())
87 if sum(map(len, p)) < l:
87 if sum(map(len, p)) < l:
88 bin = "".join(p)
88 bin = "".join(p)
89 else:
89 else:
90 bin = _compress(text)
90 bin = _compress(text)
91 if bin is None or len(bin) > l:
91 if bin is None or len(bin) > l:
92 if text[0] == '\0':
92 if text[0] == '\0':
93 return ("", text)
93 return ("", text)
94 return ('u', text)
94 return ('u', text)
95 return ("", bin)
95 return ("", bin)
96
96
97 def decompress(bin):
97 def decompress(bin):
98 """ decompress the given input """
98 """ decompress the given input """
99 if not bin:
99 if not bin:
100 return bin
100 return bin
101 t = bin[0]
101 t = bin[0]
102 if t == '\0':
102 if t == '\0':
103 return bin
103 return bin
104 if t == 'x':
104 if t == 'x':
105 return _decompress(bin)
105 return _decompress(bin)
106 if t == 'u':
106 if t == 'u':
107 return bin[1:]
107 return bin[1:]
108 raise RevlogError(_("unknown compression type %r") % t)
108 raise RevlogError(_("unknown compression type %r") % t)
109
109
110 class lazyparser(object):
110 class lazyparser(object):
111 """
111 """
112 this class avoids the need to parse the entirety of large indices
112 this class avoids the need to parse the entirety of large indices
113 """
113 """
114
114
115 # lazyparser is not safe to use on windows if win32 extensions not
115 # lazyparser is not safe to use on windows if win32 extensions not
116 # available. it keeps file handle open, which make it not possible
116 # available. it keeps file handle open, which make it not possible
117 # to break hardlinks on local cloned repos.
117 # to break hardlinks on local cloned repos.
118
118
119 def __init__(self, dataf, size):
119 def __init__(self, dataf, size):
120 self.dataf = dataf
120 self.dataf = dataf
121 self.s = struct.calcsize(indexformatng)
121 self.s = struct.calcsize(indexformatng)
122 self.datasize = size
122 self.datasize = size
123 self.l = size/self.s
123 self.l = size/self.s
124 self.index = [None] * self.l
124 self.index = [None] * self.l
125 self.map = {nullid: nullrev}
125 self.map = {nullid: nullrev}
126 self.allmap = 0
126 self.allmap = 0
127 self.all = 0
127 self.all = 0
128 self.mapfind_count = 0
128 self.mapfind_count = 0
129
129
130 def loadmap(self):
130 def loadmap(self):
131 """
131 """
132 during a commit, we need to make sure the rev being added is
132 during a commit, we need to make sure the rev being added is
133 not a duplicate. This requires loading the entire index,
133 not a duplicate. This requires loading the entire index,
134 which is fairly slow. loadmap can load up just the node map,
134 which is fairly slow. loadmap can load up just the node map,
135 which takes much less time.
135 which takes much less time.
136 """
136 """
137 if self.allmap:
137 if self.allmap:
138 return
138 return
139 end = self.datasize
139 end = self.datasize
140 self.allmap = 1
140 self.allmap = 1
141 cur = 0
141 cur = 0
142 count = 0
142 count = 0
143 blocksize = self.s * 256
143 blocksize = self.s * 256
144 self.dataf.seek(0)
144 self.dataf.seek(0)
145 while cur < end:
145 while cur < end:
146 data = self.dataf.read(blocksize)
146 data = self.dataf.read(blocksize)
147 off = 0
147 off = 0
148 for x in xrange(256):
148 for x in xrange(256):
149 n = data[off + ngshaoffset:off + ngshaoffset + 20]
149 n = data[off + ngshaoffset:off + ngshaoffset + 20]
150 self.map[n] = count
150 self.map[n] = count
151 count += 1
151 count += 1
152 if count >= self.l:
152 if count >= self.l:
153 break
153 break
154 off += self.s
154 off += self.s
155 cur += blocksize
155 cur += blocksize
156
156
157 def loadblock(self, blockstart, blocksize, data=None):
157 def loadblock(self, blockstart, blocksize, data=None):
158 if self.all:
158 if self.all:
159 return
159 return
160 if data is None:
160 if data is None:
161 self.dataf.seek(blockstart)
161 self.dataf.seek(blockstart)
162 if blockstart + blocksize > self.datasize:
162 if blockstart + blocksize > self.datasize:
163 # the revlog may have grown since we've started running,
163 # the revlog may have grown since we've started running,
164 # but we don't have space in self.index for more entries.
164 # but we don't have space in self.index for more entries.
165 # limit blocksize so that we don't get too much data.
165 # limit blocksize so that we don't get too much data.
166 blocksize = max(self.datasize - blockstart, 0)
166 blocksize = max(self.datasize - blockstart, 0)
167 data = self.dataf.read(blocksize)
167 data = self.dataf.read(blocksize)
168 lend = len(data) / self.s
168 lend = len(data) / self.s
169 i = blockstart / self.s
169 i = blockstart / self.s
170 off = 0
170 off = 0
171 # lazyindex supports __delitem__
171 # lazyindex supports __delitem__
172 if lend > len(self.index) - i:
172 if lend > len(self.index) - i:
173 lend = len(self.index) - i
173 lend = len(self.index) - i
174 for x in xrange(lend):
174 for x in xrange(lend):
175 if self.index[i + x] == None:
175 if self.index[i + x] == None:
176 b = data[off : off + self.s]
176 b = data[off : off + self.s]
177 self.index[i + x] = b
177 self.index[i + x] = b
178 n = b[ngshaoffset:ngshaoffset + 20]
178 n = b[ngshaoffset:ngshaoffset + 20]
179 self.map[n] = i + x
179 self.map[n] = i + x
180 off += self.s
180 off += self.s
181
181
182 def findnode(self, node):
182 def findnode(self, node):
183 """search backwards through the index file for a specific node"""
183 """search backwards through the index file for a specific node"""
184 if self.allmap:
184 if self.allmap:
185 return None
185 return None
186
186
187 # hg log will cause many many searches for the manifest
187 # hg log will cause many many searches for the manifest
188 # nodes. After we get called a few times, just load the whole
188 # nodes. After we get called a few times, just load the whole
189 # thing.
189 # thing.
190 if self.mapfind_count > 8:
190 if self.mapfind_count > 8:
191 self.loadmap()
191 self.loadmap()
192 if node in self.map:
192 if node in self.map:
193 return node
193 return node
194 return None
194 return None
195 self.mapfind_count += 1
195 self.mapfind_count += 1
196 last = self.l - 1
196 last = self.l - 1
197 while self.index[last] != None:
197 while self.index[last] != None:
198 if last == 0:
198 if last == 0:
199 self.all = 1
199 self.all = 1
200 self.allmap = 1
200 self.allmap = 1
201 return None
201 return None
202 last -= 1
202 last -= 1
203 end = (last + 1) * self.s
203 end = (last + 1) * self.s
204 blocksize = self.s * 256
204 blocksize = self.s * 256
205 while end >= 0:
205 while end >= 0:
206 start = max(end - blocksize, 0)
206 start = max(end - blocksize, 0)
207 self.dataf.seek(start)
207 self.dataf.seek(start)
208 data = self.dataf.read(end - start)
208 data = self.dataf.read(end - start)
209 findend = end - start
209 findend = end - start
210 while True:
210 while True:
211 # we're searching backwards, so we have to make sure
211 # we're searching backwards, so we have to make sure
212 # we don't find a changeset where this node is a parent
212 # we don't find a changeset where this node is a parent
213 off = data.find(node, 0, findend)
213 off = data.find(node, 0, findend)
214 findend = off
214 findend = off
215 if off >= 0:
215 if off >= 0:
216 i = off / self.s
216 i = off / self.s
217 off = i * self.s
217 off = i * self.s
218 n = data[off + ngshaoffset:off + ngshaoffset + 20]
218 n = data[off + ngshaoffset:off + ngshaoffset + 20]
219 if n == node:
219 if n == node:
220 self.map[n] = i + start / self.s
220 self.map[n] = i + start / self.s
221 return node
221 return node
222 else:
222 else:
223 break
223 break
224 end -= blocksize
224 end -= blocksize
225 return None
225 return None
226
226
227 def loadindex(self, i=None, end=None):
227 def loadindex(self, i=None, end=None):
228 if self.all:
228 if self.all:
229 return
229 return
230 all = False
230 all = False
231 if i == None:
231 if i == None:
232 blockstart = 0
232 blockstart = 0
233 blocksize = (65536 / self.s) * self.s
233 blocksize = (65536 / self.s) * self.s
234 end = self.datasize
234 end = self.datasize
235 all = True
235 all = True
236 else:
236 else:
237 if end:
237 if end:
238 blockstart = i * self.s
238 blockstart = i * self.s
239 end = end * self.s
239 end = end * self.s
240 blocksize = end - blockstart
240 blocksize = end - blockstart
241 else:
241 else:
242 blockstart = (i & ~1023) * self.s
242 blockstart = (i & ~1023) * self.s
243 blocksize = self.s * 1024
243 blocksize = self.s * 1024
244 end = blockstart + blocksize
244 end = blockstart + blocksize
245 while blockstart < end:
245 while blockstart < end:
246 self.loadblock(blockstart, blocksize)
246 self.loadblock(blockstart, blocksize)
247 blockstart += blocksize
247 blockstart += blocksize
248 if all:
248 if all:
249 self.all = True
249 self.all = True
250
250
251 class lazyindex(object):
251 class lazyindex(object):
252 """a lazy version of the index array"""
252 """a lazy version of the index array"""
253 def __init__(self, parser):
253 def __init__(self, parser):
254 self.p = parser
254 self.p = parser
255 def __len__(self):
255 def __len__(self):
256 return len(self.p.index)
256 return len(self.p.index)
257 def load(self, pos):
257 def load(self, pos):
258 if pos < 0:
258 if pos < 0:
259 pos += len(self.p.index)
259 pos += len(self.p.index)
260 self.p.loadindex(pos)
260 self.p.loadindex(pos)
261 return self.p.index[pos]
261 return self.p.index[pos]
262 def __getitem__(self, pos):
262 def __getitem__(self, pos):
263 return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
263 return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
264 def __setitem__(self, pos, item):
264 def __setitem__(self, pos, item):
265 self.p.index[pos] = _pack(indexformatng, *item)
265 self.p.index[pos] = _pack(indexformatng, *item)
266 def __delitem__(self, pos):
266 def __delitem__(self, pos):
267 del self.p.index[pos]
267 del self.p.index[pos]
268 def insert(self, pos, e):
268 def insert(self, pos, e):
269 self.p.index.insert(pos, _pack(indexformatng, *e))
269 self.p.index.insert(pos, _pack(indexformatng, *e))
270 def append(self, e):
270 def append(self, e):
271 self.p.index.append(_pack(indexformatng, *e))
271 self.p.index.append(_pack(indexformatng, *e))
272
272
273 class lazymap(object):
273 class lazymap(object):
274 """a lazy version of the node map"""
274 """a lazy version of the node map"""
275 def __init__(self, parser):
275 def __init__(self, parser):
276 self.p = parser
276 self.p = parser
277 def load(self, key):
277 def load(self, key):
278 n = self.p.findnode(key)
278 n = self.p.findnode(key)
279 if n == None:
279 if n == None:
280 raise KeyError(key)
280 raise KeyError(key)
281 def __contains__(self, key):
281 def __contains__(self, key):
282 if key in self.p.map:
282 if key in self.p.map:
283 return True
283 return True
284 self.p.loadmap()
284 self.p.loadmap()
285 return key in self.p.map
285 return key in self.p.map
286 def __iter__(self):
286 def __iter__(self):
287 yield nullid
287 yield nullid
288 for i in xrange(self.p.l):
288 for i in xrange(self.p.l):
289 ret = self.p.index[i]
289 ret = self.p.index[i]
290 if not ret:
290 if not ret:
291 self.p.loadindex(i)
291 self.p.loadindex(i)
292 ret = self.p.index[i]
292 ret = self.p.index[i]
293 if isinstance(ret, str):
293 if isinstance(ret, str):
294 ret = _unpack(indexformatng, ret)
294 ret = _unpack(indexformatng, ret)
295 yield ret[7]
295 yield ret[7]
296 def __getitem__(self, key):
296 def __getitem__(self, key):
297 try:
297 try:
298 return self.p.map[key]
298 return self.p.map[key]
299 except KeyError:
299 except KeyError:
300 try:
300 try:
301 self.load(key)
301 self.load(key)
302 return self.p.map[key]
302 return self.p.map[key]
303 except KeyError:
303 except KeyError:
304 raise KeyError("node " + hex(key))
304 raise KeyError("node " + hex(key))
305 def __setitem__(self, key, val):
305 def __setitem__(self, key, val):
306 self.p.map[key] = val
306 self.p.map[key] = val
307 def __delitem__(self, key):
307 def __delitem__(self, key):
308 del self.p.map[key]
308 del self.p.map[key]
309
309
310 indexformatv0 = ">4l20s20s20s"
310 indexformatv0 = ">4l20s20s20s"
311 v0shaoffset = 56
311 v0shaoffset = 56
312
312
313 class revlogoldio(object):
313 class revlogoldio(object):
314 def __init__(self):
314 def __init__(self):
315 self.size = struct.calcsize(indexformatv0)
315 self.size = struct.calcsize(indexformatv0)
316
316
317 def parseindex(self, fp, inline):
317 def parseindex(self, fp, inline):
318 s = self.size
318 s = self.size
319 index = []
319 index = []
320 nodemap = {nullid: nullrev}
320 nodemap = {nullid: nullrev}
321 n = off = 0
321 n = off = 0
322 data = fp.read()
322 data = fp.read()
323 l = len(data)
323 l = len(data)
324 while off + s <= l:
324 while off + s <= l:
325 cur = data[off:off + s]
325 cur = data[off:off + s]
326 off += s
326 off += s
327 e = _unpack(indexformatv0, cur)
327 e = _unpack(indexformatv0, cur)
328 # transform to revlogv1 format
328 # transform to revlogv1 format
329 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
329 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
330 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
330 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
331 index.append(e2)
331 index.append(e2)
332 nodemap[e[6]] = n
332 nodemap[e[6]] = n
333 n += 1
333 n += 1
334
334
335 return index, nodemap, None
335 return index, nodemap, None
336
336
337 def packentry(self, entry, node, version, rev):
337 def packentry(self, entry, node, version, rev):
338 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
338 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
339 node(entry[5]), node(entry[6]), entry[7])
339 node(entry[5]), node(entry[6]), entry[7])
340 return _pack(indexformatv0, *e2)
340 return _pack(indexformatv0, *e2)
341
341
342 # index ng:
342 # index ng:
343 # 6 bytes offset
343 # 6 bytes offset
344 # 2 bytes flags
344 # 2 bytes flags
345 # 4 bytes compressed length
345 # 4 bytes compressed length
346 # 4 bytes uncompressed length
346 # 4 bytes uncompressed length
347 # 4 bytes: base rev
347 # 4 bytes: base rev
348 # 4 bytes link rev
348 # 4 bytes link rev
349 # 4 bytes parent 1 rev
349 # 4 bytes parent 1 rev
350 # 4 bytes parent 2 rev
350 # 4 bytes parent 2 rev
351 # 32 bytes: nodeid
351 # 32 bytes: nodeid
352 indexformatng = ">Qiiiiii20s12x"
352 indexformatng = ">Qiiiiii20s12x"
353 ngshaoffset = 32
353 ngshaoffset = 32
354 versionformat = ">I"
354 versionformat = ">I"
355
355
356 class revlogio(object):
356 class revlogio(object):
357 def __init__(self):
357 def __init__(self):
358 self.size = struct.calcsize(indexformatng)
358 self.size = struct.calcsize(indexformatng)
359
359
360 def parseindex(self, fp, inline):
360 def parseindex(self, fp, inline):
361 try:
361 try:
362 size = util.fstat(fp).st_size
362 size = util.fstat(fp).st_size
363 except AttributeError:
363 except AttributeError:
364 size = 0
364 size = 0
365
365
366 if util.openhardlinks() and not inline and size > 1000000:
366 if util.openhardlinks() and not inline and size > 1000000:
367 # big index, let's parse it on demand
367 # big index, let's parse it on demand
368 parser = lazyparser(fp, size)
368 parser = lazyparser(fp, size)
369 index = lazyindex(parser)
369 index = lazyindex(parser)
370 nodemap = lazymap(parser)
370 nodemap = lazymap(parser)
371 e = list(index[0])
371 e = list(index[0])
372 type = gettype(e[0])
372 type = gettype(e[0])
373 e[0] = offset_type(0, type)
373 e[0] = offset_type(0, type)
374 index[0] = e
374 index[0] = e
375 return index, nodemap, None
375 return index, nodemap, None
376
376
377 data = fp.read()
377 data = fp.read()
378 # call the C implementation to parse the index data
378 # call the C implementation to parse the index data
379 index, nodemap, cache = parsers.parse_index(data, inline)
379 index, nodemap, cache = parsers.parse_index(data, inline)
380 return index, nodemap, cache
380 return index, nodemap, cache
381
381
382 def packentry(self, entry, node, version, rev):
382 def packentry(self, entry, node, version, rev):
383 p = _pack(indexformatng, *entry)
383 p = _pack(indexformatng, *entry)
384 if rev == 0:
384 if rev == 0:
385 p = _pack(versionformat, version) + p[4:]
385 p = _pack(versionformat, version) + p[4:]
386 return p
386 return p
387
387
388 class revlog(object):
388 class revlog(object):
389 """
389 """
390 the underlying revision storage object
390 the underlying revision storage object
391
391
392 A revlog consists of two parts, an index and the revision data.
392 A revlog consists of two parts, an index and the revision data.
393
393
394 The index is a file with a fixed record size containing
394 The index is a file with a fixed record size containing
395 information on each revision, including its nodeid (hash), the
395 information on each revision, including its nodeid (hash), the
396 nodeids of its parents, the position and offset of its data within
396 nodeids of its parents, the position and offset of its data within
397 the data file, and the revision it's based on. Finally, each entry
397 the data file, and the revision it's based on. Finally, each entry
398 contains a linkrev entry that can serve as a pointer to external
398 contains a linkrev entry that can serve as a pointer to external
399 data.
399 data.
400
400
401 The revision data itself is a linear collection of data chunks.
401 The revision data itself is a linear collection of data chunks.
402 Each chunk represents a revision and is usually represented as a
402 Each chunk represents a revision and is usually represented as a
403 delta against the previous chunk. To bound lookup time, runs of
403 delta against the previous chunk. To bound lookup time, runs of
404 deltas are limited to about 2 times the length of the original
404 deltas are limited to about 2 times the length of the original
405 version data. This makes retrieval of a version proportional to
405 version data. This makes retrieval of a version proportional to
406 its size, or O(1) relative to the number of revisions.
406 its size, or O(1) relative to the number of revisions.
407
407
408 Both pieces of the revlog are written to in an append-only
408 Both pieces of the revlog are written to in an append-only
409 fashion, which means we never need to rewrite a file to insert or
409 fashion, which means we never need to rewrite a file to insert or
410 remove data, and can use some simple techniques to avoid the need
410 remove data, and can use some simple techniques to avoid the need
411 for locking while reading.
411 for locking while reading.
412 """
412 """
413 def __init__(self, opener, indexfile):
413 def __init__(self, opener, indexfile):
414 """
414 """
415 create a revlog object
415 create a revlog object
416
416
417 opener is a function that abstracts the file opening operation
417 opener is a function that abstracts the file opening operation
418 and can be used to implement COW semantics or the like.
418 and can be used to implement COW semantics or the like.
419 """
419 """
420 self.indexfile = indexfile
420 self.indexfile = indexfile
421 self.datafile = indexfile[:-2] + ".d"
421 self.datafile = indexfile[:-2] + ".d"
422 self.opener = opener
422 self.opener = opener
423 self._cache = None
423 self._cache = None
424 self._chunkcache = None
424 self._chunkcache = None
425 self.nodemap = {nullid: nullrev}
425 self.nodemap = {nullid: nullrev}
426 self.index = []
426 self.index = []
427
427
428 v = REVLOG_DEFAULT_VERSION
428 v = REVLOG_DEFAULT_VERSION
429 if hasattr(opener, "defversion"):
429 if hasattr(opener, "defversion"):
430 v = opener.defversion
430 v = opener.defversion
431 if v & REVLOGNG:
431 if v & REVLOGNG:
432 v |= REVLOGNGINLINEDATA
432 v |= REVLOGNGINLINEDATA
433
433
434 i = ""
434 i = ""
435 try:
435 try:
436 f = self.opener(self.indexfile)
436 f = self.opener(self.indexfile)
437 i = f.read(4)
437 i = f.read(4)
438 f.seek(0)
438 f.seek(0)
439 if len(i) > 0:
439 if len(i) > 0:
440 v = struct.unpack(versionformat, i)[0]
440 v = struct.unpack(versionformat, i)[0]
441 except IOError, inst:
441 except IOError, inst:
442 if inst.errno != errno.ENOENT:
442 if inst.errno != errno.ENOENT:
443 raise
443 raise
444
444
445 self.version = v
445 self.version = v
446 self._inline = v & REVLOGNGINLINEDATA
446 self._inline = v & REVLOGNGINLINEDATA
447 flags = v & ~0xFFFF
447 flags = v & ~0xFFFF
448 fmt = v & 0xFFFF
448 fmt = v & 0xFFFF
449 if fmt == REVLOGV0 and flags:
449 if fmt == REVLOGV0 and flags:
450 raise RevlogError(_("index %s unknown flags %#04x for format v0")
450 raise RevlogError(_("index %s unknown flags %#04x for format v0")
451 % (self.indexfile, flags >> 16))
451 % (self.indexfile, flags >> 16))
452 elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
452 elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
453 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
453 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
454 % (self.indexfile, flags >> 16))
454 % (self.indexfile, flags >> 16))
455 elif fmt > REVLOGNG:
455 elif fmt > REVLOGNG:
456 raise RevlogError(_("index %s unknown format %d")
456 raise RevlogError(_("index %s unknown format %d")
457 % (self.indexfile, fmt))
457 % (self.indexfile, fmt))
458
458
459 self._io = revlogio()
459 self._io = revlogio()
460 if self.version == REVLOGV0:
460 if self.version == REVLOGV0:
461 self._io = revlogoldio()
461 self._io = revlogoldio()
462 if i:
462 if i:
463 try:
463 try:
464 d = self._io.parseindex(f, self._inline)
464 d = self._io.parseindex(f, self._inline)
465 except (ValueError, IndexError), e:
465 except (ValueError, IndexError), e:
466 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
466 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
467 self.index, self.nodemap, self._chunkcache = d
467 self.index, self.nodemap, self._chunkcache = d
468
468
469 # add the magic null revision at -1 (if it hasn't been done already)
469 # add the magic null revision at -1 (if it hasn't been done already)
470 if (self.index == [] or isinstance(self.index, lazyindex) or
470 if (self.index == [] or isinstance(self.index, lazyindex) or
471 self.index[-1][7] != nullid) :
471 self.index[-1][7] != nullid) :
472 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
472 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
473
473
474 def _loadindex(self, start, end):
474 def _loadindex(self, start, end):
475 """load a block of indexes all at once from the lazy parser"""
475 """load a block of indexes all at once from the lazy parser"""
476 if isinstance(self.index, lazyindex):
476 if isinstance(self.index, lazyindex):
477 self.index.p.loadindex(start, end)
477 self.index.p.loadindex(start, end)
478
478
479 def _loadindexmap(self):
479 def _loadindexmap(self):
480 """loads both the map and the index from the lazy parser"""
480 """loads both the map and the index from the lazy parser"""
481 if isinstance(self.index, lazyindex):
481 if isinstance(self.index, lazyindex):
482 p = self.index.p
482 p = self.index.p
483 p.loadindex()
483 p.loadindex()
484 self.nodemap = p.map
484 self.nodemap = p.map
485
485
486 def _loadmap(self):
486 def _loadmap(self):
487 """loads the map from the lazy parser"""
487 """loads the map from the lazy parser"""
488 if isinstance(self.nodemap, lazymap):
488 if isinstance(self.nodemap, lazymap):
489 self.nodemap.p.loadmap()
489 self.nodemap.p.loadmap()
490 self.nodemap = self.nodemap.p.map
490 self.nodemap = self.nodemap.p.map
491
491
492 def tip(self):
492 def tip(self):
493 return self.node(len(self.index) - 2)
493 return self.node(len(self.index) - 2)
494 def __len__(self):
494 def __len__(self):
495 return len(self.index) - 1
495 return len(self.index) - 1
496 def __iter__(self):
496 def __iter__(self):
497 for i in xrange(len(self)):
497 for i in xrange(len(self)):
498 yield i
498 yield i
499 def rev(self, node):
499 def rev(self, node):
500 try:
500 try:
501 return self.nodemap[node]
501 return self.nodemap[node]
502 except KeyError:
502 except KeyError:
503 raise LookupError(node, self.indexfile, _('no node'))
503 raise LookupError(node, self.indexfile, _('no node'))
504 def node(self, rev):
504 def node(self, rev):
505 return self.index[rev][7]
505 return self.index[rev][7]
506 def linkrev(self, rev):
506 def linkrev(self, rev):
507 return self.index[rev][4]
507 return self.index[rev][4]
508 def parents(self, node):
508 def parents(self, node):
509 i = self.index
509 i = self.index
510 d = i[self.rev(node)]
510 d = i[self.rev(node)]
511 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
511 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
512 def parentrevs(self, rev):
512 def parentrevs(self, rev):
513 return self.index[rev][5:7]
513 return self.index[rev][5:7]
514 def start(self, rev):
514 def start(self, rev):
515 return int(self.index[rev][0] >> 16)
515 return int(self.index[rev][0] >> 16)
516 def end(self, rev):
516 def end(self, rev):
517 return self.start(rev) + self.length(rev)
517 return self.start(rev) + self.length(rev)
518 def length(self, rev):
518 def length(self, rev):
519 return self.index[rev][1]
519 return self.index[rev][1]
520 def base(self, rev):
520 def base(self, rev):
521 return self.index[rev][3]
521 return self.index[rev][3]
522
522
523 def size(self, rev):
523 def size(self, rev):
524 """return the length of the uncompressed text for a given revision"""
524 """return the length of the uncompressed text for a given revision"""
525 l = self.index[rev][2]
525 l = self.index[rev][2]
526 if l >= 0:
526 if l >= 0:
527 return l
527 return l
528
528
529 t = self.revision(self.node(rev))
529 t = self.revision(self.node(rev))
530 return len(t)
530 return len(t)
531
531
532 # alternate implementation, The advantage to this code is it
532 # alternate implementation, The advantage to this code is it
533 # will be faster for a single revision. But, the results are not
533 # will be faster for a single revision. But, the results are not
534 # cached, so finding the size of every revision will be slower.
534 # cached, so finding the size of every revision will be slower.
535 """
535 """
536 if self.cache and self.cache[1] == rev:
536 if self.cache and self.cache[1] == rev:
537 return len(self.cache[2])
537 return len(self.cache[2])
538
538
539 base = self.base(rev)
539 base = self.base(rev)
540 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
540 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
541 base = self.cache[1]
541 base = self.cache[1]
542 text = self.cache[2]
542 text = self.cache[2]
543 else:
543 else:
544 text = self.revision(self.node(base))
544 text = self.revision(self.node(base))
545
545
546 l = len(text)
546 l = len(text)
547 for x in xrange(base + 1, rev + 1):
547 for x in xrange(base + 1, rev + 1):
548 l = mdiff.patchedsize(l, self.chunk(x))
548 l = mdiff.patchedsize(l, self.chunk(x))
549 return l
549 return l
550 """
550 """
551
551
552 def reachable(self, node, stop=None):
552 def reachable(self, node, stop=None):
553 """return a hash of all nodes ancestral to a given node, including
553 """return a hash of all nodes ancestral to a given node, including
554 the node itself, stopping when stop is matched"""
554 the node itself, stopping when stop is matched"""
555 reachable = {}
555 reachable = {}
556 visit = [node]
556 visit = [node]
557 reachable[node] = 1
557 reachable[node] = 1
558 if stop:
558 if stop:
559 stopn = self.rev(stop)
559 stopn = self.rev(stop)
560 else:
560 else:
561 stopn = 0
561 stopn = 0
562 while visit:
562 while visit:
563 n = visit.pop(0)
563 n = visit.pop(0)
564 if n == stop:
564 if n == stop:
565 continue
565 continue
566 if n == nullid:
566 if n == nullid:
567 continue
567 continue
568 for p in self.parents(n):
568 for p in self.parents(n):
569 if self.rev(p) < stopn:
569 if self.rev(p) < stopn:
570 continue
570 continue
571 if p not in reachable:
571 if p not in reachable:
572 reachable[p] = 1
572 reachable[p] = 1
573 visit.append(p)
573 visit.append(p)
574 return reachable
574 return reachable
575
575
576 def ancestors(self, *revs):
576 def ancestors(self, *revs):
577 'Generate the ancestors of revs using a breadth-first visit'
577 'Generate the ancestors of revs using a breadth-first visit'
578 visit = list(revs)
578 visit = list(revs)
579 seen = set([nullrev])
579 seen = set([nullrev])
580 while visit:
580 while visit:
581 for parent in self.parentrevs(visit.pop(0)):
581 for parent in self.parentrevs(visit.pop(0)):
582 if parent not in seen:
582 if parent not in seen:
583 visit.append(parent)
583 visit.append(parent)
584 seen.add(parent)
584 seen.add(parent)
585 yield parent
585 yield parent
586
586
587 def descendants(self, *revs):
587 def descendants(self, *revs):
588 'Generate the descendants of revs in topological order'
588 'Generate the descendants of revs in topological order'
589 seen = set(revs)
589 seen = set(revs)
590 for i in xrange(min(revs) + 1, len(self)):
590 for i in xrange(min(revs) + 1, len(self)):
591 for x in self.parentrevs(i):
591 for x in self.parentrevs(i):
592 if x != nullrev and x in seen:
592 if x != nullrev and x in seen:
593 seen.add(i)
593 seen.add(i)
594 yield i
594 yield i
595 break
595 break
596
596
597 def findmissing(self, common=None, heads=None):
597 def findmissing(self, common=None, heads=None):
598 '''
598 '''
599 returns the topologically sorted list of nodes from the set:
599 returns the topologically sorted list of nodes from the set:
600 missing = (ancestors(heads) \ ancestors(common))
600 missing = (ancestors(heads) \ ancestors(common))
601
601
602 where ancestors() is the set of ancestors from heads, heads included
602 where ancestors() is the set of ancestors from heads, heads included
603
603
604 if heads is None, the heads of the revlog are used
604 if heads is None, the heads of the revlog are used
605 if common is None, nullid is assumed to be a common node
605 if common is None, nullid is assumed to be a common node
606 '''
606 '''
607 if common is None:
607 if common is None:
608 common = [nullid]
608 common = [nullid]
609 if heads is None:
609 if heads is None:
610 heads = self.heads()
610 heads = self.heads()
611
611
612 common = [self.rev(n) for n in common]
612 common = [self.rev(n) for n in common]
613 heads = [self.rev(n) for n in heads]
613 heads = [self.rev(n) for n in heads]
614
614
615 # we want the ancestors, but inclusive
615 # we want the ancestors, but inclusive
616 has = set(self.ancestors(*common))
616 has = set(self.ancestors(*common))
617 has.add(nullrev)
617 has.add(nullrev)
618 has.update(common)
618 has.update(common)
619
619
620 # take all ancestors from heads that aren't in has
620 # take all ancestors from heads that aren't in has
621 missing = {}
621 missing = {}
622 visit = [r for r in heads if r not in has]
622 visit = [r for r in heads if r not in has]
623 while visit:
623 while visit:
624 r = visit.pop(0)
624 r = visit.pop(0)
625 if r in missing:
625 if r in missing:
626 continue
626 continue
627 else:
627 else:
628 missing[r] = None
628 missing[r] = None
629 for p in self.parentrevs(r):
629 for p in self.parentrevs(r):
630 if p not in has:
630 if p not in has:
631 visit.append(p)
631 visit.append(p)
632 missing = missing.keys()
632 missing = missing.keys()
633 missing.sort()
633 missing.sort()
634 return [self.node(r) for r in missing]
634 return [self.node(r) for r in missing]
635
635
636 def nodesbetween(self, roots=None, heads=None):
636 def nodesbetween(self, roots=None, heads=None):
637 """Return a tuple containing three elements. Elements 1 and 2 contain
637 """Return a tuple containing three elements. Elements 1 and 2 contain
638 a final list bases and heads after all the unreachable ones have been
638 a final list bases and heads after all the unreachable ones have been
639 pruned. Element 0 contains a topologically sorted list of all
639 pruned. Element 0 contains a topologically sorted list of all
640
640
641 nodes that satisfy these constraints:
641 nodes that satisfy these constraints:
642 1. All nodes must be descended from a node in roots (the nodes on
642 1. All nodes must be descended from a node in roots (the nodes on
643 roots are considered descended from themselves).
643 roots are considered descended from themselves).
644 2. All nodes must also be ancestors of a node in heads (the nodes in
644 2. All nodes must also be ancestors of a node in heads (the nodes in
645 heads are considered to be their own ancestors).
645 heads are considered to be their own ancestors).
646
646
647 If roots is unspecified, nullid is assumed as the only root.
647 If roots is unspecified, nullid is assumed as the only root.
648 If heads is unspecified, it is taken to be the output of the
648 If heads is unspecified, it is taken to be the output of the
649 heads method (i.e. a list of all nodes in the repository that
649 heads method (i.e. a list of all nodes in the repository that
650 have no children)."""
650 have no children)."""
651 nonodes = ([], [], [])
651 nonodes = ([], [], [])
652 if roots is not None:
652 if roots is not None:
653 roots = list(roots)
653 roots = list(roots)
654 if not roots:
654 if not roots:
655 return nonodes
655 return nonodes
656 lowestrev = min([self.rev(n) for n in roots])
656 lowestrev = min([self.rev(n) for n in roots])
657 else:
657 else:
658 roots = [nullid] # Everybody's a descendent of nullid
658 roots = [nullid] # Everybody's a descendent of nullid
659 lowestrev = nullrev
659 lowestrev = nullrev
660 if (lowestrev == nullrev) and (heads is None):
660 if (lowestrev == nullrev) and (heads is None):
661 # We want _all_ the nodes!
661 # We want _all_ the nodes!
662 return ([self.node(r) for r in self], [nullid], list(self.heads()))
662 return ([self.node(r) for r in self], [nullid], list(self.heads()))
663 if heads is None:
663 if heads is None:
664 # All nodes are ancestors, so the latest ancestor is the last
664 # All nodes are ancestors, so the latest ancestor is the last
665 # node.
665 # node.
666 highestrev = len(self) - 1
666 highestrev = len(self) - 1
667 # Set ancestors to None to signal that every node is an ancestor.
667 # Set ancestors to None to signal that every node is an ancestor.
668 ancestors = None
668 ancestors = None
669 # Set heads to an empty dictionary for later discovery of heads
669 # Set heads to an empty dictionary for later discovery of heads
670 heads = {}
670 heads = {}
671 else:
671 else:
672 heads = list(heads)
672 heads = list(heads)
673 if not heads:
673 if not heads:
674 return nonodes
674 return nonodes
675 ancestors = {}
675 ancestors = {}
676 # Turn heads into a dictionary so we can remove 'fake' heads.
676 # Turn heads into a dictionary so we can remove 'fake' heads.
677 # Also, later we will be using it to filter out the heads we can't
677 # Also, later we will be using it to filter out the heads we can't
678 # find from roots.
678 # find from roots.
679 heads = dict.fromkeys(heads, 0)
679 heads = dict.fromkeys(heads, 0)
680 # Start at the top and keep marking parents until we're done.
680 # Start at the top and keep marking parents until we're done.
681 nodestotag = heads.keys()
681 nodestotag = set(heads.keys())
682 # Remember where the top was so we can use it as a limit later.
682 # Remember where the top was so we can use it as a limit later.
683 highestrev = max([self.rev(n) for n in nodestotag])
683 highestrev = max([self.rev(n) for n in nodestotag])
684 while nodestotag:
684 while nodestotag:
685 # grab a node to tag
685 # grab a node to tag
686 n = nodestotag.pop()
686 n = nodestotag.pop()
687 # Never tag nullid
687 # Never tag nullid
688 if n == nullid:
688 if n == nullid:
689 continue
689 continue
690 # A node's revision number represents its place in a
690 # A node's revision number represents its place in a
691 # topologically sorted list of nodes.
691 # topologically sorted list of nodes.
692 r = self.rev(n)
692 r = self.rev(n)
693 if r >= lowestrev:
693 if r >= lowestrev:
694 if n not in ancestors:
694 if n not in ancestors:
695 # If we are possibly a descendent of one of the roots
695 # If we are possibly a descendent of one of the roots
696 # and we haven't already been marked as an ancestor
696 # and we haven't already been marked as an ancestor
697 ancestors[n] = 1 # Mark as ancestor
697 ancestors[n] = 1 # Mark as ancestor
698 # Add non-nullid parents to list of nodes to tag.
698 # Add non-nullid parents to list of nodes to tag.
699 nodestotag.extend([p for p in self.parents(n) if
699 nodestotag.update([p for p in self.parents(n) if
700 p != nullid])
700 p != nullid])
701 elif n in heads: # We've seen it before, is it a fake head?
701 elif n in heads: # We've seen it before, is it a fake head?
702 # So it is, real heads should not be the ancestors of
702 # So it is, real heads should not be the ancestors of
703 # any other heads.
703 # any other heads.
704 heads.pop(n)
704 heads.pop(n)
705 if not ancestors:
705 if not ancestors:
706 return nonodes
706 return nonodes
707 # Now that we have our set of ancestors, we want to remove any
707 # Now that we have our set of ancestors, we want to remove any
708 # roots that are not ancestors.
708 # roots that are not ancestors.
709
709
710 # If one of the roots was nullid, everything is included anyway.
710 # If one of the roots was nullid, everything is included anyway.
711 if lowestrev > nullrev:
711 if lowestrev > nullrev:
712 # But, since we weren't, let's recompute the lowest rev to not
712 # But, since we weren't, let's recompute the lowest rev to not
713 # include roots that aren't ancestors.
713 # include roots that aren't ancestors.
714
714
715 # Filter out roots that aren't ancestors of heads
715 # Filter out roots that aren't ancestors of heads
716 roots = [n for n in roots if n in ancestors]
716 roots = [n for n in roots if n in ancestors]
717 # Recompute the lowest revision
717 # Recompute the lowest revision
718 if roots:
718 if roots:
719 lowestrev = min([self.rev(n) for n in roots])
719 lowestrev = min([self.rev(n) for n in roots])
720 else:
720 else:
721 # No more roots? Return empty list
721 # No more roots? Return empty list
722 return nonodes
722 return nonodes
723 else:
723 else:
724 # We are descending from nullid, and don't need to care about
724 # We are descending from nullid, and don't need to care about
725 # any other roots.
725 # any other roots.
726 lowestrev = nullrev
726 lowestrev = nullrev
727 roots = [nullid]
727 roots = [nullid]
728 # Transform our roots list into a set.
728 # Transform our roots list into a set.
729 descendents = set(roots)
729 descendents = set(roots)
730 # Also, keep the original roots so we can filter out roots that aren't
730 # Also, keep the original roots so we can filter out roots that aren't
731 # 'real' roots (i.e. are descended from other roots).
731 # 'real' roots (i.e. are descended from other roots).
732 roots = descendents.copy()
732 roots = descendents.copy()
733 # Our topologically sorted list of output nodes.
733 # Our topologically sorted list of output nodes.
734 orderedout = []
734 orderedout = []
735 # Don't start at nullid since we don't want nullid in our output list,
735 # Don't start at nullid since we don't want nullid in our output list,
736 # and if nullid shows up in descedents, empty parents will look like
736 # and if nullid shows up in descedents, empty parents will look like
737 # they're descendents.
737 # they're descendents.
738 for r in xrange(max(lowestrev, 0), highestrev + 1):
738 for r in xrange(max(lowestrev, 0), highestrev + 1):
739 n = self.node(r)
739 n = self.node(r)
740 isdescendent = False
740 isdescendent = False
741 if lowestrev == nullrev: # Everybody is a descendent of nullid
741 if lowestrev == nullrev: # Everybody is a descendent of nullid
742 isdescendent = True
742 isdescendent = True
743 elif n in descendents:
743 elif n in descendents:
744 # n is already a descendent
744 # n is already a descendent
745 isdescendent = True
745 isdescendent = True
746 # This check only needs to be done here because all the roots
746 # This check only needs to be done here because all the roots
747 # will start being marked is descendents before the loop.
747 # will start being marked is descendents before the loop.
748 if n in roots:
748 if n in roots:
749 # If n was a root, check if it's a 'real' root.
749 # If n was a root, check if it's a 'real' root.
750 p = tuple(self.parents(n))
750 p = tuple(self.parents(n))
751 # If any of its parents are descendents, it's not a root.
751 # If any of its parents are descendents, it's not a root.
752 if (p[0] in descendents) or (p[1] in descendents):
752 if (p[0] in descendents) or (p[1] in descendents):
753 roots.remove(n)
753 roots.remove(n)
754 else:
754 else:
755 p = tuple(self.parents(n))
755 p = tuple(self.parents(n))
756 # A node is a descendent if either of its parents are
756 # A node is a descendent if either of its parents are
757 # descendents. (We seeded the dependents list with the roots
757 # descendents. (We seeded the dependents list with the roots
758 # up there, remember?)
758 # up there, remember?)
759 if (p[0] in descendents) or (p[1] in descendents):
759 if (p[0] in descendents) or (p[1] in descendents):
760 descendents.add(n)
760 descendents.add(n)
761 isdescendent = True
761 isdescendent = True
762 if isdescendent and ((ancestors is None) or (n in ancestors)):
762 if isdescendent and ((ancestors is None) or (n in ancestors)):
763 # Only include nodes that are both descendents and ancestors.
763 # Only include nodes that are both descendents and ancestors.
764 orderedout.append(n)
764 orderedout.append(n)
765 if (ancestors is not None) and (n in heads):
765 if (ancestors is not None) and (n in heads):
766 # We're trying to figure out which heads are reachable
766 # We're trying to figure out which heads are reachable
767 # from roots.
767 # from roots.
768 # Mark this head as having been reached
768 # Mark this head as having been reached
769 heads[n] = 1
769 heads[n] = 1
770 elif ancestors is None:
770 elif ancestors is None:
771 # Otherwise, we're trying to discover the heads.
771 # Otherwise, we're trying to discover the heads.
772 # Assume this is a head because if it isn't, the next step
772 # Assume this is a head because if it isn't, the next step
773 # will eventually remove it.
773 # will eventually remove it.
774 heads[n] = 1
774 heads[n] = 1
775 # But, obviously its parents aren't.
775 # But, obviously its parents aren't.
776 for p in self.parents(n):
776 for p in self.parents(n):
777 heads.pop(p, None)
777 heads.pop(p, None)
778 heads = [n for n in heads.iterkeys() if heads[n] != 0]
778 heads = [n for n in heads.iterkeys() if heads[n] != 0]
779 roots = list(roots)
779 roots = list(roots)
780 assert orderedout
780 assert orderedout
781 assert roots
781 assert roots
782 assert heads
782 assert heads
783 return (orderedout, roots, heads)
783 return (orderedout, roots, heads)
784
784
785 def heads(self, start=None, stop=None):
785 def heads(self, start=None, stop=None):
786 """return the list of all nodes that have no children
786 """return the list of all nodes that have no children
787
787
788 if start is specified, only heads that are descendants of
788 if start is specified, only heads that are descendants of
789 start will be returned
789 start will be returned
790 if stop is specified, it will consider all the revs from stop
790 if stop is specified, it will consider all the revs from stop
791 as if they had no children
791 as if they had no children
792 """
792 """
793 if start is None and stop is None:
793 if start is None and stop is None:
794 count = len(self)
794 count = len(self)
795 if not count:
795 if not count:
796 return [nullid]
796 return [nullid]
797 ishead = [1] * (count + 1)
797 ishead = [1] * (count + 1)
798 index = self.index
798 index = self.index
799 for r in xrange(count):
799 for r in xrange(count):
800 e = index[r]
800 e = index[r]
801 ishead[e[5]] = ishead[e[6]] = 0
801 ishead[e[5]] = ishead[e[6]] = 0
802 return [self.node(r) for r in xrange(count) if ishead[r]]
802 return [self.node(r) for r in xrange(count) if ishead[r]]
803
803
804 if start is None:
804 if start is None:
805 start = nullid
805 start = nullid
806 if stop is None:
806 if stop is None:
807 stop = []
807 stop = []
808 stoprevs = set([self.rev(n) for n in stop])
808 stoprevs = set([self.rev(n) for n in stop])
809 startrev = self.rev(start)
809 startrev = self.rev(start)
810 reachable = {startrev: 1}
810 reachable = {startrev: 1}
811 heads = {startrev: 1}
811 heads = {startrev: 1}
812
812
813 parentrevs = self.parentrevs
813 parentrevs = self.parentrevs
814 for r in xrange(startrev + 1, len(self)):
814 for r in xrange(startrev + 1, len(self)):
815 for p in parentrevs(r):
815 for p in parentrevs(r):
816 if p in reachable:
816 if p in reachable:
817 if r not in stoprevs:
817 if r not in stoprevs:
818 reachable[r] = 1
818 reachable[r] = 1
819 heads[r] = 1
819 heads[r] = 1
820 if p in heads and p not in stoprevs:
820 if p in heads and p not in stoprevs:
821 del heads[p]
821 del heads[p]
822
822
823 return [self.node(r) for r in heads]
823 return [self.node(r) for r in heads]
824
824
825 def children(self, node):
825 def children(self, node):
826 """find the children of a given node"""
826 """find the children of a given node"""
827 c = []
827 c = []
828 p = self.rev(node)
828 p = self.rev(node)
829 for r in range(p + 1, len(self)):
829 for r in range(p + 1, len(self)):
830 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
830 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
831 if prevs:
831 if prevs:
832 for pr in prevs:
832 for pr in prevs:
833 if pr == p:
833 if pr == p:
834 c.append(self.node(r))
834 c.append(self.node(r))
835 elif p == nullrev:
835 elif p == nullrev:
836 c.append(self.node(r))
836 c.append(self.node(r))
837 return c
837 return c
838
838
839 def _match(self, id):
839 def _match(self, id):
840 if isinstance(id, (long, int)):
840 if isinstance(id, (long, int)):
841 # rev
841 # rev
842 return self.node(id)
842 return self.node(id)
843 if len(id) == 20:
843 if len(id) == 20:
844 # possibly a binary node
844 # possibly a binary node
845 # odds of a binary node being all hex in ASCII are 1 in 10**25
845 # odds of a binary node being all hex in ASCII are 1 in 10**25
846 try:
846 try:
847 node = id
847 node = id
848 self.rev(node) # quick search the index
848 self.rev(node) # quick search the index
849 return node
849 return node
850 except LookupError:
850 except LookupError:
851 pass # may be partial hex id
851 pass # may be partial hex id
852 try:
852 try:
853 # str(rev)
853 # str(rev)
854 rev = int(id)
854 rev = int(id)
855 if str(rev) != id:
855 if str(rev) != id:
856 raise ValueError
856 raise ValueError
857 if rev < 0:
857 if rev < 0:
858 rev = len(self) + rev
858 rev = len(self) + rev
859 if rev < 0 or rev >= len(self):
859 if rev < 0 or rev >= len(self):
860 raise ValueError
860 raise ValueError
861 return self.node(rev)
861 return self.node(rev)
862 except (ValueError, OverflowError):
862 except (ValueError, OverflowError):
863 pass
863 pass
864 if len(id) == 40:
864 if len(id) == 40:
865 try:
865 try:
866 # a full hex nodeid?
866 # a full hex nodeid?
867 node = bin(id)
867 node = bin(id)
868 self.rev(node)
868 self.rev(node)
869 return node
869 return node
870 except (TypeError, LookupError):
870 except (TypeError, LookupError):
871 pass
871 pass
872
872
873 def _partialmatch(self, id):
873 def _partialmatch(self, id):
874 if len(id) < 40:
874 if len(id) < 40:
875 try:
875 try:
876 # hex(node)[:...]
876 # hex(node)[:...]
877 l = len(id) / 2 # grab an even number of digits
877 l = len(id) / 2 # grab an even number of digits
878 bin_id = bin(id[:l*2])
878 bin_id = bin(id[:l*2])
879 nl = [n for n in self.nodemap if n[:l] == bin_id]
879 nl = [n for n in self.nodemap if n[:l] == bin_id]
880 nl = [n for n in nl if hex(n).startswith(id)]
880 nl = [n for n in nl if hex(n).startswith(id)]
881 if len(nl) > 0:
881 if len(nl) > 0:
882 if len(nl) == 1:
882 if len(nl) == 1:
883 return nl[0]
883 return nl[0]
884 raise LookupError(id, self.indexfile,
884 raise LookupError(id, self.indexfile,
885 _('ambiguous identifier'))
885 _('ambiguous identifier'))
886 return None
886 return None
887 except TypeError:
887 except TypeError:
888 pass
888 pass
889
889
890 def lookup(self, id):
890 def lookup(self, id):
891 """locate a node based on:
891 """locate a node based on:
892 - revision number or str(revision number)
892 - revision number or str(revision number)
893 - nodeid or subset of hex nodeid
893 - nodeid or subset of hex nodeid
894 """
894 """
895 n = self._match(id)
895 n = self._match(id)
896 if n is not None:
896 if n is not None:
897 return n
897 return n
898 n = self._partialmatch(id)
898 n = self._partialmatch(id)
899 if n:
899 if n:
900 return n
900 return n
901
901
902 raise LookupError(id, self.indexfile, _('no match found'))
902 raise LookupError(id, self.indexfile, _('no match found'))
903
903
904 def cmp(self, node, text):
904 def cmp(self, node, text):
905 """compare text with a given file revision"""
905 """compare text with a given file revision"""
906 p1, p2 = self.parents(node)
906 p1, p2 = self.parents(node)
907 return hash(text, p1, p2) != node
907 return hash(text, p1, p2) != node
908
908
909 def chunk(self, rev, df=None):
909 def chunk(self, rev, df=None):
910 def loadcache(df):
910 def loadcache(df):
911 if not df:
911 if not df:
912 if self._inline:
912 if self._inline:
913 df = self.opener(self.indexfile)
913 df = self.opener(self.indexfile)
914 else:
914 else:
915 df = self.opener(self.datafile)
915 df = self.opener(self.datafile)
916 df.seek(start)
916 df.seek(start)
917 self._chunkcache = (start, df.read(cache_length))
917 self._chunkcache = (start, df.read(cache_length))
918
918
919 start, length = self.start(rev), self.length(rev)
919 start, length = self.start(rev), self.length(rev)
920 if self._inline:
920 if self._inline:
921 start += (rev + 1) * self._io.size
921 start += (rev + 1) * self._io.size
922 end = start + length
922 end = start + length
923
923
924 offset = 0
924 offset = 0
925 if not self._chunkcache:
925 if not self._chunkcache:
926 cache_length = max(65536, length)
926 cache_length = max(65536, length)
927 loadcache(df)
927 loadcache(df)
928 else:
928 else:
929 cache_start = self._chunkcache[0]
929 cache_start = self._chunkcache[0]
930 cache_length = len(self._chunkcache[1])
930 cache_length = len(self._chunkcache[1])
931 cache_end = cache_start + cache_length
931 cache_end = cache_start + cache_length
932 if start >= cache_start and end <= cache_end:
932 if start >= cache_start and end <= cache_end:
933 # it is cached
933 # it is cached
934 offset = start - cache_start
934 offset = start - cache_start
935 else:
935 else:
936 cache_length = max(65536, length)
936 cache_length = max(65536, length)
937 loadcache(df)
937 loadcache(df)
938
938
939 # avoid copying large chunks
939 # avoid copying large chunks
940 c = self._chunkcache[1]
940 c = self._chunkcache[1]
941 if cache_length != length:
941 if cache_length != length:
942 c = c[offset:offset + length]
942 c = c[offset:offset + length]
943
943
944 return decompress(c)
944 return decompress(c)
945
945
946 def revdiff(self, rev1, rev2):
946 def revdiff(self, rev1, rev2):
947 """return or calculate a delta between two revisions"""
947 """return or calculate a delta between two revisions"""
948 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
948 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
949 return self.chunk(rev2)
949 return self.chunk(rev2)
950
950
951 return mdiff.textdiff(self.revision(self.node(rev1)),
951 return mdiff.textdiff(self.revision(self.node(rev1)),
952 self.revision(self.node(rev2)))
952 self.revision(self.node(rev2)))
953
953
954 def revision(self, node):
954 def revision(self, node):
955 """return an uncompressed revision of a given node"""
955 """return an uncompressed revision of a given node"""
956 if node == nullid:
956 if node == nullid:
957 return ""
957 return ""
958 if self._cache and self._cache[0] == node:
958 if self._cache and self._cache[0] == node:
959 return str(self._cache[2])
959 return str(self._cache[2])
960
960
961 # look up what we need to read
961 # look up what we need to read
962 text = None
962 text = None
963 rev = self.rev(node)
963 rev = self.rev(node)
964 base = self.base(rev)
964 base = self.base(rev)
965
965
966 # check rev flags
966 # check rev flags
967 if self.index[rev][0] & 0xFFFF:
967 if self.index[rev][0] & 0xFFFF:
968 raise RevlogError(_('incompatible revision flag %x') %
968 raise RevlogError(_('incompatible revision flag %x') %
969 (self.index[rev][0] & 0xFFFF))
969 (self.index[rev][0] & 0xFFFF))
970
970
971 df = None
971 df = None
972
972
973 # do we have useful data cached?
973 # do we have useful data cached?
974 if self._cache and self._cache[1] >= base and self._cache[1] < rev:
974 if self._cache and self._cache[1] >= base and self._cache[1] < rev:
975 base = self._cache[1]
975 base = self._cache[1]
976 text = str(self._cache[2])
976 text = str(self._cache[2])
977 self._loadindex(base, rev + 1)
977 self._loadindex(base, rev + 1)
978 if not self._inline and rev > base + 1:
978 if not self._inline and rev > base + 1:
979 df = self.opener(self.datafile)
979 df = self.opener(self.datafile)
980 else:
980 else:
981 self._loadindex(base, rev + 1)
981 self._loadindex(base, rev + 1)
982 if not self._inline and rev > base:
982 if not self._inline and rev > base:
983 df = self.opener(self.datafile)
983 df = self.opener(self.datafile)
984 text = self.chunk(base, df=df)
984 text = self.chunk(base, df=df)
985
985
986 bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
986 bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
987 text = mdiff.patches(text, bins)
987 text = mdiff.patches(text, bins)
988 p1, p2 = self.parents(node)
988 p1, p2 = self.parents(node)
989 if node != hash(text, p1, p2):
989 if node != hash(text, p1, p2):
990 raise RevlogError(_("integrity check failed on %s:%d")
990 raise RevlogError(_("integrity check failed on %s:%d")
991 % (self.datafile, rev))
991 % (self.datafile, rev))
992
992
993 self._cache = (node, rev, text)
993 self._cache = (node, rev, text)
994 return text
994 return text
995
995
996 def checkinlinesize(self, tr, fp=None):
996 def checkinlinesize(self, tr, fp=None):
997 if not self._inline:
997 if not self._inline:
998 return
998 return
999 if not fp:
999 if not fp:
1000 fp = self.opener(self.indexfile, 'r')
1000 fp = self.opener(self.indexfile, 'r')
1001 fp.seek(0, 2)
1001 fp.seek(0, 2)
1002 size = fp.tell()
1002 size = fp.tell()
1003 if size < 131072:
1003 if size < 131072:
1004 return
1004 return
1005 trinfo = tr.find(self.indexfile)
1005 trinfo = tr.find(self.indexfile)
1006 if trinfo == None:
1006 if trinfo == None:
1007 raise RevlogError(_("%s not found in the transaction")
1007 raise RevlogError(_("%s not found in the transaction")
1008 % self.indexfile)
1008 % self.indexfile)
1009
1009
1010 trindex = trinfo[2]
1010 trindex = trinfo[2]
1011 dataoff = self.start(trindex)
1011 dataoff = self.start(trindex)
1012
1012
1013 tr.add(self.datafile, dataoff)
1013 tr.add(self.datafile, dataoff)
1014 df = self.opener(self.datafile, 'w')
1014 df = self.opener(self.datafile, 'w')
1015 try:
1015 try:
1016 calc = self._io.size
1016 calc = self._io.size
1017 for r in self:
1017 for r in self:
1018 start = self.start(r) + (r + 1) * calc
1018 start = self.start(r) + (r + 1) * calc
1019 length = self.length(r)
1019 length = self.length(r)
1020 fp.seek(start)
1020 fp.seek(start)
1021 d = fp.read(length)
1021 d = fp.read(length)
1022 df.write(d)
1022 df.write(d)
1023 finally:
1023 finally:
1024 df.close()
1024 df.close()
1025
1025
1026 fp.close()
1026 fp.close()
1027 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1027 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1028 self.version &= ~(REVLOGNGINLINEDATA)
1028 self.version &= ~(REVLOGNGINLINEDATA)
1029 self._inline = False
1029 self._inline = False
1030 for i in self:
1030 for i in self:
1031 e = self._io.packentry(self.index[i], self.node, self.version, i)
1031 e = self._io.packentry(self.index[i], self.node, self.version, i)
1032 fp.write(e)
1032 fp.write(e)
1033
1033
1034 # if we don't call rename, the temp file will never replace the
1034 # if we don't call rename, the temp file will never replace the
1035 # real index
1035 # real index
1036 fp.rename()
1036 fp.rename()
1037
1037
1038 tr.replace(self.indexfile, trindex * calc)
1038 tr.replace(self.indexfile, trindex * calc)
1039 self._chunkcache = None
1039 self._chunkcache = None
1040
1040
1041 def addrevision(self, text, transaction, link, p1, p2, d=None):
1041 def addrevision(self, text, transaction, link, p1, p2, d=None):
1042 """add a revision to the log
1042 """add a revision to the log
1043
1043
1044 text - the revision data to add
1044 text - the revision data to add
1045 transaction - the transaction object used for rollback
1045 transaction - the transaction object used for rollback
1046 link - the linkrev data to add
1046 link - the linkrev data to add
1047 p1, p2 - the parent nodeids of the revision
1047 p1, p2 - the parent nodeids of the revision
1048 d - an optional precomputed delta
1048 d - an optional precomputed delta
1049 """
1049 """
1050 dfh = None
1050 dfh = None
1051 if not self._inline:
1051 if not self._inline:
1052 dfh = self.opener(self.datafile, "a")
1052 dfh = self.opener(self.datafile, "a")
1053 ifh = self.opener(self.indexfile, "a+")
1053 ifh = self.opener(self.indexfile, "a+")
1054 try:
1054 try:
1055 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
1055 return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
1056 finally:
1056 finally:
1057 if dfh:
1057 if dfh:
1058 dfh.close()
1058 dfh.close()
1059 ifh.close()
1059 ifh.close()
1060
1060
1061 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
1061 def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
1062 node = hash(text, p1, p2)
1062 node = hash(text, p1, p2)
1063 if node in self.nodemap:
1063 if node in self.nodemap:
1064 return node
1064 return node
1065
1065
1066 curr = len(self)
1066 curr = len(self)
1067 prev = curr - 1
1067 prev = curr - 1
1068 base = self.base(prev)
1068 base = self.base(prev)
1069 offset = self.end(prev)
1069 offset = self.end(prev)
1070
1070
1071 if curr:
1071 if curr:
1072 if not d:
1072 if not d:
1073 ptext = self.revision(self.node(prev))
1073 ptext = self.revision(self.node(prev))
1074 d = mdiff.textdiff(ptext, text)
1074 d = mdiff.textdiff(ptext, text)
1075 data = compress(d)
1075 data = compress(d)
1076 l = len(data[1]) + len(data[0])
1076 l = len(data[1]) + len(data[0])
1077 dist = l + offset - self.start(base)
1077 dist = l + offset - self.start(base)
1078
1078
1079 # full versions are inserted when the needed deltas
1079 # full versions are inserted when the needed deltas
1080 # become comparable to the uncompressed text
1080 # become comparable to the uncompressed text
1081 if not curr or dist > len(text) * 2:
1081 if not curr or dist > len(text) * 2:
1082 data = compress(text)
1082 data = compress(text)
1083 l = len(data[1]) + len(data[0])
1083 l = len(data[1]) + len(data[0])
1084 base = curr
1084 base = curr
1085
1085
1086 e = (offset_type(offset, 0), l, len(text),
1086 e = (offset_type(offset, 0), l, len(text),
1087 base, link, self.rev(p1), self.rev(p2), node)
1087 base, link, self.rev(p1), self.rev(p2), node)
1088 self.index.insert(-1, e)
1088 self.index.insert(-1, e)
1089 self.nodemap[node] = curr
1089 self.nodemap[node] = curr
1090
1090
1091 entry = self._io.packentry(e, self.node, self.version, curr)
1091 entry = self._io.packentry(e, self.node, self.version, curr)
1092 if not self._inline:
1092 if not self._inline:
1093 transaction.add(self.datafile, offset)
1093 transaction.add(self.datafile, offset)
1094 transaction.add(self.indexfile, curr * len(entry))
1094 transaction.add(self.indexfile, curr * len(entry))
1095 if data[0]:
1095 if data[0]:
1096 dfh.write(data[0])
1096 dfh.write(data[0])
1097 dfh.write(data[1])
1097 dfh.write(data[1])
1098 dfh.flush()
1098 dfh.flush()
1099 ifh.write(entry)
1099 ifh.write(entry)
1100 else:
1100 else:
1101 offset += curr * self._io.size
1101 offset += curr * self._io.size
1102 transaction.add(self.indexfile, offset, curr)
1102 transaction.add(self.indexfile, offset, curr)
1103 ifh.write(entry)
1103 ifh.write(entry)
1104 ifh.write(data[0])
1104 ifh.write(data[0])
1105 ifh.write(data[1])
1105 ifh.write(data[1])
1106 self.checkinlinesize(transaction, ifh)
1106 self.checkinlinesize(transaction, ifh)
1107
1107
1108 self._cache = (node, curr, text)
1108 self._cache = (node, curr, text)
1109 return node
1109 return node
1110
1110
1111 def ancestor(self, a, b):
1111 def ancestor(self, a, b):
1112 """calculate the least common ancestor of nodes a and b"""
1112 """calculate the least common ancestor of nodes a and b"""
1113
1113
1114 def parents(rev):
1114 def parents(rev):
1115 return [p for p in self.parentrevs(rev) if p != nullrev]
1115 return [p for p in self.parentrevs(rev) if p != nullrev]
1116
1116
1117 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1117 c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
1118 if c is None:
1118 if c is None:
1119 return nullid
1119 return nullid
1120
1120
1121 return self.node(c)
1121 return self.node(c)
1122
1122
1123 def group(self, nodelist, lookup, infocollect=None):
1123 def group(self, nodelist, lookup, infocollect=None):
1124 """calculate a delta group
1124 """calculate a delta group
1125
1125
1126 Given a list of changeset revs, return a set of deltas and
1126 Given a list of changeset revs, return a set of deltas and
1127 metadata corresponding to nodes. the first delta is
1127 metadata corresponding to nodes. the first delta is
1128 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
1128 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
1129 have this parent as it has all history before these
1129 have this parent as it has all history before these
1130 changesets. parent is parent[0]
1130 changesets. parent is parent[0]
1131 """
1131 """
1132 revs = [self.rev(n) for n in nodelist]
1132 revs = [self.rev(n) for n in nodelist]
1133
1133
1134 # if we don't have any revisions touched by these changesets, bail
1134 # if we don't have any revisions touched by these changesets, bail
1135 if not revs:
1135 if not revs:
1136 yield changegroup.closechunk()
1136 yield changegroup.closechunk()
1137 return
1137 return
1138
1138
1139 # add the parent of the first rev
1139 # add the parent of the first rev
1140 p = self.parents(self.node(revs[0]))[0]
1140 p = self.parents(self.node(revs[0]))[0]
1141 revs.insert(0, self.rev(p))
1141 revs.insert(0, self.rev(p))
1142
1142
1143 # build deltas
1143 # build deltas
1144 for d in xrange(0, len(revs) - 1):
1144 for d in xrange(0, len(revs) - 1):
1145 a, b = revs[d], revs[d + 1]
1145 a, b = revs[d], revs[d + 1]
1146 nb = self.node(b)
1146 nb = self.node(b)
1147
1147
1148 if infocollect is not None:
1148 if infocollect is not None:
1149 infocollect(nb)
1149 infocollect(nb)
1150
1150
1151 p = self.parents(nb)
1151 p = self.parents(nb)
1152 meta = nb + p[0] + p[1] + lookup(nb)
1152 meta = nb + p[0] + p[1] + lookup(nb)
1153 if a == -1:
1153 if a == -1:
1154 d = self.revision(nb)
1154 d = self.revision(nb)
1155 meta += mdiff.trivialdiffheader(len(d))
1155 meta += mdiff.trivialdiffheader(len(d))
1156 else:
1156 else:
1157 d = self.revdiff(a, b)
1157 d = self.revdiff(a, b)
1158 yield changegroup.chunkheader(len(meta) + len(d))
1158 yield changegroup.chunkheader(len(meta) + len(d))
1159 yield meta
1159 yield meta
1160 if len(d) > 2**20:
1160 if len(d) > 2**20:
1161 pos = 0
1161 pos = 0
1162 while pos < len(d):
1162 while pos < len(d):
1163 pos2 = pos + 2 ** 18
1163 pos2 = pos + 2 ** 18
1164 yield d[pos:pos2]
1164 yield d[pos:pos2]
1165 pos = pos2
1165 pos = pos2
1166 else:
1166 else:
1167 yield d
1167 yield d
1168
1168
1169 yield changegroup.closechunk()
1169 yield changegroup.closechunk()
1170
1170
1171 def addgroup(self, revs, linkmapper, transaction):
1171 def addgroup(self, revs, linkmapper, transaction):
1172 """
1172 """
1173 add a delta group
1173 add a delta group
1174
1174
1175 given a set of deltas, add them to the revision log. the
1175 given a set of deltas, add them to the revision log. the
1176 first delta is against its parent, which should be in our
1176 first delta is against its parent, which should be in our
1177 log, the rest are against the previous delta.
1177 log, the rest are against the previous delta.
1178 """
1178 """
1179
1179
1180 #track the base of the current delta log
1180 #track the base of the current delta log
1181 r = len(self)
1181 r = len(self)
1182 t = r - 1
1182 t = r - 1
1183 node = None
1183 node = None
1184
1184
1185 base = prev = nullrev
1185 base = prev = nullrev
1186 start = end = textlen = 0
1186 start = end = textlen = 0
1187 if r:
1187 if r:
1188 end = self.end(t)
1188 end = self.end(t)
1189
1189
1190 ifh = self.opener(self.indexfile, "a+")
1190 ifh = self.opener(self.indexfile, "a+")
1191 isize = r * self._io.size
1191 isize = r * self._io.size
1192 if self._inline:
1192 if self._inline:
1193 transaction.add(self.indexfile, end + isize, r)
1193 transaction.add(self.indexfile, end + isize, r)
1194 dfh = None
1194 dfh = None
1195 else:
1195 else:
1196 transaction.add(self.indexfile, isize, r)
1196 transaction.add(self.indexfile, isize, r)
1197 transaction.add(self.datafile, end)
1197 transaction.add(self.datafile, end)
1198 dfh = self.opener(self.datafile, "a")
1198 dfh = self.opener(self.datafile, "a")
1199
1199
1200 try:
1200 try:
1201 # loop through our set of deltas
1201 # loop through our set of deltas
1202 chain = None
1202 chain = None
1203 for chunk in revs:
1203 for chunk in revs:
1204 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1204 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1205 link = linkmapper(cs)
1205 link = linkmapper(cs)
1206 if node in self.nodemap:
1206 if node in self.nodemap:
1207 # this can happen if two branches make the same change
1207 # this can happen if two branches make the same change
1208 chain = node
1208 chain = node
1209 continue
1209 continue
1210 delta = buffer(chunk, 80)
1210 delta = buffer(chunk, 80)
1211 del chunk
1211 del chunk
1212
1212
1213 for p in (p1, p2):
1213 for p in (p1, p2):
1214 if not p in self.nodemap:
1214 if not p in self.nodemap:
1215 raise LookupError(p, self.indexfile, _('unknown parent'))
1215 raise LookupError(p, self.indexfile, _('unknown parent'))
1216
1216
1217 if not chain:
1217 if not chain:
1218 # retrieve the parent revision of the delta chain
1218 # retrieve the parent revision of the delta chain
1219 chain = p1
1219 chain = p1
1220 if not chain in self.nodemap:
1220 if not chain in self.nodemap:
1221 raise LookupError(chain, self.indexfile, _('unknown base'))
1221 raise LookupError(chain, self.indexfile, _('unknown base'))
1222
1222
1223 # full versions are inserted when the needed deltas become
1223 # full versions are inserted when the needed deltas become
1224 # comparable to the uncompressed text or when the previous
1224 # comparable to the uncompressed text or when the previous
1225 # version is not the one we have a delta against. We use
1225 # version is not the one we have a delta against. We use
1226 # the size of the previous full rev as a proxy for the
1226 # the size of the previous full rev as a proxy for the
1227 # current size.
1227 # current size.
1228
1228
1229 if chain == prev:
1229 if chain == prev:
1230 cdelta = compress(delta)
1230 cdelta = compress(delta)
1231 cdeltalen = len(cdelta[0]) + len(cdelta[1])
1231 cdeltalen = len(cdelta[0]) + len(cdelta[1])
1232 textlen = mdiff.patchedsize(textlen, delta)
1232 textlen = mdiff.patchedsize(textlen, delta)
1233
1233
1234 if chain != prev or (end - start + cdeltalen) > textlen * 2:
1234 if chain != prev or (end - start + cdeltalen) > textlen * 2:
1235 # flush our writes here so we can read it in revision
1235 # flush our writes here so we can read it in revision
1236 if dfh:
1236 if dfh:
1237 dfh.flush()
1237 dfh.flush()
1238 ifh.flush()
1238 ifh.flush()
1239 text = self.revision(chain)
1239 text = self.revision(chain)
1240 if len(text) == 0:
1240 if len(text) == 0:
1241 # skip over trivial delta header
1241 # skip over trivial delta header
1242 text = buffer(delta, 12)
1242 text = buffer(delta, 12)
1243 else:
1243 else:
1244 text = mdiff.patches(text, [delta])
1244 text = mdiff.patches(text, [delta])
1245 del delta
1245 del delta
1246 chk = self._addrevision(text, transaction, link, p1, p2, None,
1246 chk = self._addrevision(text, transaction, link, p1, p2, None,
1247 ifh, dfh)
1247 ifh, dfh)
1248 if not dfh and not self._inline:
1248 if not dfh and not self._inline:
1249 # addrevision switched from inline to conventional
1249 # addrevision switched from inline to conventional
1250 # reopen the index
1250 # reopen the index
1251 dfh = self.opener(self.datafile, "a")
1251 dfh = self.opener(self.datafile, "a")
1252 ifh = self.opener(self.indexfile, "a")
1252 ifh = self.opener(self.indexfile, "a")
1253 if chk != node:
1253 if chk != node:
1254 raise RevlogError(_("consistency error adding group"))
1254 raise RevlogError(_("consistency error adding group"))
1255 textlen = len(text)
1255 textlen = len(text)
1256 else:
1256 else:
1257 e = (offset_type(end, 0), cdeltalen, textlen, base,
1257 e = (offset_type(end, 0), cdeltalen, textlen, base,
1258 link, self.rev(p1), self.rev(p2), node)
1258 link, self.rev(p1), self.rev(p2), node)
1259 self.index.insert(-1, e)
1259 self.index.insert(-1, e)
1260 self.nodemap[node] = r
1260 self.nodemap[node] = r
1261 entry = self._io.packentry(e, self.node, self.version, r)
1261 entry = self._io.packentry(e, self.node, self.version, r)
1262 if self._inline:
1262 if self._inline:
1263 ifh.write(entry)
1263 ifh.write(entry)
1264 ifh.write(cdelta[0])
1264 ifh.write(cdelta[0])
1265 ifh.write(cdelta[1])
1265 ifh.write(cdelta[1])
1266 self.checkinlinesize(transaction, ifh)
1266 self.checkinlinesize(transaction, ifh)
1267 if not self._inline:
1267 if not self._inline:
1268 dfh = self.opener(self.datafile, "a")
1268 dfh = self.opener(self.datafile, "a")
1269 ifh = self.opener(self.indexfile, "a")
1269 ifh = self.opener(self.indexfile, "a")
1270 else:
1270 else:
1271 dfh.write(cdelta[0])
1271 dfh.write(cdelta[0])
1272 dfh.write(cdelta[1])
1272 dfh.write(cdelta[1])
1273 ifh.write(entry)
1273 ifh.write(entry)
1274
1274
1275 t, r, chain, prev = r, r + 1, node, node
1275 t, r, chain, prev = r, r + 1, node, node
1276 base = self.base(t)
1276 base = self.base(t)
1277 start = self.start(base)
1277 start = self.start(base)
1278 end = self.end(t)
1278 end = self.end(t)
1279 finally:
1279 finally:
1280 if dfh:
1280 if dfh:
1281 dfh.close()
1281 dfh.close()
1282 ifh.close()
1282 ifh.close()
1283
1283
1284 return node
1284 return node
1285
1285
1286 def strip(self, minlink, transaction):
1286 def strip(self, minlink, transaction):
1287 """truncate the revlog on the first revision with a linkrev >= minlink
1287 """truncate the revlog on the first revision with a linkrev >= minlink
1288
1288
1289 This function is called when we're stripping revision minlink and
1289 This function is called when we're stripping revision minlink and
1290 its descendants from the repository.
1290 its descendants from the repository.
1291
1291
1292 We have to remove all revisions with linkrev >= minlink, because
1292 We have to remove all revisions with linkrev >= minlink, because
1293 the equivalent changelog revisions will be renumbered after the
1293 the equivalent changelog revisions will be renumbered after the
1294 strip.
1294 strip.
1295
1295
1296 So we truncate the revlog on the first of these revisions, and
1296 So we truncate the revlog on the first of these revisions, and
1297 trust that the caller has saved the revisions that shouldn't be
1297 trust that the caller has saved the revisions that shouldn't be
1298 removed and that it'll readd them after this truncation.
1298 removed and that it'll readd them after this truncation.
1299 """
1299 """
1300 if len(self) == 0:
1300 if len(self) == 0:
1301 return
1301 return
1302
1302
1303 if isinstance(self.index, lazyindex):
1303 if isinstance(self.index, lazyindex):
1304 self._loadindexmap()
1304 self._loadindexmap()
1305
1305
1306 for rev in self:
1306 for rev in self:
1307 if self.index[rev][4] >= minlink:
1307 if self.index[rev][4] >= minlink:
1308 break
1308 break
1309 else:
1309 else:
1310 return
1310 return
1311
1311
1312 # first truncate the files on disk
1312 # first truncate the files on disk
1313 end = self.start(rev)
1313 end = self.start(rev)
1314 if not self._inline:
1314 if not self._inline:
1315 transaction.add(self.datafile, end)
1315 transaction.add(self.datafile, end)
1316 end = rev * self._io.size
1316 end = rev * self._io.size
1317 else:
1317 else:
1318 end += rev * self._io.size
1318 end += rev * self._io.size
1319
1319
1320 transaction.add(self.indexfile, end)
1320 transaction.add(self.indexfile, end)
1321
1321
1322 # then reset internal state in memory to forget those revisions
1322 # then reset internal state in memory to forget those revisions
1323 self._cache = None
1323 self._cache = None
1324 self._chunkcache = None
1324 self._chunkcache = None
1325 for x in xrange(rev, len(self)):
1325 for x in xrange(rev, len(self)):
1326 del self.nodemap[self.node(x)]
1326 del self.nodemap[self.node(x)]
1327
1327
1328 del self.index[rev:-1]
1328 del self.index[rev:-1]
1329
1329
1330 def checksize(self):
1330 def checksize(self):
1331 expected = 0
1331 expected = 0
1332 if len(self):
1332 if len(self):
1333 expected = max(0, self.end(len(self) - 1))
1333 expected = max(0, self.end(len(self) - 1))
1334
1334
1335 try:
1335 try:
1336 f = self.opener(self.datafile)
1336 f = self.opener(self.datafile)
1337 f.seek(0, 2)
1337 f.seek(0, 2)
1338 actual = f.tell()
1338 actual = f.tell()
1339 dd = actual - expected
1339 dd = actual - expected
1340 except IOError, inst:
1340 except IOError, inst:
1341 if inst.errno != errno.ENOENT:
1341 if inst.errno != errno.ENOENT:
1342 raise
1342 raise
1343 dd = 0
1343 dd = 0
1344
1344
1345 try:
1345 try:
1346 f = self.opener(self.indexfile)
1346 f = self.opener(self.indexfile)
1347 f.seek(0, 2)
1347 f.seek(0, 2)
1348 actual = f.tell()
1348 actual = f.tell()
1349 s = self._io.size
1349 s = self._io.size
1350 i = max(0, actual / s)
1350 i = max(0, actual / s)
1351 di = actual - (i * s)
1351 di = actual - (i * s)
1352 if self._inline:
1352 if self._inline:
1353 databytes = 0
1353 databytes = 0
1354 for r in self:
1354 for r in self:
1355 databytes += max(0, self.length(r))
1355 databytes += max(0, self.length(r))
1356 dd = 0
1356 dd = 0
1357 di = actual - len(self) * s - databytes
1357 di = actual - len(self) * s - databytes
1358 except IOError, inst:
1358 except IOError, inst:
1359 if inst.errno != errno.ENOENT:
1359 if inst.errno != errno.ENOENT:
1360 raise
1360 raise
1361 di = 0
1361 di = 0
1362
1362
1363 return (dd, di)
1363 return (dd, di)
1364
1364
1365 def files(self):
1365 def files(self):
1366 res = [ self.indexfile ]
1366 res = [ self.indexfile ]
1367 if not self._inline:
1367 if not self._inline:
1368 res.append(self.datafile)
1368 res.append(self.datafile)
1369 return res
1369 return res
General Comments 0
You need to be logged in to leave comments. Login now