revlog: optimize deltachain
Matt Mackall
r11935:927ada25 default
@@ -1,1450 +1,1464 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

# import stuff from node for others to import from revlog
from node import bin, hex, nullid, nullrev, short #@UnusedImport
from i18n import _
import changegroup, ancestor, mdiff, parsers, error, util
import struct, zlib, errno

_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress
_sha = util.sha1

_cached, _uncached = 0, 0

# revlog header flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)
REVLOGSHALLOW = (1 << 17)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGSHALLOW

# revlog index flags
REVIDX_PARENTDELTA = 1
REVIDX_PUNCHED_FLAG = 2
REVIDX_KNOWN_FLAGS = REVIDX_PUNCHED_FLAG | REVIDX_PARENTDELTA

# amount of data read unconditionally, should be >= 4
# when not inline: threshold for using lazy index
_prereadsize = 1048576
# max size of revlog with inline data
_maxinline = 131072

RevlogError = error.RevlogError
LookupError = error.LookupError

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    return long(long(offset) << 16 | type)
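
# Editor's note -- a minimal sketch of how the three helpers above
# cooperate: a 48-bit file offset and a 16-bit type/flags field share
# one integer (values chosen for illustration only):
#
#     v = offset_type(4096, REVIDX_PARENTDELTA)
#     assert getoffset(v) == 4096
#     assert gettype(v) == REVIDX_PARENTDELTA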

nullhash = _sha(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent node is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        l = [p1, p2]
        l.sort()
        s = _sha(l[0])
        s.update(l[1])
    s.update(text)
    return s.digest()
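
# Editor's note -- a property worth calling out: the parents are sorted
# before hashing, so the node id does not depend on parent order; e.g.
# when neither parent is nullid:
#
#     hash(text, p1, p2) == hash(text, p2, p1)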

def compress(text):
    """ generate a possibly-compressed representation of text """
    if not text:
        return ("", text)
    l = len(text)
    bin = None
    if l < 44:
        pass
    elif l > 1000000:
        # zlib makes an internal copy, thus doubling memory usage for
        # large files, so lets do this in pieces
        z = zlib.compressobj()
        p = []
        pos = 0
        while pos < l:
            pos2 = pos + 2**20
            p.append(z.compress(text[pos:pos2]))
            pos = pos2
        p.append(z.flush())
        if sum(map(len, p)) < l:
            bin = "".join(p)
    else:
        bin = _compress(text)
    if bin is None or len(bin) > l:
        if text[0] == '\0':
            return ("", text)
        return ('u', text)
    return ("", bin)

def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    t = bin[0]
    if t == '\0':
        return bin
    if t == 'x':
        return _decompress(bin)
    if t == 'u':
        return bin[1:]
    raise RevlogError(_("unknown compression type %r") % t)
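
# Editor's note -- a round-trip sketch of the encoding above: the tuple
# from compress() is concatenated on disk, and the first byte tells
# decompress() what it has ('\0' empty or raw, 'x' zlib, 'u' stored
# uncompressed):
#
#     t = "some revision text" * 10
#     assert decompress("".join(compress(t))) == t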

class lazyparser(object):
    """
    this class avoids the need to parse the entirety of large indices
    """

    # lazyparser is not safe to use on windows if win32 extensions not
    # available. it keeps file handle open, which make it not possible
    # to break hardlinks on local cloned repos.

    def __init__(self, dataf):
        try:
            size = util.fstat(dataf).st_size
        except AttributeError:
            size = 0
        self.dataf = dataf
        self.s = struct.calcsize(indexformatng)
        self.datasize = size
        self.l = size // self.s
        self.index = [None] * self.l
        self.map = {nullid: nullrev}
        self.allmap = 0
        self.all = 0
        self.mapfind_count = 0

    def loadmap(self):
        """
        during a commit, we need to make sure the rev being added is
        not a duplicate. This requires loading the entire index,
        which is fairly slow. loadmap can load up just the node map,
        which takes much less time.
        """
        if self.allmap:
            return
        end = self.datasize
        self.allmap = 1
        cur = 0
        count = 0
        blocksize = self.s * 256
        self.dataf.seek(0)
        while cur < end:
            data = self.dataf.read(blocksize)
            off = 0
            for x in xrange(256):
                n = data[off + ngshaoffset:off + ngshaoffset + 20]
                self.map[n] = count
                count += 1
                if count >= self.l:
                    break
                off += self.s
            cur += blocksize

    def loadblock(self, blockstart, blocksize, data=None):
        if self.all:
            return
        if data is None:
            self.dataf.seek(blockstart)
            if blockstart + blocksize > self.datasize:
                # the revlog may have grown since we've started running,
                # but we don't have space in self.index for more entries.
                # limit blocksize so that we don't get too much data.
                blocksize = max(self.datasize - blockstart, 0)
            data = self.dataf.read(blocksize)
        lend = len(data) // self.s
        i = blockstart // self.s
        off = 0
        # lazyindex supports __delitem__
        if lend > len(self.index) - i:
            lend = len(self.index) - i
        for x in xrange(lend):
            if self.index[i + x] is None:
                b = data[off : off + self.s]
                self.index[i + x] = b
                n = b[ngshaoffset:ngshaoffset + 20]
                self.map[n] = i + x
            off += self.s

    def findnode(self, node):
        """search backwards through the index file for a specific node"""
        if self.allmap:
            return None

        # hg log will cause many many searches for the manifest
        # nodes.  After we get called a few times, just load the whole
        # thing.
        if self.mapfind_count > 8:
            self.loadmap()
            if node in self.map:
                return node
            return None
        self.mapfind_count += 1
        last = self.l - 1
        while self.index[last] != None:
            if last == 0:
                self.all = 1
                self.allmap = 1
                return None
            last -= 1
        end = (last + 1) * self.s
        blocksize = self.s * 256
        while end >= 0:
            start = max(end - blocksize, 0)
            self.dataf.seek(start)
            data = self.dataf.read(end - start)
            findend = end - start
            while True:
                # we're searching backwards, so we have to make sure
                # we don't find a changeset where this node is a parent
                off = data.find(node, 0, findend)
                findend = off
                if off >= 0:
                    i = off / self.s
                    off = i * self.s
                    n = data[off + ngshaoffset:off + ngshaoffset + 20]
                    if n == node:
                        self.map[n] = i + start / self.s
                        return node
                else:
                    break
            end -= blocksize
        return None

    def loadindex(self, i=None, end=None):
        if self.all:
            return
        all = False
        if i is None:
            blockstart = 0
            blocksize = (65536 / self.s) * self.s
            end = self.datasize
            all = True
        else:
            if end:
                blockstart = i * self.s
                end = end * self.s
                blocksize = end - blockstart
            else:
                blockstart = (i & ~1023) * self.s
                blocksize = self.s * 1024
                end = blockstart + blocksize
        while blockstart < end:
            self.loadblock(blockstart, blocksize)
            blockstart += blocksize
        if all:
            self.all = True

class lazyindex(object):
    """a lazy version of the index array"""
    def __init__(self, parser):
        self.p = parser
    def __len__(self):
        return len(self.p.index)
    def load(self, pos):
        if pos < 0:
            pos += len(self.p.index)
        self.p.loadindex(pos)
        return self.p.index[pos]
    def __getitem__(self, pos):
        return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
    def __setitem__(self, pos, item):
        self.p.index[pos] = _pack(indexformatng, *item)
    def __delitem__(self, pos):
        del self.p.index[pos]
    def insert(self, pos, e):
        self.p.index.insert(pos, _pack(indexformatng, *e))
    def append(self, e):
        self.p.index.append(_pack(indexformatng, *e))

class lazymap(object):
    """a lazy version of the node map"""
    def __init__(self, parser):
        self.p = parser
    def load(self, key):
        n = self.p.findnode(key)
        if n is None:
            raise KeyError(key)
    def __contains__(self, key):
        if key in self.p.map:
            return True
        self.p.loadmap()
        return key in self.p.map
    def __iter__(self):
        yield nullid
        for i, ret in enumerate(self.p.index):
            if not ret:
                self.p.loadindex(i)
                ret = self.p.index[i]
            if isinstance(ret, str):
                ret = _unpack(indexformatng, ret)
            yield ret[7]
    def __getitem__(self, key):
        try:
            return self.p.map[key]
        except KeyError:
            try:
                self.load(key)
                return self.p.map[key]
            except KeyError:
                raise KeyError("node " + hex(key))
    def __setitem__(self, key, val):
        self.p.map[key] = val
    def __delitem__(self, key):
        del self.p.map[key]

indexformatv0 = ">4l20s20s20s"
v0shaoffset = 56

class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, fp, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        if len(data) == _prereadsize:
            data += fp.read() # read the rest
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_("index entry flags need RevlogNG"))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
ngshaoffset = 32
versionformat = ">I"
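
# Editor's note -- a decoding sketch for the layout above: each RevlogNG
# index entry is a fixed 64-byte record (8 + 6*4 + 20 + 12 pad), so entry
# n of a non-inline index starts at byte n * 64:
#
#     s = struct.calcsize(indexformatng)   # 64
#     entry = _unpack(indexformatng, data[n * s:(n + 1) * s])
#     # (offset_flags, comp_len, uncomp_len, base, link, p1, p2, nodeid)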

class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, fp, data, inline):
        if len(data) == _prereadsize:
            if util.openhardlinks() and not inline:
                # big index, let's parse it on demand
                parser = lazyparser(fp)
                index = lazyindex(parser)
                nodemap = lazymap(parser)
                e = list(index[0])
                type = gettype(e[0])
                e[0] = offset_type(0, type)
                index[0] = e
                return index, nodemap, None
            else:
                data += fp.read()

        # call the C implementation to parse the index data
        index, nodemap, cache = parsers.parse_index(data, inline)
        return index, nodemap, cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.
    """
    def __init__(self, opener, indexfile, shallowroot=None):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        self._cache = None
        self._chunkcache = (0, '')
        self.nodemap = {nullid: nullrev}
        self.index = []
        self._shallowroot = shallowroot
        self._parentdelta = 0

        v = REVLOG_DEFAULT_VERSION
        if hasattr(opener, 'options') and 'defversion' in opener.options:
            v = opener.options['defversion']
            if v & REVLOGNG:
                v |= REVLOGNGINLINEDATA
            if v & REVLOGNG and 'parentdelta' in opener.options:
                self._parentdelta = 1

        if shallowroot:
            v |= REVLOGSHALLOW

        i = ''
        try:
            f = self.opener(self.indexfile)
            if "nonlazy" in getattr(self.opener, 'options', {}):
                i = f.read()
            else:
                i = f.read(_prereadsize)
            if len(i) > 0:
                v = struct.unpack(versionformat, i[:4])[0]
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        self._shallow = v & REVLOGSHALLOW
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        if i:
            try:
                d = self._io.parseindex(f, i, self._inline)
            except (ValueError, IndexError):
                raise RevlogError(_("index %s is corrupted") % (self.indexfile))
            self.index, self.nodemap, self._chunkcache = d
            if not self._chunkcache:
                self._chunkclear()

        # add the magic null revision at -1 (if it hasn't been done already)
        if (self.index == [] or isinstance(self.index, lazyindex) or
            self.index[-1][7] != nullid):
            self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))

    def _loadindex(self, start, end):
        """load a block of indexes all at once from the lazy parser"""
        if isinstance(self.index, lazyindex):
            self.index.p.loadindex(start, end)

    def _loadindexmap(self):
        """loads both the map and the index from the lazy parser"""
        if isinstance(self.index, lazyindex):
            p = self.index.p
            p.loadindex()
            self.nodemap = p.map

    def _loadmap(self):
        """loads the map from the lazy parser"""
        if isinstance(self.nodemap, lazymap):
            self.nodemap.p.loadmap()
            self.nodemap = self.nodemap.p.map

    def tip(self):
        return self.node(len(self.index) - 2)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        for i in xrange(len(self)):
            yield i
    def rev(self, node):
        try:
            return self.nodemap[node]
        except KeyError:
            raise LookupError(node, self.indexfile, _('no node'))
    def node(self, rev):
        return self.index[rev][7]
    def linkrev(self, rev):
        return self.index[rev][4]
    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
    def parentrevs(self, rev):
        return self.index[rev][5:7]
    def start(self, rev):
        return int(self.index[rev][0] >> 16)
    def end(self, rev):
        return self.start(rev) + self.length(rev)
    def length(self, rev):
        return self.index[rev][1]
    def base(self, rev):
        return self.index[rev][3]
    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF
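
    # Editor's note -- the accessors above are thin views onto the 8-tuple
    # held per revision in self.index; a sketch of the field layout:
    #
    #     (offset_flags,  # start(rev) = offset_flags >> 16,
    #                     # flags(rev) = offset_flags & 0xFFFF
    #      comp_len,      # length(rev)
    #      uncomp_len,    # size(rev), -1 if unknown
    #      base,          # base(rev): start of the delta chain
    #      linkrev,       # linkrev(rev)
    #      p1, p2,        # parentrevs(rev)
    #      nodeid)        # node(rev)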

    def size(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(self.node(rev))
        return len(t)

    def reachable(self, node, stop=None):
        """return the set of all nodes ancestral to a given node, including
        the node itself, stopping when stop is matched"""
        reachable = set((node,))
        visit = [node]
        if stop:
            stopn = self.rev(stop)
        else:
            stopn = 0
        while visit:
            n = visit.pop(0)
            if n == stop:
                continue
            if n == nullid:
                continue
            for p in self.parents(n):
                if self.rev(p) < stopn:
                    continue
                if p not in reachable:
                    reachable.add(p)
                    visit.append(p)
        return reachable

    def ancestors(self, *revs):
        """Generate the ancestors of 'revs' in reverse topological order.

        Yield a sequence of revision numbers starting with the parents
        of each revision in revs, i.e., each revision is *not* considered
        an ancestor of itself.  Results are in breadth-first order:
        parents of each rev in revs, then parents of those, etc.  Result
        does not include the null revision."""
        visit = list(revs)
        seen = set([nullrev])
        while visit:
            for parent in self.parentrevs(visit.pop(0)):
                if parent not in seen:
                    visit.append(parent)
                    seen.add(parent)
                    yield parent
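
    # Editor's note -- a usage sketch ('rl' is a hypothetical revlog, the
    # revision numbers are made up): one breadth-first pass collects every
    # ancestor of a set of revisions,
    #
    #     ancs = set(rl.ancestors(5, 7))
    #
    # yielding parents of 5 and 7 first, then grandparents, and so on;
    # neither 5 nor 7 itself is included, and nullrev is never yielded.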

    def descendants(self, *revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself.  Results are ordered by revision number (a
        topological sort)."""
        seen = set(revs)
        for i in xrange(min(revs) + 1, len(self)):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs.  If heads is
        not supplied, uses all of the revlog's heads.  If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        has = set(self.ancestors(*common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = [r for r in heads if r not in has]
        while visit:
            r = visit.pop(0)
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return [self.node(r) for r in missing]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'.  Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs.  If 'roots' is
        unspecified, uses nullid as the only root.  If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendent of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, 0)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendent of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [n for n in roots if n in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(n) for n in roots])
                else:
                    # No more roots?  Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendents = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendents.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descedents, empty parents will look like
        # they're descendents.
        for r in xrange(max(lowestrev, 0), highestrev + 1):
            n = self.node(r)
            isdescendent = False
            if lowestrev == nullrev:  # Everybody is a descendent of nullid
                isdescendent = True
            elif n in descendents:
                # n is already a descendent
                isdescendent = True
                # This check only needs to be done here because all the roots
                # will start being marked is descendents before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendents, it's not a root.
                    if (p[0] in descendents) or (p[1] in descendents):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendent if either of its parents are
                # descendents.  (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendents) or (p[1] in descendents):
                    descendents.add(n)
                    isdescendent = True
            if isdescendent and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendents and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = 1
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = 1
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [n for n in heads.iterkeys() if heads[n] != 0]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)

    def heads(self, start=None, stop=None):
        """return the list of all nodes that have no children

        if start is specified, only heads that are descendants of
        start will be returned
        if stop is specified, it will consider all the revs from stop
        as if they had no children
        """
        if start is None and stop is None:
            count = len(self)
            if not count:
                return [nullid]
            ishead = [1] * (count + 1)
            index = self.index
            for r in xrange(count):
                e = index[r]
                ishead[e[5]] = ishead[e[6]] = 0
            return [self.node(r) for r in xrange(count) if ishead[r]]

        if start is None:
            start = nullid
        if stop is None:
            stop = []
        stoprevs = set([self.rev(n) for n in stop])
        startrev = self.rev(start)
        reachable = set((startrev,))
        heads = set((startrev,))

        parentrevs = self.parentrevs
        for r in xrange(startrev + 1, len(self)):
            for p in parentrevs(r):
                if p in reachable:
                    if r not in stoprevs:
                        reachable.add(r)
                    heads.add(r)
                if p in heads and p not in stoprevs:
                    heads.remove(p)

        return [self.node(r) for r in heads]

    def children(self, node):
        """find the children of a given node"""
        c = []
        p = self.rev(node)
        for r in range(p + 1, len(self)):
            prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
            if prevs:
                for pr in prevs:
                    if pr == p:
                        c.append(self.node(r))
            elif p == nullrev:
                c.append(self.node(r))
        return c

    def descendant(self, start, end):
        for i in self.descendants(start):
            if i == end:
                return True
            elif i > end:
                break
        return False

    def ancestor(self, a, b):
        """calculate the least common ancestor of nodes a and b"""

        # fast path, check if it is a descendant
        a, b = self.rev(a), self.rev(b)
        start, end = sorted((a, b))
        if self.descendant(start, end):
            return self.node(start)

        def parents(rev):
            return [p for p in self.parentrevs(rev) if p != nullrev]

        c = ancestor.ancestor(a, b, parents)
        if c is None:
            return nullid

        return self.node(c)

    def _match(self, id):
        if isinstance(id, (long, int)):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2  # grab an even number of digits
                bin_id = bin(id[:l * 2])
                nl = [n for n in self.nodemap if n[:l] == bin_id]
                nl = [n for n in nl if hex(n).startswith(id)]
                if len(nl) > 0:
                    if len(nl) == 1:
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                return None
            except TypeError:
                pass
947
949
    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))

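    # Usage sketch (hypothetical caller, not from this file): lookup()
    # accepts every identifier form handled by _match() and _partialmatch():
    #
    #   rlog.lookup(0)             # revision number
    #   rlog.lookup('0')           # str(revision number)
    #   rlog.lookup('d1b4c0f')     # unambiguous hex prefix (example value)
    #   rlog.lookup(node)          # 20-byte binary nodeid
    #
    # Ambiguous prefixes and unknown ids raise LookupError.
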
    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different than what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

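    # Design note: cmp() never rebuilds the stored text. It recomputes the
    # nodeid hash of the candidate text against the stored parents and
    # compares nodeids, so an "unchanged?" test costs one SHA-1 rather than
    # one delta-chain reconstruction. Hypothetical caller:
    #
    #   if not rlog.cmp(node, candidate):
    #       pass  # candidate is byte-identical to the stored revision
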
    def _addchunk(self, offset, data):
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _prereadsize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _loadchunk(self, offset, length):
        if self._inline:
            df = self.opener(self.indexfile)
        else:
            df = self.opener(self.datafile)

        readahead = max(65536, length)
        df.seek(offset)
        d = df.read(readahead)
        self._addchunk(offset, d)
        if readahead > length:
            return d[:length]
        return d

    def _getchunk(self, offset, length):
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return d[cachestart:cacheend]

        return self._loadchunk(offset, length)

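    # Worked example of the cache-window test above (values assumed): with
    # self._chunkcache == (65536, d) and len(d) == 131072, a request for
    # offset=70000, length=1000 gives cachestart = 70000 - 65536 = 4464 and
    # cacheend = 5464. Both fall inside [0, 131072], so d[4464:5464] is
    # returned without touching the file.
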
    def _chunkraw(self, startrev, endrev):
        start = self.start(startrev)
        length = self.end(endrev) - start
        if self._inline:
            start += (startrev + 1) * self._io.size
        return self._getchunk(start, length)

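    # Layout note with assumed numbers: in an inline revlog the .i file
    # interleaves index entries and data chunks, so rev r's data sits after
    # r + 1 index entries (revs 0..r). With a 64-byte entry size, the data
    # for startrev=2 begins at self.start(2) + (2 + 1) * 64, which is
    # exactly the adjustment applied above.
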
    def _chunk(self, rev):
        return decompress(self._chunkraw(rev, rev))

    def _chunkclear(self):
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return previous revision or parentrev according to flags"""
        if self.base(rev) == rev:
            return nullrev
        elif self.flags(rev) & REVIDX_PARENTDELTA:
            return self.parentrevs(rev)[0]
        else:
            return rev - 1

    def deltachain(self, rev, cache):
        """return chain of revisions to construct a given revision"""
        chain = []
        check = False
        index = self.index
        e = index[rev]
        while rev != e[3] and rev != cache:
            chain.append(rev)
            if e[0] & REVIDX_PARENTDELTA:
                rev = e[5]
            else:
                rev -= 1
            e = index[rev]
        chain.reverse()
        if rev == cache:
            check = True
        return check, rev, chain

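    # The optimization above walks raw index tuples instead of calling
    # base(), flags() and deltaparent() per hop. Per the entry layout built
    # in _addrevision() below: e[0] is offset << 16 | flags, e[3] the chain
    # base, e[5] the first-parent revision. Rebuilding a text from the
    # result mirrors revision():
    #
    #   check, base, chain = rlog.deltachain(rev, nullrev)
    #   text = rlog._chunk(base)                  # full snapshot at the base
    #   text = mdiff.patches(text, [rlog._chunk(r) for r in chain])
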
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 != nullrev and self.deltaparent(rev2) == rev1:
            return self._chunk(rev2)

        return mdiff.textdiff(self.revision(self.node(rev1)),
                              self.revision(self.node(rev2)))

    def revision(self, node):
        """return an uncompressed revision of a given node"""
        cache = nullrev
        if node == nullid:
            return ""
        if self._cache:
            cache = self._cache[1]
            if self._cache[0] == node:
                return self._cache[2]

        # look up what we need to read
        text = None
        rev = self.rev(node)
        cache, base, chain = self.deltachain(rev, cache)

        # check rev flags
        if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))

        # do we have useful data cached?
        if cache and self._cache:
            global _cached
            _cached += 1
            text = self._cache[2]
        else:
            global _uncached
            _uncached += 1

        # drop cache to save memory
        self._cache = None

        self._loadindex(base, rev + 1)
        self._chunkraw(base, rev)
        if text is None:
            text = self._chunk(base)

        bins = [self._chunk(r) for r in chain]
        text = mdiff.patches(text, bins)
        p1, p2 = self.parents(node)
        if (node != hash(text, p1, p2) and
            not (self.flags(rev) & REVIDX_PUNCHED_FLAG)):
            raise RevlogError(_("integrity check failed on %s:%d")
                              % (self.indexfile, rev))

        self._cache = (node, rev, text)
        return text

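    # Pipeline summary for revision() (all names are the attributes used
    # above): deltachain() yields the base snapshot plus the revisions whose
    # deltas apply on top; _chunkraw(base, rev) warms the chunk cache with a
    # single contiguous read; mdiff.patches() folds the deltas into the base
    # text; the final hash comparison guards integrity. Hypothetical call:
    #
    #   text = rlog.revision(node)
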
    def checkinlinesize(self, tr, fp=None):
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._chunkraw(r, r))
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call rename, the temp file will never replace the
        # real index
        fp.rename()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()

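    # Behavior note for the split above (thresholds from module constants):
    # once an inline revlog's data outgrows _maxinline, every chunk is copied
    # into a standalone .d file, the index is rewritten without the
    # REVLOGNGINLINEDATA flag via an atomictemp rename, and the chunk cache
    # is cleared because every data offset has just changed.
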
    def addrevision(self, text, transaction, link, p1, p2, d=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        d - an optional precomputed delta
        """
        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        try:
            return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
        node = hash(text, p1, p2)
        if node in self.nodemap:
            return node

        curr = len(self)
        prev = curr - 1
        base = self.base(prev)
        offset = self.end(prev)
        flags = 0

        if curr:
            if not d:
                if self._parentdelta:
                    ptext = self.revision(p1)
                    flags = REVIDX_PARENTDELTA
                else:
                    ptext = self.revision(self.node(prev))
                d = mdiff.textdiff(ptext, text)
            data = compress(d)
            l = len(data[1]) + len(data[0])
            dist = l + offset - self.start(base)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        # or the base revision is punched
        if (not curr or dist > len(text) * 2 or
            (self.flags(base) & REVIDX_PUNCHED_FLAG)):
            data = compress(text)
            l = len(data[1]) + len(data[0])
            base = curr

        e = (offset_type(offset, flags), l, len(text),
             base, link, self.rev(p1), self.rev(p2), node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

        if type(text) == str: # only accept immutable objects
            self._cache = (node, curr, text)
        return node

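    # Worked example of the snapshot heuristic above (numbers invented):
    # with len(text) == 1000 and the chain spanning dist == 2400 bytes from
    # self.start(base), dist > 2 * len(text), so the revision is stored as a
    # compressed full text and becomes its own base. This bounds every delta
    # chain to roughly twice the size of the text it reconstructs.
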
    def group(self, nodelist, lookup, infocollect=None):
        """Calculate a delta group, yielding a sequence of changegroup chunks
        (strings).

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        parent(nodes[0]) -> nodes[0]; the receiver is guaranteed to
        have this parent as it has all history before these
        changesets. The parent is parent[0].
        """

        revs = [self.rev(n) for n in nodelist]

        # if we don't have any revisions touched by these changesets, bail
        if not revs:
            yield changegroup.closechunk()
            return

        # add the parent of the first rev
        p = self.parentrevs(revs[0])[0]
        revs.insert(0, p)

        # build deltas
        for d in xrange(len(revs) - 1):
            a, b = revs[d], revs[d + 1]
            nb = self.node(b)

            if infocollect is not None:
                infocollect(nb)

            p = self.parents(nb)
            meta = nb + p[0] + p[1] + lookup(nb)
            if a == -1:
                d = self.revision(nb)
                meta += mdiff.trivialdiffheader(len(d))
            else:
                d = self.revdiff(a, b)
            yield changegroup.chunkheader(len(meta) + len(d))
            yield meta
            yield d

        yield changegroup.closechunk()

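    # Wire-format sketch of what group() yields (framing helpers are the
    # changegroup functions used above): each revision becomes one
    # length-prefixed chunk of
    #
    #   node(20) + p1(20) + p2(20) + linknode(20) + delta
    #
    # where the delta is against the previous rev in the list (or a full
    # text behind a trivial diff header when that parent is absent), and the
    # stream ends with changegroup.closechunk(). Hypothetical consumer:
    #
    #   for chunk in rlog.group(nodes, lookup):
    #       send(chunk)  # send() is an assumed transport function
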
    def addgroup(self, revs, linkmapper, transaction):
        """
        add a delta group

        Given a set of deltas, add them to the revision log. The
        first delta is against its parent, which should be in our
        log; the rest are against the previous delta.
        """

        # track the base of the current delta log
        r = len(self)
        t = r - 1
        node = None

        base = prev = nullrev
        start = end = textlen = 0
        if r:
            end = self.end(t)

        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        try:
            # loop through our set of deltas
            chain = None
            for chunk in revs:
                node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue
                delta = buffer(chunk, 80)
                del chunk

                for p in (p1, p2):
                    if not p in self.nodemap:
                        raise LookupError(p, self.indexfile, _('unknown parent'))

                if not chain:
                    # retrieve the parent revision of the delta chain
                    chain = p1
                    if not chain in self.nodemap:
                        raise LookupError(chain, self.indexfile, _('unknown base'))

                # full versions are inserted when the needed deltas become
                # comparable to the uncompressed text or when the previous
                # version is not the one we have a delta against. We use
                # the size of the previous full rev as a proxy for the
                # current size.

                if chain == prev:
                    cdelta = compress(delta)
                    cdeltalen = len(cdelta[0]) + len(cdelta[1])
                    textlen = mdiff.patchedsize(textlen, delta)

                if chain != prev or (end - start + cdeltalen) > textlen * 2:
                    # flush our writes here so we can read it in revision
                    if dfh:
                        dfh.flush()
                    ifh.flush()
                    text = self.revision(chain)
                    if len(text) == 0:
                        # skip over trivial delta header
                        text = buffer(delta, 12)
                    else:
                        text = mdiff.patches(text, [delta])
                    del delta
                    chk = self._addrevision(text, transaction, link, p1, p2, None,
                                            ifh, dfh)
                    if not dfh and not self._inline:
                        # addrevision switched from inline to conventional
                        # reopen the index
                        dfh = self.opener(self.datafile, "a")
                        ifh = self.opener(self.indexfile, "a")
                    if chk != node:
                        raise RevlogError(_("consistency error adding group"))
                    textlen = len(text)
                else:
                    e = (offset_type(end, 0), cdeltalen, textlen, base,
                         link, self.rev(p1), self.rev(p2), node)
                    self.index.insert(-1, e)
                    self.nodemap[node] = r
                    entry = self._io.packentry(e, self.node, self.version, r)
                    if self._inline:
                        ifh.write(entry)
                        ifh.write(cdelta[0])
                        ifh.write(cdelta[1])
                        self.checkinlinesize(transaction, ifh)
                        if not self._inline:
                            dfh = self.opener(self.datafile, "a")
                            ifh = self.opener(self.indexfile, "a")
                    else:
                        dfh.write(cdelta[0])
                        dfh.write(cdelta[1])
                        ifh.write(entry)

                t, r, chain, prev = r, r + 1, node, node
                base = self.base(t)
                start = self.start(base)
                end = self.end(t)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return node

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        if isinstance(self.index, lazyindex):
            self._loadindexmap()

        for rev in self:
            if self.index[rev][4] >= minlink:
                break
        else:
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

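    # Truncation arithmetic used above, with assumed sizes: stripping from
    # rev 10 with a 64-byte index entry truncates a non-inline revlog's .d
    # file at self.start(10) and its .i file at 10 * 64; in an inline revlog
    # both streams share .i, so it truncates at self.start(10) + 10 * 64.
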
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res