##// END OF EJS Templates
revlog.revision(): minor cleanup...
Benoit Boissinot -
r11995:ff84cd2b default
parent child Browse files
Show More
@@ -1,1476 +1,1479 b''
1 # revlog.py - storage back-end for mercurial
1 # revlog.py - storage back-end for mercurial
2 #
2 #
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 """Storage back-end for Mercurial.
8 """Storage back-end for Mercurial.
9
9
10 This provides efficient delta storage with O(1) retrieve and append
10 This provides efficient delta storage with O(1) retrieve and append
11 and O(changes) merge between branches.
11 and O(changes) merge between branches.
12 """
12 """
13
13
14 # import stuff from node for others to import from revlog
14 # import stuff from node for others to import from revlog
15 from node import bin, hex, nullid, nullrev, short #@UnusedImport
15 from node import bin, hex, nullid, nullrev, short #@UnusedImport
16 from i18n import _
16 from i18n import _
17 import changegroup, ancestor, mdiff, parsers, error, util
17 import changegroup, ancestor, mdiff, parsers, error, util
18 import struct, zlib, errno
18 import struct, zlib, errno
19
19
20 _pack = struct.pack
20 _pack = struct.pack
21 _unpack = struct.unpack
21 _unpack = struct.unpack
22 _compress = zlib.compress
22 _compress = zlib.compress
23 _decompress = zlib.decompress
23 _decompress = zlib.decompress
24 _sha = util.sha1
24 _sha = util.sha1
25
25
26 _cached, _uncached = 0, 0
26 _cached, _uncached = 0, 0
27
27
28 # revlog header flags
28 # revlog header flags
29 REVLOGV0 = 0
29 REVLOGV0 = 0
30 REVLOGNG = 1
30 REVLOGNG = 1
31 REVLOGNGINLINEDATA = (1 << 16)
31 REVLOGNGINLINEDATA = (1 << 16)
32 REVLOGSHALLOW = (1 << 17)
32 REVLOGSHALLOW = (1 << 17)
33 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
33 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
34 REVLOG_DEFAULT_FORMAT = REVLOGNG
34 REVLOG_DEFAULT_FORMAT = REVLOGNG
35 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
35 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
36 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGSHALLOW
36 REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGSHALLOW
37
37
38 # revlog index flags
38 # revlog index flags
39 REVIDX_PARENTDELTA = 1
39 REVIDX_PARENTDELTA = 1
40 REVIDX_PUNCHED_FLAG = 2
40 REVIDX_PUNCHED_FLAG = 2
41 REVIDX_KNOWN_FLAGS = REVIDX_PUNCHED_FLAG | REVIDX_PARENTDELTA
41 REVIDX_KNOWN_FLAGS = REVIDX_PUNCHED_FLAG | REVIDX_PARENTDELTA
42
42
43 # amount of data read unconditionally, should be >= 4
43 # amount of data read unconditionally, should be >= 4
44 # when not inline: threshold for using lazy index
44 # when not inline: threshold for using lazy index
45 _prereadsize = 1048576
45 _prereadsize = 1048576
46 # max size of revlog with inline data
46 # max size of revlog with inline data
47 _maxinline = 131072
47 _maxinline = 131072
48
48
49 RevlogError = error.RevlogError
49 RevlogError = error.RevlogError
50 LookupError = error.LookupError
50 LookupError = error.LookupError
51
51
52 def getoffset(q):
52 def getoffset(q):
53 return int(q >> 16)
53 return int(q >> 16)
54
54
55 def gettype(q):
55 def gettype(q):
56 return int(q & 0xFFFF)
56 return int(q & 0xFFFF)
57
57
58 def offset_type(offset, type):
58 def offset_type(offset, type):
59 return long(long(offset) << 16 | type)
59 return long(long(offset) << 16 | type)
60
60
61 nullhash = _sha(nullid)
61 nullhash = _sha(nullid)
62
62
63 def hash(text, p1, p2):
63 def hash(text, p1, p2):
64 """generate a hash from the given text and its parent hashes
64 """generate a hash from the given text and its parent hashes
65
65
66 This hash combines both the current file contents and its history
66 This hash combines both the current file contents and its history
67 in a manner that makes it easy to distinguish nodes with the same
67 in a manner that makes it easy to distinguish nodes with the same
68 content in the revision graph.
68 content in the revision graph.
69 """
69 """
70 # As of now, if one of the parent node is null, p2 is null
70 # As of now, if one of the parent node is null, p2 is null
71 if p2 == nullid:
71 if p2 == nullid:
72 # deep copy of a hash is faster than creating one
72 # deep copy of a hash is faster than creating one
73 s = nullhash.copy()
73 s = nullhash.copy()
74 s.update(p1)
74 s.update(p1)
75 else:
75 else:
76 # none of the parent nodes are nullid
76 # none of the parent nodes are nullid
77 l = [p1, p2]
77 l = [p1, p2]
78 l.sort()
78 l.sort()
79 s = _sha(l[0])
79 s = _sha(l[0])
80 s.update(l[1])
80 s.update(l[1])
81 s.update(text)
81 s.update(text)
82 return s.digest()
82 return s.digest()
83
83
84 def compress(text):
84 def compress(text):
85 """ generate a possibly-compressed representation of text """
85 """ generate a possibly-compressed representation of text """
86 if not text:
86 if not text:
87 return ("", text)
87 return ("", text)
88 l = len(text)
88 l = len(text)
89 bin = None
89 bin = None
90 if l < 44:
90 if l < 44:
91 pass
91 pass
92 elif l > 1000000:
92 elif l > 1000000:
93 # zlib makes an internal copy, thus doubling memory usage for
93 # zlib makes an internal copy, thus doubling memory usage for
94 # large files, so lets do this in pieces
94 # large files, so lets do this in pieces
95 z = zlib.compressobj()
95 z = zlib.compressobj()
96 p = []
96 p = []
97 pos = 0
97 pos = 0
98 while pos < l:
98 while pos < l:
99 pos2 = pos + 2**20
99 pos2 = pos + 2**20
100 p.append(z.compress(text[pos:pos2]))
100 p.append(z.compress(text[pos:pos2]))
101 pos = pos2
101 pos = pos2
102 p.append(z.flush())
102 p.append(z.flush())
103 if sum(map(len, p)) < l:
103 if sum(map(len, p)) < l:
104 bin = "".join(p)
104 bin = "".join(p)
105 else:
105 else:
106 bin = _compress(text)
106 bin = _compress(text)
107 if bin is None or len(bin) > l:
107 if bin is None or len(bin) > l:
108 if text[0] == '\0':
108 if text[0] == '\0':
109 return ("", text)
109 return ("", text)
110 return ('u', text)
110 return ('u', text)
111 return ("", bin)
111 return ("", bin)
112
112
113 def decompress(bin):
113 def decompress(bin):
114 """ decompress the given input """
114 """ decompress the given input """
115 if not bin:
115 if not bin:
116 return bin
116 return bin
117 t = bin[0]
117 t = bin[0]
118 if t == '\0':
118 if t == '\0':
119 return bin
119 return bin
120 if t == 'x':
120 if t == 'x':
121 return _decompress(bin)
121 return _decompress(bin)
122 if t == 'u':
122 if t == 'u':
123 return bin[1:]
123 return bin[1:]
124 raise RevlogError(_("unknown compression type %r") % t)
124 raise RevlogError(_("unknown compression type %r") % t)
125
125
126 class lazyparser(object):
126 class lazyparser(object):
127 """
127 """
128 this class avoids the need to parse the entirety of large indices
128 this class avoids the need to parse the entirety of large indices
129 """
129 """
130
130
131 # lazyparser is not safe to use on windows if win32 extensions not
131 # lazyparser is not safe to use on windows if win32 extensions not
132 # available. it keeps file handle open, which make it not possible
132 # available. it keeps file handle open, which make it not possible
133 # to break hardlinks on local cloned repos.
133 # to break hardlinks on local cloned repos.
134
134
135 def __init__(self, dataf):
135 def __init__(self, dataf):
136 try:
136 try:
137 size = util.fstat(dataf).st_size
137 size = util.fstat(dataf).st_size
138 except AttributeError:
138 except AttributeError:
139 size = 0
139 size = 0
140 self.dataf = dataf
140 self.dataf = dataf
141 self.s = struct.calcsize(indexformatng)
141 self.s = struct.calcsize(indexformatng)
142 self.datasize = size
142 self.datasize = size
143 self.l = size // self.s
143 self.l = size // self.s
144 self.index = [None] * self.l
144 self.index = [None] * self.l
145 self.map = {nullid: nullrev}
145 self.map = {nullid: nullrev}
146 self.allmap = 0
146 self.allmap = 0
147 self.all = 0
147 self.all = 0
148 self.mapfind_count = 0
148 self.mapfind_count = 0
149
149
150 def loadmap(self):
150 def loadmap(self):
151 """
151 """
152 during a commit, we need to make sure the rev being added is
152 during a commit, we need to make sure the rev being added is
153 not a duplicate. This requires loading the entire index,
153 not a duplicate. This requires loading the entire index,
154 which is fairly slow. loadmap can load up just the node map,
154 which is fairly slow. loadmap can load up just the node map,
155 which takes much less time.
155 which takes much less time.
156 """
156 """
157 if self.allmap:
157 if self.allmap:
158 return
158 return
159 end = self.datasize
159 end = self.datasize
160 self.allmap = 1
160 self.allmap = 1
161 cur = 0
161 cur = 0
162 count = 0
162 count = 0
163 blocksize = self.s * 256
163 blocksize = self.s * 256
164 self.dataf.seek(0)
164 self.dataf.seek(0)
165 while cur < end:
165 while cur < end:
166 data = self.dataf.read(blocksize)
166 data = self.dataf.read(blocksize)
167 off = 0
167 off = 0
168 for x in xrange(256):
168 for x in xrange(256):
169 n = data[off + ngshaoffset:off + ngshaoffset + 20]
169 n = data[off + ngshaoffset:off + ngshaoffset + 20]
170 self.map[n] = count
170 self.map[n] = count
171 count += 1
171 count += 1
172 if count >= self.l:
172 if count >= self.l:
173 break
173 break
174 off += self.s
174 off += self.s
175 cur += blocksize
175 cur += blocksize
176
176
177 def loadblock(self, blockstart, blocksize, data=None):
177 def loadblock(self, blockstart, blocksize, data=None):
178 if self.all:
178 if self.all:
179 return
179 return
180 if data is None:
180 if data is None:
181 self.dataf.seek(blockstart)
181 self.dataf.seek(blockstart)
182 if blockstart + blocksize > self.datasize:
182 if blockstart + blocksize > self.datasize:
183 # the revlog may have grown since we've started running,
183 # the revlog may have grown since we've started running,
184 # but we don't have space in self.index for more entries.
184 # but we don't have space in self.index for more entries.
185 # limit blocksize so that we don't get too much data.
185 # limit blocksize so that we don't get too much data.
186 blocksize = max(self.datasize - blockstart, 0)
186 blocksize = max(self.datasize - blockstart, 0)
187 data = self.dataf.read(blocksize)
187 data = self.dataf.read(blocksize)
188 lend = len(data) // self.s
188 lend = len(data) // self.s
189 i = blockstart // self.s
189 i = blockstart // self.s
190 off = 0
190 off = 0
191 # lazyindex supports __delitem__
191 # lazyindex supports __delitem__
192 if lend > len(self.index) - i:
192 if lend > len(self.index) - i:
193 lend = len(self.index) - i
193 lend = len(self.index) - i
194 for x in xrange(lend):
194 for x in xrange(lend):
195 if self.index[i + x] is None:
195 if self.index[i + x] is None:
196 b = data[off : off + self.s]
196 b = data[off : off + self.s]
197 self.index[i + x] = b
197 self.index[i + x] = b
198 n = b[ngshaoffset:ngshaoffset + 20]
198 n = b[ngshaoffset:ngshaoffset + 20]
199 self.map[n] = i + x
199 self.map[n] = i + x
200 off += self.s
200 off += self.s
201
201
202 def findnode(self, node):
202 def findnode(self, node):
203 """search backwards through the index file for a specific node"""
203 """search backwards through the index file for a specific node"""
204 if self.allmap:
204 if self.allmap:
205 return None
205 return None
206
206
207 # hg log will cause many many searches for the manifest
207 # hg log will cause many many searches for the manifest
208 # nodes. After we get called a few times, just load the whole
208 # nodes. After we get called a few times, just load the whole
209 # thing.
209 # thing.
210 if self.mapfind_count > 8:
210 if self.mapfind_count > 8:
211 self.loadmap()
211 self.loadmap()
212 if node in self.map:
212 if node in self.map:
213 return node
213 return node
214 return None
214 return None
215 self.mapfind_count += 1
215 self.mapfind_count += 1
216 last = self.l - 1
216 last = self.l - 1
217 while self.index[last] != None:
217 while self.index[last] != None:
218 if last == 0:
218 if last == 0:
219 self.all = 1
219 self.all = 1
220 self.allmap = 1
220 self.allmap = 1
221 return None
221 return None
222 last -= 1
222 last -= 1
223 end = (last + 1) * self.s
223 end = (last + 1) * self.s
224 blocksize = self.s * 256
224 blocksize = self.s * 256
225 while end >= 0:
225 while end >= 0:
226 start = max(end - blocksize, 0)
226 start = max(end - blocksize, 0)
227 self.dataf.seek(start)
227 self.dataf.seek(start)
228 data = self.dataf.read(end - start)
228 data = self.dataf.read(end - start)
229 findend = end - start
229 findend = end - start
230 while True:
230 while True:
231 # we're searching backwards, so we have to make sure
231 # we're searching backwards, so we have to make sure
232 # we don't find a changeset where this node is a parent
232 # we don't find a changeset where this node is a parent
233 off = data.find(node, 0, findend)
233 off = data.find(node, 0, findend)
234 findend = off
234 findend = off
235 if off >= 0:
235 if off >= 0:
236 i = off / self.s
236 i = off / self.s
237 off = i * self.s
237 off = i * self.s
238 n = data[off + ngshaoffset:off + ngshaoffset + 20]
238 n = data[off + ngshaoffset:off + ngshaoffset + 20]
239 if n == node:
239 if n == node:
240 self.map[n] = i + start / self.s
240 self.map[n] = i + start / self.s
241 return node
241 return node
242 else:
242 else:
243 break
243 break
244 end -= blocksize
244 end -= blocksize
245 return None
245 return None
246
246
247 def loadindex(self, i=None, end=None):
247 def loadindex(self, i=None, end=None):
248 if self.all:
248 if self.all:
249 return
249 return
250 all = False
250 all = False
251 if i is None:
251 if i is None:
252 blockstart = 0
252 blockstart = 0
253 blocksize = (65536 / self.s) * self.s
253 blocksize = (65536 / self.s) * self.s
254 end = self.datasize
254 end = self.datasize
255 all = True
255 all = True
256 else:
256 else:
257 if end:
257 if end:
258 blockstart = i * self.s
258 blockstart = i * self.s
259 end = end * self.s
259 end = end * self.s
260 blocksize = end - blockstart
260 blocksize = end - blockstart
261 else:
261 else:
262 blockstart = (i & ~1023) * self.s
262 blockstart = (i & ~1023) * self.s
263 blocksize = self.s * 1024
263 blocksize = self.s * 1024
264 end = blockstart + blocksize
264 end = blockstart + blocksize
265 while blockstart < end:
265 while blockstart < end:
266 self.loadblock(blockstart, blocksize)
266 self.loadblock(blockstart, blocksize)
267 blockstart += blocksize
267 blockstart += blocksize
268 if all:
268 if all:
269 self.all = True
269 self.all = True
270
270
271 class lazyindex(object):
271 class lazyindex(object):
272 """a lazy version of the index array"""
272 """a lazy version of the index array"""
273 def __init__(self, parser):
273 def __init__(self, parser):
274 self.p = parser
274 self.p = parser
275 def __len__(self):
275 def __len__(self):
276 return len(self.p.index)
276 return len(self.p.index)
277 def load(self, pos):
277 def load(self, pos):
278 if pos < 0:
278 if pos < 0:
279 pos += len(self.p.index)
279 pos += len(self.p.index)
280 self.p.loadindex(pos)
280 self.p.loadindex(pos)
281 return self.p.index[pos]
281 return self.p.index[pos]
282 def __getitem__(self, pos):
282 def __getitem__(self, pos):
283 return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
283 return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
284 def __setitem__(self, pos, item):
284 def __setitem__(self, pos, item):
285 self.p.index[pos] = _pack(indexformatng, *item)
285 self.p.index[pos] = _pack(indexformatng, *item)
286 def __delitem__(self, pos):
286 def __delitem__(self, pos):
287 del self.p.index[pos]
287 del self.p.index[pos]
288 def insert(self, pos, e):
288 def insert(self, pos, e):
289 self.p.index.insert(pos, _pack(indexformatng, *e))
289 self.p.index.insert(pos, _pack(indexformatng, *e))
290 def append(self, e):
290 def append(self, e):
291 self.p.index.append(_pack(indexformatng, *e))
291 self.p.index.append(_pack(indexformatng, *e))
292
292
293 class lazymap(object):
293 class lazymap(object):
294 """a lazy version of the node map"""
294 """a lazy version of the node map"""
295 def __init__(self, parser):
295 def __init__(self, parser):
296 self.p = parser
296 self.p = parser
297 def load(self, key):
297 def load(self, key):
298 n = self.p.findnode(key)
298 n = self.p.findnode(key)
299 if n is None:
299 if n is None:
300 raise KeyError(key)
300 raise KeyError(key)
301 def __contains__(self, key):
301 def __contains__(self, key):
302 if key in self.p.map:
302 if key in self.p.map:
303 return True
303 return True
304 self.p.loadmap()
304 self.p.loadmap()
305 return key in self.p.map
305 return key in self.p.map
306 def __iter__(self):
306 def __iter__(self):
307 yield nullid
307 yield nullid
308 for i, ret in enumerate(self.p.index):
308 for i, ret in enumerate(self.p.index):
309 if not ret:
309 if not ret:
310 self.p.loadindex(i)
310 self.p.loadindex(i)
311 ret = self.p.index[i]
311 ret = self.p.index[i]
312 if isinstance(ret, str):
312 if isinstance(ret, str):
313 ret = _unpack(indexformatng, ret)
313 ret = _unpack(indexformatng, ret)
314 yield ret[7]
314 yield ret[7]
315 def __getitem__(self, key):
315 def __getitem__(self, key):
316 try:
316 try:
317 return self.p.map[key]
317 return self.p.map[key]
318 except KeyError:
318 except KeyError:
319 try:
319 try:
320 self.load(key)
320 self.load(key)
321 return self.p.map[key]
321 return self.p.map[key]
322 except KeyError:
322 except KeyError:
323 raise KeyError("node " + hex(key))
323 raise KeyError("node " + hex(key))
324 def __setitem__(self, key, val):
324 def __setitem__(self, key, val):
325 self.p.map[key] = val
325 self.p.map[key] = val
326 def __delitem__(self, key):
326 def __delitem__(self, key):
327 del self.p.map[key]
327 del self.p.map[key]
328
328
329 indexformatv0 = ">4l20s20s20s"
329 indexformatv0 = ">4l20s20s20s"
330 v0shaoffset = 56
330 v0shaoffset = 56
331
331
332 class revlogoldio(object):
332 class revlogoldio(object):
333 def __init__(self):
333 def __init__(self):
334 self.size = struct.calcsize(indexformatv0)
334 self.size = struct.calcsize(indexformatv0)
335
335
336 def parseindex(self, fp, data, inline):
336 def parseindex(self, fp, data, inline):
337 s = self.size
337 s = self.size
338 index = []
338 index = []
339 nodemap = {nullid: nullrev}
339 nodemap = {nullid: nullrev}
340 n = off = 0
340 n = off = 0
341 if len(data) == _prereadsize:
341 if len(data) == _prereadsize:
342 data += fp.read() # read the rest
342 data += fp.read() # read the rest
343 l = len(data)
343 l = len(data)
344 while off + s <= l:
344 while off + s <= l:
345 cur = data[off:off + s]
345 cur = data[off:off + s]
346 off += s
346 off += s
347 e = _unpack(indexformatv0, cur)
347 e = _unpack(indexformatv0, cur)
348 # transform to revlogv1 format
348 # transform to revlogv1 format
349 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
349 e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
350 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
350 nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
351 index.append(e2)
351 index.append(e2)
352 nodemap[e[6]] = n
352 nodemap[e[6]] = n
353 n += 1
353 n += 1
354
354
355 return index, nodemap, None
355 return index, nodemap, None
356
356
357 def packentry(self, entry, node, version, rev):
357 def packentry(self, entry, node, version, rev):
358 if gettype(entry[0]):
358 if gettype(entry[0]):
359 raise RevlogError(_("index entry flags need RevlogNG"))
359 raise RevlogError(_("index entry flags need RevlogNG"))
360 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
360 e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
361 node(entry[5]), node(entry[6]), entry[7])
361 node(entry[5]), node(entry[6]), entry[7])
362 return _pack(indexformatv0, *e2)
362 return _pack(indexformatv0, *e2)
363
363
364 # index ng:
364 # index ng:
365 # 6 bytes: offset
365 # 6 bytes: offset
366 # 2 bytes: flags
366 # 2 bytes: flags
367 # 4 bytes: compressed length
367 # 4 bytes: compressed length
368 # 4 bytes: uncompressed length
368 # 4 bytes: uncompressed length
369 # 4 bytes: base rev
369 # 4 bytes: base rev
370 # 4 bytes: link rev
370 # 4 bytes: link rev
371 # 4 bytes: parent 1 rev
371 # 4 bytes: parent 1 rev
372 # 4 bytes: parent 2 rev
372 # 4 bytes: parent 2 rev
373 # 32 bytes: nodeid
373 # 32 bytes: nodeid
374 indexformatng = ">Qiiiiii20s12x"
374 indexformatng = ">Qiiiiii20s12x"
375 ngshaoffset = 32
375 ngshaoffset = 32
376 versionformat = ">I"
376 versionformat = ">I"
377
377
378 class revlogio(object):
378 class revlogio(object):
379 def __init__(self):
379 def __init__(self):
380 self.size = struct.calcsize(indexformatng)
380 self.size = struct.calcsize(indexformatng)
381
381
382 def parseindex(self, fp, data, inline):
382 def parseindex(self, fp, data, inline):
383 if len(data) == _prereadsize:
383 if len(data) == _prereadsize:
384 if util.openhardlinks() and not inline:
384 if util.openhardlinks() and not inline:
385 # big index, let's parse it on demand
385 # big index, let's parse it on demand
386 parser = lazyparser(fp)
386 parser = lazyparser(fp)
387 index = lazyindex(parser)
387 index = lazyindex(parser)
388 nodemap = lazymap(parser)
388 nodemap = lazymap(parser)
389 e = list(index[0])
389 e = list(index[0])
390 type = gettype(e[0])
390 type = gettype(e[0])
391 e[0] = offset_type(0, type)
391 e[0] = offset_type(0, type)
392 index[0] = e
392 index[0] = e
393 return index, nodemap, None
393 return index, nodemap, None
394 else:
394 else:
395 data += fp.read()
395 data += fp.read()
396
396
397 # call the C implementation to parse the index data
397 # call the C implementation to parse the index data
398 index, nodemap, cache = parsers.parse_index(data, inline)
398 index, nodemap, cache = parsers.parse_index(data, inline)
399 return index, nodemap, cache
399 return index, nodemap, cache
400
400
401 def packentry(self, entry, node, version, rev):
401 def packentry(self, entry, node, version, rev):
402 p = _pack(indexformatng, *entry)
402 p = _pack(indexformatng, *entry)
403 if rev == 0:
403 if rev == 0:
404 p = _pack(versionformat, version) + p[4:]
404 p = _pack(versionformat, version) + p[4:]
405 return p
405 return p
406
406
407 class revlog(object):
407 class revlog(object):
408 """
408 """
409 the underlying revision storage object
409 the underlying revision storage object
410
410
411 A revlog consists of two parts, an index and the revision data.
411 A revlog consists of two parts, an index and the revision data.
412
412
413 The index is a file with a fixed record size containing
413 The index is a file with a fixed record size containing
414 information on each revision, including its nodeid (hash), the
414 information on each revision, including its nodeid (hash), the
415 nodeids of its parents, the position and offset of its data within
415 nodeids of its parents, the position and offset of its data within
416 the data file, and the revision it's based on. Finally, each entry
416 the data file, and the revision it's based on. Finally, each entry
417 contains a linkrev entry that can serve as a pointer to external
417 contains a linkrev entry that can serve as a pointer to external
418 data.
418 data.
419
419
420 The revision data itself is a linear collection of data chunks.
420 The revision data itself is a linear collection of data chunks.
421 Each chunk represents a revision and is usually represented as a
421 Each chunk represents a revision and is usually represented as a
422 delta against the previous chunk. To bound lookup time, runs of
422 delta against the previous chunk. To bound lookup time, runs of
423 deltas are limited to about 2 times the length of the original
423 deltas are limited to about 2 times the length of the original
424 version data. This makes retrieval of a version proportional to
424 version data. This makes retrieval of a version proportional to
425 its size, or O(1) relative to the number of revisions.
425 its size, or O(1) relative to the number of revisions.
426
426
427 Both pieces of the revlog are written to in an append-only
427 Both pieces of the revlog are written to in an append-only
428 fashion, which means we never need to rewrite a file to insert or
428 fashion, which means we never need to rewrite a file to insert or
429 remove data, and can use some simple techniques to avoid the need
429 remove data, and can use some simple techniques to avoid the need
430 for locking while reading.
430 for locking while reading.
431 """
431 """
432 def __init__(self, opener, indexfile, shallowroot=None):
432 def __init__(self, opener, indexfile, shallowroot=None):
433 """
433 """
434 create a revlog object
434 create a revlog object
435
435
436 opener is a function that abstracts the file opening operation
436 opener is a function that abstracts the file opening operation
437 and can be used to implement COW semantics or the like.
437 and can be used to implement COW semantics or the like.
438 """
438 """
439 self.indexfile = indexfile
439 self.indexfile = indexfile
440 self.datafile = indexfile[:-2] + ".d"
440 self.datafile = indexfile[:-2] + ".d"
441 self.opener = opener
441 self.opener = opener
442 self._cache = None
442 self._cache = None
443 self._chunkcache = (0, '')
443 self._chunkcache = (0, '')
444 self.nodemap = {nullid: nullrev}
444 self.nodemap = {nullid: nullrev}
445 self.index = []
445 self.index = []
446 self._shallowroot = shallowroot
446 self._shallowroot = shallowroot
447 self._parentdelta = 0
447 self._parentdelta = 0
448
448
449 v = REVLOG_DEFAULT_VERSION
449 v = REVLOG_DEFAULT_VERSION
450 if hasattr(opener, 'options') and 'defversion' in opener.options:
450 if hasattr(opener, 'options') and 'defversion' in opener.options:
451 v = opener.options['defversion']
451 v = opener.options['defversion']
452 if v & REVLOGNG:
452 if v & REVLOGNG:
453 v |= REVLOGNGINLINEDATA
453 v |= REVLOGNGINLINEDATA
454 if v & REVLOGNG and 'parentdelta' in opener.options:
454 if v & REVLOGNG and 'parentdelta' in opener.options:
455 self._parentdelta = 1
455 self._parentdelta = 1
456
456
457 if shallowroot:
457 if shallowroot:
458 v |= REVLOGSHALLOW
458 v |= REVLOGSHALLOW
459
459
460 i = ''
460 i = ''
461 try:
461 try:
462 f = self.opener(self.indexfile)
462 f = self.opener(self.indexfile)
463 if "nonlazy" in getattr(self.opener, 'options', {}):
463 if "nonlazy" in getattr(self.opener, 'options', {}):
464 i = f.read()
464 i = f.read()
465 else:
465 else:
466 i = f.read(_prereadsize)
466 i = f.read(_prereadsize)
467 if len(i) > 0:
467 if len(i) > 0:
468 v = struct.unpack(versionformat, i[:4])[0]
468 v = struct.unpack(versionformat, i[:4])[0]
469 except IOError, inst:
469 except IOError, inst:
470 if inst.errno != errno.ENOENT:
470 if inst.errno != errno.ENOENT:
471 raise
471 raise
472
472
473 self.version = v
473 self.version = v
474 self._inline = v & REVLOGNGINLINEDATA
474 self._inline = v & REVLOGNGINLINEDATA
475 self._shallow = v & REVLOGSHALLOW
475 self._shallow = v & REVLOGSHALLOW
476 flags = v & ~0xFFFF
476 flags = v & ~0xFFFF
477 fmt = v & 0xFFFF
477 fmt = v & 0xFFFF
478 if fmt == REVLOGV0 and flags:
478 if fmt == REVLOGV0 and flags:
479 raise RevlogError(_("index %s unknown flags %#04x for format v0")
479 raise RevlogError(_("index %s unknown flags %#04x for format v0")
480 % (self.indexfile, flags >> 16))
480 % (self.indexfile, flags >> 16))
481 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
481 elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
482 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
482 raise RevlogError(_("index %s unknown flags %#04x for revlogng")
483 % (self.indexfile, flags >> 16))
483 % (self.indexfile, flags >> 16))
484 elif fmt > REVLOGNG:
484 elif fmt > REVLOGNG:
485 raise RevlogError(_("index %s unknown format %d")
485 raise RevlogError(_("index %s unknown format %d")
486 % (self.indexfile, fmt))
486 % (self.indexfile, fmt))
487
487
488 self._io = revlogio()
488 self._io = revlogio()
489 if self.version == REVLOGV0:
489 if self.version == REVLOGV0:
490 self._io = revlogoldio()
490 self._io = revlogoldio()
491 if i:
491 if i:
492 try:
492 try:
493 d = self._io.parseindex(f, i, self._inline)
493 d = self._io.parseindex(f, i, self._inline)
494 except (ValueError, IndexError):
494 except (ValueError, IndexError):
495 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
495 raise RevlogError(_("index %s is corrupted") % (self.indexfile))
496 self.index, self.nodemap, self._chunkcache = d
496 self.index, self.nodemap, self._chunkcache = d
497 if not self._chunkcache:
497 if not self._chunkcache:
498 self._chunkclear()
498 self._chunkclear()
499
499
500 # add the magic null revision at -1 (if it hasn't been done already)
500 # add the magic null revision at -1 (if it hasn't been done already)
501 if (self.index == [] or isinstance(self.index, lazyindex) or
501 if (self.index == [] or isinstance(self.index, lazyindex) or
502 self.index[-1][7] != nullid) :
502 self.index[-1][7] != nullid) :
503 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
503 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
504
504
505 def _loadindex(self, start, end):
505 def _loadindex(self, start, end):
506 """load a block of indexes all at once from the lazy parser"""
506 """load a block of indexes all at once from the lazy parser"""
507 if isinstance(self.index, lazyindex):
507 if isinstance(self.index, lazyindex):
508 self.index.p.loadindex(start, end)
508 self.index.p.loadindex(start, end)
509
509
510 def _loadindexmap(self):
510 def _loadindexmap(self):
511 """loads both the map and the index from the lazy parser"""
511 """loads both the map and the index from the lazy parser"""
512 if isinstance(self.index, lazyindex):
512 if isinstance(self.index, lazyindex):
513 p = self.index.p
513 p = self.index.p
514 p.loadindex()
514 p.loadindex()
515 self.nodemap = p.map
515 self.nodemap = p.map
516
516
517 def _loadmap(self):
517 def _loadmap(self):
518 """loads the map from the lazy parser"""
518 """loads the map from the lazy parser"""
519 if isinstance(self.nodemap, lazymap):
519 if isinstance(self.nodemap, lazymap):
520 self.nodemap.p.loadmap()
520 self.nodemap.p.loadmap()
521 self.nodemap = self.nodemap.p.map
521 self.nodemap = self.nodemap.p.map
522
522
523 def tip(self):
523 def tip(self):
524 return self.node(len(self.index) - 2)
524 return self.node(len(self.index) - 2)
525 def __len__(self):
525 def __len__(self):
526 return len(self.index) - 1
526 return len(self.index) - 1
527 def __iter__(self):
527 def __iter__(self):
528 for i in xrange(len(self)):
528 for i in xrange(len(self)):
529 yield i
529 yield i
530 def rev(self, node):
530 def rev(self, node):
531 try:
531 try:
532 return self.nodemap[node]
532 return self.nodemap[node]
533 except KeyError:
533 except KeyError:
534 raise LookupError(node, self.indexfile, _('no node'))
534 raise LookupError(node, self.indexfile, _('no node'))
535 def node(self, rev):
535 def node(self, rev):
536 return self.index[rev][7]
536 return self.index[rev][7]
537 def linkrev(self, rev):
537 def linkrev(self, rev):
538 return self.index[rev][4]
538 return self.index[rev][4]
539 def parents(self, node):
539 def parents(self, node):
540 i = self.index
540 i = self.index
541 d = i[self.rev(node)]
541 d = i[self.rev(node)]
542 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
542 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
543 def parentrevs(self, rev):
543 def parentrevs(self, rev):
544 return self.index[rev][5:7]
544 return self.index[rev][5:7]
545 def start(self, rev):
545 def start(self, rev):
546 return int(self.index[rev][0] >> 16)
546 return int(self.index[rev][0] >> 16)
547 def end(self, rev):
547 def end(self, rev):
548 return self.start(rev) + self.length(rev)
548 return self.start(rev) + self.length(rev)
549 def length(self, rev):
549 def length(self, rev):
550 return self.index[rev][1]
550 return self.index[rev][1]
551 def base(self, rev):
551 def base(self, rev):
552 return self.index[rev][3]
552 return self.index[rev][3]
553 def flags(self, rev):
553 def flags(self, rev):
554 return self.index[rev][0] & 0xFFFF
554 return self.index[rev][0] & 0xFFFF
555
555
556 def size(self, rev):
556 def size(self, rev):
557 """return the length of the uncompressed text for a given revision"""
557 """return the length of the uncompressed text for a given revision"""
558 l = self.index[rev][2]
558 l = self.index[rev][2]
559 if l >= 0:
559 if l >= 0:
560 return l
560 return l
561
561
562 t = self.revision(self.node(rev))
562 t = self.revision(self.node(rev))
563 return len(t)
563 return len(t)
564
564
565 def reachable(self, node, stop=None):
565 def reachable(self, node, stop=None):
566 """return the set of all nodes ancestral to a given node, including
566 """return the set of all nodes ancestral to a given node, including
567 the node itself, stopping when stop is matched"""
567 the node itself, stopping when stop is matched"""
568 reachable = set((node,))
568 reachable = set((node,))
569 visit = [node]
569 visit = [node]
570 if stop:
570 if stop:
571 stopn = self.rev(stop)
571 stopn = self.rev(stop)
572 else:
572 else:
573 stopn = 0
573 stopn = 0
574 while visit:
574 while visit:
575 n = visit.pop(0)
575 n = visit.pop(0)
576 if n == stop:
576 if n == stop:
577 continue
577 continue
578 if n == nullid:
578 if n == nullid:
579 continue
579 continue
580 for p in self.parents(n):
580 for p in self.parents(n):
581 if self.rev(p) < stopn:
581 if self.rev(p) < stopn:
582 continue
582 continue
583 if p not in reachable:
583 if p not in reachable:
584 reachable.add(p)
584 reachable.add(p)
585 visit.append(p)
585 visit.append(p)
586 return reachable
586 return reachable
587
587
588 def ancestors(self, *revs):
588 def ancestors(self, *revs):
589 """Generate the ancestors of 'revs' in reverse topological order.
589 """Generate the ancestors of 'revs' in reverse topological order.
590
590
591 Yield a sequence of revision numbers starting with the parents
591 Yield a sequence of revision numbers starting with the parents
592 of each revision in revs, i.e., each revision is *not* considered
592 of each revision in revs, i.e., each revision is *not* considered
593 an ancestor of itself. Results are in breadth-first order:
593 an ancestor of itself. Results are in breadth-first order:
594 parents of each rev in revs, then parents of those, etc. Result
594 parents of each rev in revs, then parents of those, etc. Result
595 does not include the null revision."""
595 does not include the null revision."""
596 visit = list(revs)
596 visit = list(revs)
597 seen = set([nullrev])
597 seen = set([nullrev])
598 while visit:
598 while visit:
599 for parent in self.parentrevs(visit.pop(0)):
599 for parent in self.parentrevs(visit.pop(0)):
600 if parent not in seen:
600 if parent not in seen:
601 visit.append(parent)
601 visit.append(parent)
602 seen.add(parent)
602 seen.add(parent)
603 yield parent
603 yield parent
604
604
605 def descendants(self, *revs):
605 def descendants(self, *revs):
606 """Generate the descendants of 'revs' in revision order.
606 """Generate the descendants of 'revs' in revision order.
607
607
608 Yield a sequence of revision numbers starting with a child of
608 Yield a sequence of revision numbers starting with a child of
609 some rev in revs, i.e., each revision is *not* considered a
609 some rev in revs, i.e., each revision is *not* considered a
610 descendant of itself. Results are ordered by revision number (a
610 descendant of itself. Results are ordered by revision number (a
611 topological sort)."""
611 topological sort)."""
612 seen = set(revs)
612 seen = set(revs)
613 for i in xrange(min(revs) + 1, len(self)):
613 for i in xrange(min(revs) + 1, len(self)):
614 for x in self.parentrevs(i):
614 for x in self.parentrevs(i):
615 if x != nullrev and x in seen:
615 if x != nullrev and x in seen:
616 seen.add(i)
616 seen.add(i)
617 yield i
617 yield i
618 break
618 break
619
619
620 def findmissing(self, common=None, heads=None):
620 def findmissing(self, common=None, heads=None):
621 """Return the ancestors of heads that are not ancestors of common.
621 """Return the ancestors of heads that are not ancestors of common.
622
622
623 More specifically, return a list of nodes N such that every N
623 More specifically, return a list of nodes N such that every N
624 satisfies the following constraints:
624 satisfies the following constraints:
625
625
626 1. N is an ancestor of some node in 'heads'
626 1. N is an ancestor of some node in 'heads'
627 2. N is not an ancestor of any node in 'common'
627 2. N is not an ancestor of any node in 'common'
628
628
629 The list is sorted by revision number, meaning it is
629 The list is sorted by revision number, meaning it is
630 topologically sorted.
630 topologically sorted.
631
631
632 'heads' and 'common' are both lists of node IDs. If heads is
632 'heads' and 'common' are both lists of node IDs. If heads is
633 not supplied, uses all of the revlog's heads. If common is not
633 not supplied, uses all of the revlog's heads. If common is not
634 supplied, uses nullid."""
634 supplied, uses nullid."""
635 if common is None:
635 if common is None:
636 common = [nullid]
636 common = [nullid]
637 if heads is None:
637 if heads is None:
638 heads = self.heads()
638 heads = self.heads()
639
639
640 common = [self.rev(n) for n in common]
640 common = [self.rev(n) for n in common]
641 heads = [self.rev(n) for n in heads]
641 heads = [self.rev(n) for n in heads]
642
642
643 # we want the ancestors, but inclusive
643 # we want the ancestors, but inclusive
644 has = set(self.ancestors(*common))
644 has = set(self.ancestors(*common))
645 has.add(nullrev)
645 has.add(nullrev)
646 has.update(common)
646 has.update(common)
647
647
648 # take all ancestors from heads that aren't in has
648 # take all ancestors from heads that aren't in has
649 missing = set()
649 missing = set()
650 visit = [r for r in heads if r not in has]
650 visit = [r for r in heads if r not in has]
651 while visit:
651 while visit:
652 r = visit.pop(0)
652 r = visit.pop(0)
653 if r in missing:
653 if r in missing:
654 continue
654 continue
655 else:
655 else:
656 missing.add(r)
656 missing.add(r)
657 for p in self.parentrevs(r):
657 for p in self.parentrevs(r):
658 if p not in has:
658 if p not in has:
659 visit.append(p)
659 visit.append(p)
660 missing = list(missing)
660 missing = list(missing)
661 missing.sort()
661 missing.sort()
662 return [self.node(r) for r in missing]
662 return [self.node(r) for r in missing]
663
663
664 def nodesbetween(self, roots=None, heads=None):
664 def nodesbetween(self, roots=None, heads=None):
665 """Return a topological path from 'roots' to 'heads'.
665 """Return a topological path from 'roots' to 'heads'.
666
666
667 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
667 Return a tuple (nodes, outroots, outheads) where 'nodes' is a
668 topologically sorted list of all nodes N that satisfy both of
668 topologically sorted list of all nodes N that satisfy both of
669 these constraints:
669 these constraints:
670
670
671 1. N is a descendant of some node in 'roots'
671 1. N is a descendant of some node in 'roots'
672 2. N is an ancestor of some node in 'heads'
672 2. N is an ancestor of some node in 'heads'
673
673
674 Every node is considered to be both a descendant and an ancestor
674 Every node is considered to be both a descendant and an ancestor
675 of itself, so every reachable node in 'roots' and 'heads' will be
675 of itself, so every reachable node in 'roots' and 'heads' will be
676 included in 'nodes'.
676 included in 'nodes'.
677
677
678 'outroots' is the list of reachable nodes in 'roots', i.e., the
678 'outroots' is the list of reachable nodes in 'roots', i.e., the
679 subset of 'roots' that is returned in 'nodes'. Likewise,
679 subset of 'roots' that is returned in 'nodes'. Likewise,
680 'outheads' is the subset of 'heads' that is also in 'nodes'.
680 'outheads' is the subset of 'heads' that is also in 'nodes'.
681
681
682 'roots' and 'heads' are both lists of node IDs. If 'roots' is
682 'roots' and 'heads' are both lists of node IDs. If 'roots' is
683 unspecified, uses nullid as the only root. If 'heads' is
683 unspecified, uses nullid as the only root. If 'heads' is
684 unspecified, uses list of all of the revlog's heads."""
684 unspecified, uses list of all of the revlog's heads."""
685 nonodes = ([], [], [])
685 nonodes = ([], [], [])
686 if roots is not None:
686 if roots is not None:
687 roots = list(roots)
687 roots = list(roots)
688 if not roots:
688 if not roots:
689 return nonodes
689 return nonodes
690 lowestrev = min([self.rev(n) for n in roots])
690 lowestrev = min([self.rev(n) for n in roots])
691 else:
691 else:
692 roots = [nullid] # Everybody's a descendent of nullid
692 roots = [nullid] # Everybody's a descendent of nullid
693 lowestrev = nullrev
693 lowestrev = nullrev
694 if (lowestrev == nullrev) and (heads is None):
694 if (lowestrev == nullrev) and (heads is None):
695 # We want _all_ the nodes!
695 # We want _all_ the nodes!
696 return ([self.node(r) for r in self], [nullid], list(self.heads()))
696 return ([self.node(r) for r in self], [nullid], list(self.heads()))
697 if heads is None:
697 if heads is None:
698 # All nodes are ancestors, so the latest ancestor is the last
698 # All nodes are ancestors, so the latest ancestor is the last
699 # node.
699 # node.
700 highestrev = len(self) - 1
700 highestrev = len(self) - 1
701 # Set ancestors to None to signal that every node is an ancestor.
701 # Set ancestors to None to signal that every node is an ancestor.
702 ancestors = None
702 ancestors = None
703 # Set heads to an empty dictionary for later discovery of heads
703 # Set heads to an empty dictionary for later discovery of heads
704 heads = {}
704 heads = {}
705 else:
705 else:
706 heads = list(heads)
706 heads = list(heads)
707 if not heads:
707 if not heads:
708 return nonodes
708 return nonodes
709 ancestors = set()
709 ancestors = set()
710 # Turn heads into a dictionary so we can remove 'fake' heads.
710 # Turn heads into a dictionary so we can remove 'fake' heads.
711 # Also, later we will be using it to filter out the heads we can't
711 # Also, later we will be using it to filter out the heads we can't
712 # find from roots.
712 # find from roots.
713 heads = dict.fromkeys(heads, 0)
713 heads = dict.fromkeys(heads, 0)
714 # Start at the top and keep marking parents until we're done.
714 # Start at the top and keep marking parents until we're done.
715 nodestotag = set(heads)
715 nodestotag = set(heads)
716 # Remember where the top was so we can use it as a limit later.
716 # Remember where the top was so we can use it as a limit later.
717 highestrev = max([self.rev(n) for n in nodestotag])
717 highestrev = max([self.rev(n) for n in nodestotag])
718 while nodestotag:
718 while nodestotag:
719 # grab a node to tag
719 # grab a node to tag
720 n = nodestotag.pop()
720 n = nodestotag.pop()
721 # Never tag nullid
721 # Never tag nullid
722 if n == nullid:
722 if n == nullid:
723 continue
723 continue
724 # A node's revision number represents its place in a
724 # A node's revision number represents its place in a
725 # topologically sorted list of nodes.
725 # topologically sorted list of nodes.
726 r = self.rev(n)
726 r = self.rev(n)
727 if r >= lowestrev:
727 if r >= lowestrev:
728 if n not in ancestors:
728 if n not in ancestors:
729 # If we are possibly a descendent of one of the roots
729 # If we are possibly a descendent of one of the roots
730 # and we haven't already been marked as an ancestor
730 # and we haven't already been marked as an ancestor
731 ancestors.add(n) # Mark as ancestor
731 ancestors.add(n) # Mark as ancestor
732 # Add non-nullid parents to list of nodes to tag.
732 # Add non-nullid parents to list of nodes to tag.
733 nodestotag.update([p for p in self.parents(n) if
733 nodestotag.update([p for p in self.parents(n) if
734 p != nullid])
734 p != nullid])
735 elif n in heads: # We've seen it before, is it a fake head?
735 elif n in heads: # We've seen it before, is it a fake head?
736 # So it is, real heads should not be the ancestors of
736 # So it is, real heads should not be the ancestors of
737 # any other heads.
737 # any other heads.
738 heads.pop(n)
738 heads.pop(n)
739 if not ancestors:
739 if not ancestors:
740 return nonodes
740 return nonodes
741 # Now that we have our set of ancestors, we want to remove any
741 # Now that we have our set of ancestors, we want to remove any
742 # roots that are not ancestors.
742 # roots that are not ancestors.
743
743
744 # If one of the roots was nullid, everything is included anyway.
744 # If one of the roots was nullid, everything is included anyway.
745 if lowestrev > nullrev:
745 if lowestrev > nullrev:
746 # But, since we weren't, let's recompute the lowest rev to not
746 # But, since we weren't, let's recompute the lowest rev to not
747 # include roots that aren't ancestors.
747 # include roots that aren't ancestors.
748
748
749 # Filter out roots that aren't ancestors of heads
749 # Filter out roots that aren't ancestors of heads
750 roots = [n for n in roots if n in ancestors]
750 roots = [n for n in roots if n in ancestors]
751 # Recompute the lowest revision
751 # Recompute the lowest revision
752 if roots:
752 if roots:
753 lowestrev = min([self.rev(n) for n in roots])
753 lowestrev = min([self.rev(n) for n in roots])
754 else:
754 else:
755 # No more roots? Return empty list
755 # No more roots? Return empty list
756 return nonodes
756 return nonodes
757 else:
757 else:
758 # We are descending from nullid, and don't need to care about
758 # We are descending from nullid, and don't need to care about
759 # any other roots.
759 # any other roots.
760 lowestrev = nullrev
760 lowestrev = nullrev
761 roots = [nullid]
761 roots = [nullid]
762 # Transform our roots list into a set.
762 # Transform our roots list into a set.
763 descendents = set(roots)
763 descendents = set(roots)
764 # Also, keep the original roots so we can filter out roots that aren't
764 # Also, keep the original roots so we can filter out roots that aren't
765 # 'real' roots (i.e. are descended from other roots).
765 # 'real' roots (i.e. are descended from other roots).
766 roots = descendents.copy()
766 roots = descendents.copy()
767 # Our topologically sorted list of output nodes.
767 # Our topologically sorted list of output nodes.
768 orderedout = []
768 orderedout = []
769 # Don't start at nullid since we don't want nullid in our output list,
769 # Don't start at nullid since we don't want nullid in our output list,
770 # and if nullid shows up in descedents, empty parents will look like
770 # and if nullid shows up in descedents, empty parents will look like
771 # they're descendents.
771 # they're descendents.
772 for r in xrange(max(lowestrev, 0), highestrev + 1):
772 for r in xrange(max(lowestrev, 0), highestrev + 1):
773 n = self.node(r)
773 n = self.node(r)
774 isdescendent = False
774 isdescendent = False
775 if lowestrev == nullrev: # Everybody is a descendent of nullid
775 if lowestrev == nullrev: # Everybody is a descendent of nullid
776 isdescendent = True
776 isdescendent = True
777 elif n in descendents:
777 elif n in descendents:
778 # n is already a descendent
778 # n is already a descendent
779 isdescendent = True
779 isdescendent = True
780 # This check only needs to be done here because all the roots
780 # This check only needs to be done here because all the roots
781 # will start being marked is descendents before the loop.
781 # will start being marked is descendents before the loop.
782 if n in roots:
782 if n in roots:
783 # If n was a root, check if it's a 'real' root.
783 # If n was a root, check if it's a 'real' root.
784 p = tuple(self.parents(n))
784 p = tuple(self.parents(n))
785 # If any of its parents are descendents, it's not a root.
785 # If any of its parents are descendents, it's not a root.
786 if (p[0] in descendents) or (p[1] in descendents):
786 if (p[0] in descendents) or (p[1] in descendents):
787 roots.remove(n)
787 roots.remove(n)
788 else:
788 else:
789 p = tuple(self.parents(n))
789 p = tuple(self.parents(n))
790 # A node is a descendent if either of its parents are
790 # A node is a descendent if either of its parents are
791 # descendents. (We seeded the dependents list with the roots
791 # descendents. (We seeded the dependents list with the roots
792 # up there, remember?)
792 # up there, remember?)
793 if (p[0] in descendents) or (p[1] in descendents):
793 if (p[0] in descendents) or (p[1] in descendents):
794 descendents.add(n)
794 descendents.add(n)
795 isdescendent = True
795 isdescendent = True
796 if isdescendent and ((ancestors is None) or (n in ancestors)):
796 if isdescendent and ((ancestors is None) or (n in ancestors)):
797 # Only include nodes that are both descendents and ancestors.
797 # Only include nodes that are both descendents and ancestors.
798 orderedout.append(n)
798 orderedout.append(n)
799 if (ancestors is not None) and (n in heads):
799 if (ancestors is not None) and (n in heads):
800 # We're trying to figure out which heads are reachable
800 # We're trying to figure out which heads are reachable
801 # from roots.
801 # from roots.
802 # Mark this head as having been reached
802 # Mark this head as having been reached
803 heads[n] = 1
803 heads[n] = 1
804 elif ancestors is None:
804 elif ancestors is None:
805 # Otherwise, we're trying to discover the heads.
805 # Otherwise, we're trying to discover the heads.
806 # Assume this is a head because if it isn't, the next step
806 # Assume this is a head because if it isn't, the next step
807 # will eventually remove it.
807 # will eventually remove it.
808 heads[n] = 1
808 heads[n] = 1
809 # But, obviously its parents aren't.
809 # But, obviously its parents aren't.
810 for p in self.parents(n):
810 for p in self.parents(n):
811 heads.pop(p, None)
811 heads.pop(p, None)
812 heads = [n for n in heads.iterkeys() if heads[n] != 0]
812 heads = [n for n in heads.iterkeys() if heads[n] != 0]
813 roots = list(roots)
813 roots = list(roots)
814 assert orderedout
814 assert orderedout
815 assert roots
815 assert roots
816 assert heads
816 assert heads
817 return (orderedout, roots, heads)
817 return (orderedout, roots, heads)
818
818
819 def heads(self, start=None, stop=None):
819 def heads(self, start=None, stop=None):
820 """return the list of all nodes that have no children
820 """return the list of all nodes that have no children
821
821
822 if start is specified, only heads that are descendants of
822 if start is specified, only heads that are descendants of
823 start will be returned
823 start will be returned
824 if stop is specified, it will consider all the revs from stop
824 if stop is specified, it will consider all the revs from stop
825 as if they had no children
825 as if they had no children
826 """
826 """
827 if start is None and stop is None:
827 if start is None and stop is None:
828 count = len(self)
828 count = len(self)
829 if not count:
829 if not count:
830 return [nullid]
830 return [nullid]
831 ishead = [1] * (count + 1)
831 ishead = [1] * (count + 1)
832 index = self.index
832 index = self.index
833 for r in xrange(count):
833 for r in xrange(count):
834 e = index[r]
834 e = index[r]
835 ishead[e[5]] = ishead[e[6]] = 0
835 ishead[e[5]] = ishead[e[6]] = 0
836 return [self.node(r) for r in xrange(count) if ishead[r]]
836 return [self.node(r) for r in xrange(count) if ishead[r]]
837
837
838 if start is None:
838 if start is None:
839 start = nullid
839 start = nullid
840 if stop is None:
840 if stop is None:
841 stop = []
841 stop = []
842 stoprevs = set([self.rev(n) for n in stop])
842 stoprevs = set([self.rev(n) for n in stop])
843 startrev = self.rev(start)
843 startrev = self.rev(start)
844 reachable = set((startrev,))
844 reachable = set((startrev,))
845 heads = set((startrev,))
845 heads = set((startrev,))
846
846
847 parentrevs = self.parentrevs
847 parentrevs = self.parentrevs
848 for r in xrange(startrev + 1, len(self)):
848 for r in xrange(startrev + 1, len(self)):
849 for p in parentrevs(r):
849 for p in parentrevs(r):
850 if p in reachable:
850 if p in reachable:
851 if r not in stoprevs:
851 if r not in stoprevs:
852 reachable.add(r)
852 reachable.add(r)
853 heads.add(r)
853 heads.add(r)
854 if p in heads and p not in stoprevs:
854 if p in heads and p not in stoprevs:
855 heads.remove(p)
855 heads.remove(p)
856
856
857 return [self.node(r) for r in heads]
857 return [self.node(r) for r in heads]
858
858
859 def children(self, node):
859 def children(self, node):
860 """find the children of a given node"""
860 """find the children of a given node"""
861 c = []
861 c = []
862 p = self.rev(node)
862 p = self.rev(node)
863 for r in range(p + 1, len(self)):
863 for r in range(p + 1, len(self)):
864 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
864 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
865 if prevs:
865 if prevs:
866 for pr in prevs:
866 for pr in prevs:
867 if pr == p:
867 if pr == p:
868 c.append(self.node(r))
868 c.append(self.node(r))
869 elif p == nullrev:
869 elif p == nullrev:
870 c.append(self.node(r))
870 c.append(self.node(r))
871 return c
871 return c
872
872
873 def descendant(self, start, end):
873 def descendant(self, start, end):
874 for i in self.descendants(start):
874 for i in self.descendants(start):
875 if i == end:
875 if i == end:
876 return True
876 return True
877 elif i > end:
877 elif i > end:
878 break
878 break
879 return False
879 return False
880
880
881 def ancestor(self, a, b):
881 def ancestor(self, a, b):
882 """calculate the least common ancestor of nodes a and b"""
882 """calculate the least common ancestor of nodes a and b"""
883
883
884 # fast path, check if it is a descendant
884 # fast path, check if it is a descendant
885 a, b = self.rev(a), self.rev(b)
885 a, b = self.rev(a), self.rev(b)
886 start, end = sorted((a, b))
886 start, end = sorted((a, b))
887 if self.descendant(start, end):
887 if self.descendant(start, end):
888 return self.node(start)
888 return self.node(start)
889
889
890 def parents(rev):
890 def parents(rev):
891 return [p for p in self.parentrevs(rev) if p != nullrev]
891 return [p for p in self.parentrevs(rev) if p != nullrev]
892
892
893 c = ancestor.ancestor(a, b, parents)
893 c = ancestor.ancestor(a, b, parents)
894 if c is None:
894 if c is None:
895 return nullid
895 return nullid
896
896
897 return self.node(c)
897 return self.node(c)
898
898
899 def _match(self, id):
899 def _match(self, id):
900 if isinstance(id, (long, int)):
900 if isinstance(id, (long, int)):
901 # rev
901 # rev
902 return self.node(id)
902 return self.node(id)
903 if len(id) == 20:
903 if len(id) == 20:
904 # possibly a binary node
904 # possibly a binary node
905 # odds of a binary node being all hex in ASCII are 1 in 10**25
905 # odds of a binary node being all hex in ASCII are 1 in 10**25
906 try:
906 try:
907 node = id
907 node = id
908 self.rev(node) # quick search the index
908 self.rev(node) # quick search the index
909 return node
909 return node
910 except LookupError:
910 except LookupError:
911 pass # may be partial hex id
911 pass # may be partial hex id
912 try:
912 try:
913 # str(rev)
913 # str(rev)
914 rev = int(id)
914 rev = int(id)
915 if str(rev) != id:
915 if str(rev) != id:
916 raise ValueError
916 raise ValueError
917 if rev < 0:
917 if rev < 0:
918 rev = len(self) + rev
918 rev = len(self) + rev
919 if rev < 0 or rev >= len(self):
919 if rev < 0 or rev >= len(self):
920 raise ValueError
920 raise ValueError
921 return self.node(rev)
921 return self.node(rev)
922 except (ValueError, OverflowError):
922 except (ValueError, OverflowError):
923 pass
923 pass
924 if len(id) == 40:
924 if len(id) == 40:
925 try:
925 try:
926 # a full hex nodeid?
926 # a full hex nodeid?
927 node = bin(id)
927 node = bin(id)
928 self.rev(node)
928 self.rev(node)
929 return node
929 return node
930 except (TypeError, LookupError):
930 except (TypeError, LookupError):
931 pass
931 pass
932
932
933 def _partialmatch(self, id):
933 def _partialmatch(self, id):
934 if len(id) < 40:
934 if len(id) < 40:
935 try:
935 try:
936 # hex(node)[:...]
936 # hex(node)[:...]
937 l = len(id) // 2 # grab an even number of digits
937 l = len(id) // 2 # grab an even number of digits
938 bin_id = bin(id[:l * 2])
938 bin_id = bin(id[:l * 2])
939 nl = [n for n in self.nodemap if n[:l] == bin_id]
939 nl = [n for n in self.nodemap if n[:l] == bin_id]
940 nl = [n for n in nl if hex(n).startswith(id)]
940 nl = [n for n in nl if hex(n).startswith(id)]
941 if len(nl) > 0:
941 if len(nl) > 0:
942 if len(nl) == 1:
942 if len(nl) == 1:
943 return nl[0]
943 return nl[0]
944 raise LookupError(id, self.indexfile,
944 raise LookupError(id, self.indexfile,
945 _('ambiguous identifier'))
945 _('ambiguous identifier'))
946 return None
946 return None
947 except TypeError:
947 except TypeError:
948 pass
948 pass
949
949
950 def lookup(self, id):
950 def lookup(self, id):
951 """locate a node based on:
951 """locate a node based on:
952 - revision number or str(revision number)
952 - revision number or str(revision number)
953 - nodeid or subset of hex nodeid
953 - nodeid or subset of hex nodeid
954 """
954 """
955 n = self._match(id)
955 n = self._match(id)
956 if n is not None:
956 if n is not None:
957 return n
957 return n
958 n = self._partialmatch(id)
958 n = self._partialmatch(id)
959 if n:
959 if n:
960 return n
960 return n
961
961
962 raise LookupError(id, self.indexfile, _('no match found'))
962 raise LookupError(id, self.indexfile, _('no match found'))
963
963
964 def cmp(self, node, text):
964 def cmp(self, node, text):
965 """compare text with a given file revision
965 """compare text with a given file revision
966
966
967 returns True if text is different than what is stored.
967 returns True if text is different than what is stored.
968 """
968 """
969 p1, p2 = self.parents(node)
969 p1, p2 = self.parents(node)
970 return hash(text, p1, p2) != node
970 return hash(text, p1, p2) != node
971
971
972 def _addchunk(self, offset, data):
972 def _addchunk(self, offset, data):
973 o, d = self._chunkcache
973 o, d = self._chunkcache
974 # try to add to existing cache
974 # try to add to existing cache
975 if o + len(d) == offset and len(d) + len(data) < _prereadsize:
975 if o + len(d) == offset and len(d) + len(data) < _prereadsize:
976 self._chunkcache = o, d + data
976 self._chunkcache = o, d + data
977 else:
977 else:
978 self._chunkcache = offset, data
978 self._chunkcache = offset, data
979
979
980 def _loadchunk(self, offset, length):
980 def _loadchunk(self, offset, length):
981 if self._inline:
981 if self._inline:
982 df = self.opener(self.indexfile)
982 df = self.opener(self.indexfile)
983 else:
983 else:
984 df = self.opener(self.datafile)
984 df = self.opener(self.datafile)
985
985
986 readahead = max(65536, length)
986 readahead = max(65536, length)
987 df.seek(offset)
987 df.seek(offset)
988 d = df.read(readahead)
988 d = df.read(readahead)
989 self._addchunk(offset, d)
989 self._addchunk(offset, d)
990 if readahead > length:
990 if readahead > length:
991 return d[:length]
991 return d[:length]
992 return d
992 return d
993
993
994 def _getchunk(self, offset, length):
994 def _getchunk(self, offset, length):
995 o, d = self._chunkcache
995 o, d = self._chunkcache
996 l = len(d)
996 l = len(d)
997
997
998 # is it in the cache?
998 # is it in the cache?
999 cachestart = offset - o
999 cachestart = offset - o
1000 cacheend = cachestart + length
1000 cacheend = cachestart + length
1001 if cachestart >= 0 and cacheend <= l:
1001 if cachestart >= 0 and cacheend <= l:
1002 if cachestart == 0 and cacheend == l:
1002 if cachestart == 0 and cacheend == l:
1003 return d # avoid a copy
1003 return d # avoid a copy
1004 return d[cachestart:cacheend]
1004 return d[cachestart:cacheend]
1005
1005
1006 return self._loadchunk(offset, length)
1006 return self._loadchunk(offset, length)
1007
1007
1008 def _chunkraw(self, startrev, endrev):
1008 def _chunkraw(self, startrev, endrev):
1009 start = self.start(startrev)
1009 start = self.start(startrev)
1010 length = self.end(endrev) - start
1010 length = self.end(endrev) - start
1011 if self._inline:
1011 if self._inline:
1012 start += (startrev + 1) * self._io.size
1012 start += (startrev + 1) * self._io.size
1013 return self._getchunk(start, length)
1013 return self._getchunk(start, length)
1014
1014
1015 def _chunk(self, rev):
1015 def _chunk(self, rev):
1016 return decompress(self._chunkraw(rev, rev))
1016 return decompress(self._chunkraw(rev, rev))
1017
1017
1018 def _chunkclear(self):
1018 def _chunkclear(self):
1019 self._chunkcache = (0, '')
1019 self._chunkcache = (0, '')
1020
1020
1021 def deltaparent(self, rev):
1021 def deltaparent(self, rev):
1022 """return previous revision or parentrev according to flags"""
1022 """return previous revision or parentrev according to flags"""
1023 if self.base(rev) == rev:
1023 if self.base(rev) == rev:
1024 return nullrev
1024 return nullrev
1025 elif self.flags(rev) & REVIDX_PARENTDELTA:
1025 elif self.flags(rev) & REVIDX_PARENTDELTA:
1026 return self.parentrevs(rev)[0]
1026 return self.parentrevs(rev)[0]
1027 else:
1027 else:
1028 return rev - 1
1028 return rev - 1
1029
1029
1030
1030
1031 def deltachain(self, rev, cache):
1031 def deltachain(self, rev, cache):
1032 """return chain of revisions to construct a given revision"""
1032 """return chain of revisions to construct a given revision"""
1033 chain = []
1033 chain = []
1034 check = False
1034 check = False
1035 index = self.index
1035 index = self.index
1036 e = index[rev]
1036 e = index[rev]
1037 while rev != e[3] and rev != cache:
1037 while rev != e[3] and rev != cache:
1038 chain.append(rev)
1038 chain.append(rev)
1039 if e[0] & REVIDX_PARENTDELTA:
1039 if e[0] & REVIDX_PARENTDELTA:
1040 rev = e[5]
1040 rev = e[5]
1041 else:
1041 else:
1042 rev -= 1
1042 rev -= 1
1043 e = index[rev]
1043 e = index[rev]
1044 chain.reverse()
1044 chain.reverse()
1045 if rev == cache:
1045 if rev == cache:
1046 check = True
1046 check = True
1047 return check, rev, chain
1047 return check, rev, chain
1048
1048
1049 def revdiff(self, rev1, rev2):
1049 def revdiff(self, rev1, rev2):
1050 """return or calculate a delta between two revisions"""
1050 """return or calculate a delta between two revisions"""
1051 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1051 if rev1 != nullrev and self.deltaparent(rev2) == rev1:
1052 return self._chunk(rev2)
1052 return self._chunk(rev2)
1053
1053
1054 return mdiff.textdiff(self.revision(self.node(rev1)),
1054 return mdiff.textdiff(self.revision(self.node(rev1)),
1055 self.revision(self.node(rev2)))
1055 self.revision(self.node(rev2)))
1056
1056
1057 def revision(self, node):
1057 def revision(self, node):
1058 """return an uncompressed revision of a given node"""
1058 """return an uncompressed revision of a given node"""
1059 cache = nullrev
1059 cachedrev = nullrev
1060 if node == nullid:
1060 if node == nullid:
1061 return ""
1061 return ""
1062 if self._cache:
1062 if self._cache:
1063 cache = self._cache[1]
1064 if self._cache[0] == node:
1063 if self._cache[0] == node:
1065 return self._cache[2]
1064 return self._cache[2]
1065 cachedrev = self._cache[1]
1066
1066
1067 # look up what we need to read
1067 # look up what we need to read
1068 text = None
1068 text = None
1069 rev = self.rev(node)
1069 rev = self.rev(node)
1070 cache, base, chain = self.deltachain(rev, cache)
1070 base = self.base(rev)
1071
1071
1072 # check rev flags
1072 # check rev flags
1073 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1073 if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
1074 raise RevlogError(_('incompatible revision flag %x') %
1074 raise RevlogError(_('incompatible revision flag %x') %
1075 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1075 (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))
1076
1076
1077 # build delta chain
1078 self._loadindex(base, rev + 1)
1079 cachehit, base, chain = self.deltachain(rev, cachedrev)
1080
1077 # do we have useful data cached?
1081 # do we have useful data cached?
1078 if cache and self._cache:
1082 if cachehit and self._cache:
1079 global _cached
1083 global _cached
1080 _cached += 1
1084 _cached += 1
1081 text = self._cache[2]
1085 text = self._cache[2]
1082 else:
1086 else:
1083 global _uncached
1087 global _uncached
1084 _uncached += 1
1088 _uncached += 1
1085
1089
1086
1090
1087 # drop cache to save memory
1091 # drop cache to save memory
1088 self._cache = None
1092 self._cache = None
1089
1093
1090 self._loadindex(base, rev + 1)
1091 self._chunkraw(base, rev)
1094 self._chunkraw(base, rev)
1092 if text is None:
1095 if text is None:
1093 text = self._chunk(base)
1096 text = self._chunk(base)
1094
1097
1095 bins = [self._chunk(r) for r in chain]
1098 bins = [self._chunk(r) for r in chain]
1096 text = mdiff.patches(text, bins)
1099 text = mdiff.patches(text, bins)
1097 p1, p2 = self.parents(node)
1100 p1, p2 = self.parents(node)
1098 if (node != hash(text, p1, p2) and
1101 if (node != hash(text, p1, p2) and
1099 not (self.flags(rev) & REVIDX_PUNCHED_FLAG)):
1102 not (self.flags(rev) & REVIDX_PUNCHED_FLAG)):
1100 raise RevlogError(_("integrity check failed on %s:%d")
1103 raise RevlogError(_("integrity check failed on %s:%d")
1101 % (self.indexfile, rev))
1104 % (self.indexfile, rev))
1102
1105
1103 self._cache = (node, rev, text)
1106 self._cache = (node, rev, text)
1104 return text
1107 return text
1105
1108
1106 def checkinlinesize(self, tr, fp=None):
1109 def checkinlinesize(self, tr, fp=None):
1107 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1110 if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
1108 return
1111 return
1109
1112
1110 trinfo = tr.find(self.indexfile)
1113 trinfo = tr.find(self.indexfile)
1111 if trinfo is None:
1114 if trinfo is None:
1112 raise RevlogError(_("%s not found in the transaction")
1115 raise RevlogError(_("%s not found in the transaction")
1113 % self.indexfile)
1116 % self.indexfile)
1114
1117
1115 trindex = trinfo[2]
1118 trindex = trinfo[2]
1116 dataoff = self.start(trindex)
1119 dataoff = self.start(trindex)
1117
1120
1118 tr.add(self.datafile, dataoff)
1121 tr.add(self.datafile, dataoff)
1119
1122
1120 if fp:
1123 if fp:
1121 fp.flush()
1124 fp.flush()
1122 fp.close()
1125 fp.close()
1123
1126
1124 df = self.opener(self.datafile, 'w')
1127 df = self.opener(self.datafile, 'w')
1125 try:
1128 try:
1126 for r in self:
1129 for r in self:
1127 df.write(self._chunkraw(r, r))
1130 df.write(self._chunkraw(r, r))
1128 finally:
1131 finally:
1129 df.close()
1132 df.close()
1130
1133
1131 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1134 fp = self.opener(self.indexfile, 'w', atomictemp=True)
1132 self.version &= ~(REVLOGNGINLINEDATA)
1135 self.version &= ~(REVLOGNGINLINEDATA)
1133 self._inline = False
1136 self._inline = False
1134 for i in self:
1137 for i in self:
1135 e = self._io.packentry(self.index[i], self.node, self.version, i)
1138 e = self._io.packentry(self.index[i], self.node, self.version, i)
1136 fp.write(e)
1139 fp.write(e)
1137
1140
1138 # if we don't call rename, the temp file will never replace the
1141 # if we don't call rename, the temp file will never replace the
1139 # real index
1142 # real index
1140 fp.rename()
1143 fp.rename()
1141
1144
1142 tr.replace(self.indexfile, trindex * self._io.size)
1145 tr.replace(self.indexfile, trindex * self._io.size)
1143 self._chunkclear()
1146 self._chunkclear()
1144
1147
1145 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
1148 def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
1146 """add a revision to the log
1149 """add a revision to the log
1147
1150
1148 text - the revision data to add
1151 text - the revision data to add
1149 transaction - the transaction object used for rollback
1152 transaction - the transaction object used for rollback
1150 link - the linkrev data to add
1153 link - the linkrev data to add
1151 p1, p2 - the parent nodeids of the revision
1154 p1, p2 - the parent nodeids of the revision
1152 d - an optional precomputed delta
1155 d - an optional precomputed delta
1153 """
1156 """
1154 dfh = None
1157 dfh = None
1155 if not self._inline:
1158 if not self._inline:
1156 dfh = self.opener(self.datafile, "a")
1159 dfh = self.opener(self.datafile, "a")
1157 ifh = self.opener(self.indexfile, "a+")
1160 ifh = self.opener(self.indexfile, "a+")
1158 try:
1161 try:
1159 return self._addrevision(text, transaction, link, p1, p2,
1162 return self._addrevision(text, transaction, link, p1, p2,
1160 cachedelta, ifh, dfh)
1163 cachedelta, ifh, dfh)
1161 finally:
1164 finally:
1162 if dfh:
1165 if dfh:
1163 dfh.close()
1166 dfh.close()
1164 ifh.close()
1167 ifh.close()
1165
1168
1166 def _addrevision(self, text, transaction, link, p1, p2,
1169 def _addrevision(self, text, transaction, link, p1, p2,
1167 cachedelta, ifh, dfh):
1170 cachedelta, ifh, dfh):
1168 node = hash(text, p1, p2)
1171 node = hash(text, p1, p2)
1169 if node in self.nodemap:
1172 if node in self.nodemap:
1170 return node
1173 return node
1171
1174
1172 curr = len(self)
1175 curr = len(self)
1173 prev = curr - 1
1176 prev = curr - 1
1174 base = curr
1177 base = curr
1175 offset = self.end(prev)
1178 offset = self.end(prev)
1176 flags = 0
1179 flags = 0
1177 d = None
1180 d = None
1178
1181
1179 if self._parentdelta:
1182 if self._parentdelta:
1180 deltarev, deltanode = self.rev(p1), p1
1183 deltarev, deltanode = self.rev(p1), p1
1181 flags = REVIDX_PARENTDELTA
1184 flags = REVIDX_PARENTDELTA
1182 else:
1185 else:
1183 deltarev, deltanode = prev, self.node(prev)
1186 deltarev, deltanode = prev, self.node(prev)
1184
1187
1185 # should we try to build a delta?
1188 # should we try to build a delta?
1186 if deltarev != nullrev:
1189 if deltarev != nullrev:
1187 # can we use the cached delta?
1190 # can we use the cached delta?
1188 if cachedelta:
1191 if cachedelta:
1189 cacherev, d = cachedelta
1192 cacherev, d = cachedelta
1190 if cacherev != deltarev:
1193 if cacherev != deltarev:
1191 d = None
1194 d = None
1192 if d is None:
1195 if d is None:
1193 ptext = self.revision(deltanode)
1196 ptext = self.revision(deltanode)
1194 d = mdiff.textdiff(ptext, text)
1197 d = mdiff.textdiff(ptext, text)
1195 data = compress(d)
1198 data = compress(d)
1196 l = len(data[1]) + len(data[0])
1199 l = len(data[1]) + len(data[0])
1197 base = self.base(deltarev)
1200 base = self.base(deltarev)
1198 dist = l + offset - self.start(base)
1201 dist = l + offset - self.start(base)
1199
1202
1200 # full versions are inserted when the needed deltas
1203 # full versions are inserted when the needed deltas
1201 # become comparable to the uncompressed text
1204 # become comparable to the uncompressed text
1202 # or the base revision is punched
1205 # or the base revision is punched
1203 if (d is None or dist > len(text) * 2 or
1206 if (d is None or dist > len(text) * 2 or
1204 (self.flags(base) & REVIDX_PUNCHED_FLAG)):
1207 (self.flags(base) & REVIDX_PUNCHED_FLAG)):
1205 data = compress(text)
1208 data = compress(text)
1206 l = len(data[1]) + len(data[0])
1209 l = len(data[1]) + len(data[0])
1207 base = curr
1210 base = curr
1208
1211
1209 e = (offset_type(offset, flags), l, len(text),
1212 e = (offset_type(offset, flags), l, len(text),
1210 base, link, self.rev(p1), self.rev(p2), node)
1213 base, link, self.rev(p1), self.rev(p2), node)
1211 self.index.insert(-1, e)
1214 self.index.insert(-1, e)
1212 self.nodemap[node] = curr
1215 self.nodemap[node] = curr
1213
1216
1214 entry = self._io.packentry(e, self.node, self.version, curr)
1217 entry = self._io.packentry(e, self.node, self.version, curr)
1215 if not self._inline:
1218 if not self._inline:
1216 transaction.add(self.datafile, offset)
1219 transaction.add(self.datafile, offset)
1217 transaction.add(self.indexfile, curr * len(entry))
1220 transaction.add(self.indexfile, curr * len(entry))
1218 if data[0]:
1221 if data[0]:
1219 dfh.write(data[0])
1222 dfh.write(data[0])
1220 dfh.write(data[1])
1223 dfh.write(data[1])
1221 dfh.flush()
1224 dfh.flush()
1222 ifh.write(entry)
1225 ifh.write(entry)
1223 else:
1226 else:
1224 offset += curr * self._io.size
1227 offset += curr * self._io.size
1225 transaction.add(self.indexfile, offset, curr)
1228 transaction.add(self.indexfile, offset, curr)
1226 ifh.write(entry)
1229 ifh.write(entry)
1227 ifh.write(data[0])
1230 ifh.write(data[0])
1228 ifh.write(data[1])
1231 ifh.write(data[1])
1229 self.checkinlinesize(transaction, ifh)
1232 self.checkinlinesize(transaction, ifh)
1230
1233
1231 if type(text) == str: # only accept immutable objects
1234 if type(text) == str: # only accept immutable objects
1232 self._cache = (node, curr, text)
1235 self._cache = (node, curr, text)
1233 return node
1236 return node
1234
1237
1235 def group(self, nodelist, lookup, infocollect=None):
1238 def group(self, nodelist, lookup, infocollect=None):
1236 """Calculate a delta group, yielding a sequence of changegroup chunks
1239 """Calculate a delta group, yielding a sequence of changegroup chunks
1237 (strings).
1240 (strings).
1238
1241
1239 Given a list of changeset revs, return a set of deltas and
1242 Given a list of changeset revs, return a set of deltas and
1240 metadata corresponding to nodes. the first delta is
1243 metadata corresponding to nodes. the first delta is
1241 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
1244 parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
1242 have this parent as it has all history before these
1245 have this parent as it has all history before these
1243 changesets. parent is parent[0]
1246 changesets. parent is parent[0]
1244 """
1247 """
1245
1248
1246 revs = [self.rev(n) for n in nodelist]
1249 revs = [self.rev(n) for n in nodelist]
1247
1250
1248 # if we don't have any revisions touched by these changesets, bail
1251 # if we don't have any revisions touched by these changesets, bail
1249 if not revs:
1252 if not revs:
1250 yield changegroup.closechunk()
1253 yield changegroup.closechunk()
1251 return
1254 return
1252
1255
1253 # add the parent of the first rev
1256 # add the parent of the first rev
1254 p = self.parentrevs(revs[0])[0]
1257 p = self.parentrevs(revs[0])[0]
1255 revs.insert(0, p)
1258 revs.insert(0, p)
1256
1259
1257 # build deltas
1260 # build deltas
1258 for d in xrange(len(revs) - 1):
1261 for d in xrange(len(revs) - 1):
1259 a, b = revs[d], revs[d + 1]
1262 a, b = revs[d], revs[d + 1]
1260 nb = self.node(b)
1263 nb = self.node(b)
1261
1264
1262 if infocollect is not None:
1265 if infocollect is not None:
1263 infocollect(nb)
1266 infocollect(nb)
1264
1267
1265 p = self.parents(nb)
1268 p = self.parents(nb)
1266 meta = nb + p[0] + p[1] + lookup(nb)
1269 meta = nb + p[0] + p[1] + lookup(nb)
1267 if a == -1:
1270 if a == -1:
1268 d = self.revision(nb)
1271 d = self.revision(nb)
1269 meta += mdiff.trivialdiffheader(len(d))
1272 meta += mdiff.trivialdiffheader(len(d))
1270 else:
1273 else:
1271 d = self.revdiff(a, b)
1274 d = self.revdiff(a, b)
1272 yield changegroup.chunkheader(len(meta) + len(d))
1275 yield changegroup.chunkheader(len(meta) + len(d))
1273 yield meta
1276 yield meta
1274 yield d
1277 yield d
1275
1278
1276 yield changegroup.closechunk()
1279 yield changegroup.closechunk()
1277
1280
1278 def addgroup(self, revs, linkmapper, transaction):
1281 def addgroup(self, revs, linkmapper, transaction):
1279 """
1282 """
1280 add a delta group
1283 add a delta group
1281
1284
1282 given a set of deltas, add them to the revision log. the
1285 given a set of deltas, add them to the revision log. the
1283 first delta is against its parent, which should be in our
1286 first delta is against its parent, which should be in our
1284 log, the rest are against the previous delta.
1287 log, the rest are against the previous delta.
1285 """
1288 """
1286
1289
1287 #track the base of the current delta log
1290 #track the base of the current delta log
1288 r = len(self)
1291 r = len(self)
1289 t = r - 1
1292 t = r - 1
1290 node = None
1293 node = None
1291
1294
1292 base = prev = nullrev
1295 base = prev = nullrev
1293 start = end = textlen = 0
1296 start = end = textlen = 0
1294 if r:
1297 if r:
1295 end = self.end(t)
1298 end = self.end(t)
1296
1299
1297 ifh = self.opener(self.indexfile, "a+")
1300 ifh = self.opener(self.indexfile, "a+")
1298 isize = r * self._io.size
1301 isize = r * self._io.size
1299 if self._inline:
1302 if self._inline:
1300 transaction.add(self.indexfile, end + isize, r)
1303 transaction.add(self.indexfile, end + isize, r)
1301 dfh = None
1304 dfh = None
1302 else:
1305 else:
1303 transaction.add(self.indexfile, isize, r)
1306 transaction.add(self.indexfile, isize, r)
1304 transaction.add(self.datafile, end)
1307 transaction.add(self.datafile, end)
1305 dfh = self.opener(self.datafile, "a")
1308 dfh = self.opener(self.datafile, "a")
1306
1309
1307 try:
1310 try:
1308 # loop through our set of deltas
1311 # loop through our set of deltas
1309 chain = None
1312 chain = None
1310 for chunk in revs:
1313 for chunk in revs:
1311 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1314 node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
1312 link = linkmapper(cs)
1315 link = linkmapper(cs)
1313 if node in self.nodemap:
1316 if node in self.nodemap:
1314 # this can happen if two branches make the same change
1317 # this can happen if two branches make the same change
1315 chain = node
1318 chain = node
1316 continue
1319 continue
1317 delta = buffer(chunk, 80)
1320 delta = buffer(chunk, 80)
1318 del chunk
1321 del chunk
1319
1322
1320 for p in (p1, p2):
1323 for p in (p1, p2):
1321 if not p in self.nodemap:
1324 if not p in self.nodemap:
1322 raise LookupError(p, self.indexfile, _('unknown parent'))
1325 raise LookupError(p, self.indexfile, _('unknown parent'))
1323
1326
1324 if not chain:
1327 if not chain:
1325 # retrieve the parent revision of the delta chain
1328 # retrieve the parent revision of the delta chain
1326 chain = p1
1329 chain = p1
1327 if not chain in self.nodemap:
1330 if not chain in self.nodemap:
1328 raise LookupError(chain, self.indexfile, _('unknown base'))
1331 raise LookupError(chain, self.indexfile, _('unknown base'))
1329
1332
1330 # full versions are inserted when the needed deltas become
1333 # full versions are inserted when the needed deltas become
1331 # comparable to the uncompressed text or when the previous
1334 # comparable to the uncompressed text or when the previous
1332 # version is not the one we have a delta against. We use
1335 # version is not the one we have a delta against. We use
1333 # the size of the previous full rev as a proxy for the
1336 # the size of the previous full rev as a proxy for the
1334 # current size.
1337 # current size.
1335
1338
1336 if chain == prev:
1339 if chain == prev:
1337 cdelta = compress(delta)
1340 cdelta = compress(delta)
1338 cdeltalen = len(cdelta[0]) + len(cdelta[1])
1341 cdeltalen = len(cdelta[0]) + len(cdelta[1])
1339 textlen = mdiff.patchedsize(textlen, delta)
1342 textlen = mdiff.patchedsize(textlen, delta)
1340
1343
1341 if chain != prev or (end - start + cdeltalen) > textlen * 2:
1344 if chain != prev or (end - start + cdeltalen) > textlen * 2:
1342 # flush our writes here so we can read it in revision
1345 # flush our writes here so we can read it in revision
1343 if dfh:
1346 if dfh:
1344 dfh.flush()
1347 dfh.flush()
1345 ifh.flush()
1348 ifh.flush()
1346 text = self.revision(chain)
1349 text = self.revision(chain)
1347 if len(text) == 0:
1350 if len(text) == 0:
1348 # skip over trivial delta header
1351 # skip over trivial delta header
1349 text = buffer(delta, 12)
1352 text = buffer(delta, 12)
1350 else:
1353 else:
1351 text = mdiff.patches(text, [delta])
1354 text = mdiff.patches(text, [delta])
1352 del delta
1355 del delta
1353 chk = self._addrevision(text, transaction, link, p1, p2, None,
1356 chk = self._addrevision(text, transaction, link, p1, p2, None,
1354 ifh, dfh)
1357 ifh, dfh)
1355 if not dfh and not self._inline:
1358 if not dfh and not self._inline:
1356 # addrevision switched from inline to conventional
1359 # addrevision switched from inline to conventional
1357 # reopen the index
1360 # reopen the index
1358 dfh = self.opener(self.datafile, "a")
1361 dfh = self.opener(self.datafile, "a")
1359 ifh = self.opener(self.indexfile, "a")
1362 ifh = self.opener(self.indexfile, "a")
1360 if chk != node:
1363 if chk != node:
1361 raise RevlogError(_("consistency error adding group"))
1364 raise RevlogError(_("consistency error adding group"))
1362 textlen = len(text)
1365 textlen = len(text)
1363 else:
1366 else:
1364 e = (offset_type(end, 0), cdeltalen, textlen, base,
1367 e = (offset_type(end, 0), cdeltalen, textlen, base,
1365 link, self.rev(p1), self.rev(p2), node)
1368 link, self.rev(p1), self.rev(p2), node)
1366 self.index.insert(-1, e)
1369 self.index.insert(-1, e)
1367 self.nodemap[node] = r
1370 self.nodemap[node] = r
1368 entry = self._io.packentry(e, self.node, self.version, r)
1371 entry = self._io.packentry(e, self.node, self.version, r)
1369 if self._inline:
1372 if self._inline:
1370 ifh.write(entry)
1373 ifh.write(entry)
1371 ifh.write(cdelta[0])
1374 ifh.write(cdelta[0])
1372 ifh.write(cdelta[1])
1375 ifh.write(cdelta[1])
1373 self.checkinlinesize(transaction, ifh)
1376 self.checkinlinesize(transaction, ifh)
1374 if not self._inline:
1377 if not self._inline:
1375 dfh = self.opener(self.datafile, "a")
1378 dfh = self.opener(self.datafile, "a")
1376 ifh = self.opener(self.indexfile, "a")
1379 ifh = self.opener(self.indexfile, "a")
1377 else:
1380 else:
1378 dfh.write(cdelta[0])
1381 dfh.write(cdelta[0])
1379 dfh.write(cdelta[1])
1382 dfh.write(cdelta[1])
1380 ifh.write(entry)
1383 ifh.write(entry)
1381
1384
1382 t, r, chain, prev = r, r + 1, node, node
1385 t, r, chain, prev = r, r + 1, node, node
1383 base = self.base(t)
1386 base = self.base(t)
1384 start = self.start(base)
1387 start = self.start(base)
1385 end = self.end(t)
1388 end = self.end(t)
1386 finally:
1389 finally:
1387 if dfh:
1390 if dfh:
1388 dfh.close()
1391 dfh.close()
1389 ifh.close()
1392 ifh.close()
1390
1393
1391 return node
1394 return node
1392
1395
1393 def strip(self, minlink, transaction):
1396 def strip(self, minlink, transaction):
1394 """truncate the revlog on the first revision with a linkrev >= minlink
1397 """truncate the revlog on the first revision with a linkrev >= minlink
1395
1398
1396 This function is called when we're stripping revision minlink and
1399 This function is called when we're stripping revision minlink and
1397 its descendants from the repository.
1400 its descendants from the repository.
1398
1401
1399 We have to remove all revisions with linkrev >= minlink, because
1402 We have to remove all revisions with linkrev >= minlink, because
1400 the equivalent changelog revisions will be renumbered after the
1403 the equivalent changelog revisions will be renumbered after the
1401 strip.
1404 strip.
1402
1405
1403 So we truncate the revlog on the first of these revisions, and
1406 So we truncate the revlog on the first of these revisions, and
1404 trust that the caller has saved the revisions that shouldn't be
1407 trust that the caller has saved the revisions that shouldn't be
1405 removed and that it'll readd them after this truncation.
1408 removed and that it'll readd them after this truncation.
1406 """
1409 """
1407 if len(self) == 0:
1410 if len(self) == 0:
1408 return
1411 return
1409
1412
1410 if isinstance(self.index, lazyindex):
1413 if isinstance(self.index, lazyindex):
1411 self._loadindexmap()
1414 self._loadindexmap()
1412
1415
1413 for rev in self:
1416 for rev in self:
1414 if self.index[rev][4] >= minlink:
1417 if self.index[rev][4] >= minlink:
1415 break
1418 break
1416 else:
1419 else:
1417 return
1420 return
1418
1421
1419 # first truncate the files on disk
1422 # first truncate the files on disk
1420 end = self.start(rev)
1423 end = self.start(rev)
1421 if not self._inline:
1424 if not self._inline:
1422 transaction.add(self.datafile, end)
1425 transaction.add(self.datafile, end)
1423 end = rev * self._io.size
1426 end = rev * self._io.size
1424 else:
1427 else:
1425 end += rev * self._io.size
1428 end += rev * self._io.size
1426
1429
1427 transaction.add(self.indexfile, end)
1430 transaction.add(self.indexfile, end)
1428
1431
1429 # then reset internal state in memory to forget those revisions
1432 # then reset internal state in memory to forget those revisions
1430 self._cache = None
1433 self._cache = None
1431 self._chunkclear()
1434 self._chunkclear()
1432 for x in xrange(rev, len(self)):
1435 for x in xrange(rev, len(self)):
1433 del self.nodemap[self.node(x)]
1436 del self.nodemap[self.node(x)]
1434
1437
1435 del self.index[rev:-1]
1438 del self.index[rev:-1]
1436
1439
1437 def checksize(self):
1440 def checksize(self):
1438 expected = 0
1441 expected = 0
1439 if len(self):
1442 if len(self):
1440 expected = max(0, self.end(len(self) - 1))
1443 expected = max(0, self.end(len(self) - 1))
1441
1444
1442 try:
1445 try:
1443 f = self.opener(self.datafile)
1446 f = self.opener(self.datafile)
1444 f.seek(0, 2)
1447 f.seek(0, 2)
1445 actual = f.tell()
1448 actual = f.tell()
1446 dd = actual - expected
1449 dd = actual - expected
1447 except IOError, inst:
1450 except IOError, inst:
1448 if inst.errno != errno.ENOENT:
1451 if inst.errno != errno.ENOENT:
1449 raise
1452 raise
1450 dd = 0
1453 dd = 0
1451
1454
1452 try:
1455 try:
1453 f = self.opener(self.indexfile)
1456 f = self.opener(self.indexfile)
1454 f.seek(0, 2)
1457 f.seek(0, 2)
1455 actual = f.tell()
1458 actual = f.tell()
1456 s = self._io.size
1459 s = self._io.size
1457 i = max(0, actual // s)
1460 i = max(0, actual // s)
1458 di = actual - (i * s)
1461 di = actual - (i * s)
1459 if self._inline:
1462 if self._inline:
1460 databytes = 0
1463 databytes = 0
1461 for r in self:
1464 for r in self:
1462 databytes += max(0, self.length(r))
1465 databytes += max(0, self.length(r))
1463 dd = 0
1466 dd = 0
1464 di = actual - len(self) * s - databytes
1467 di = actual - len(self) * s - databytes
1465 except IOError, inst:
1468 except IOError, inst:
1466 if inst.errno != errno.ENOENT:
1469 if inst.errno != errno.ENOENT:
1467 raise
1470 raise
1468 di = 0
1471 di = 0
1469
1472
1470 return (dd, di)
1473 return (dd, di)
1471
1474
1472 def files(self):
1475 def files(self):
1473 res = [self.indexfile]
1476 res = [self.indexfile]
1474 if not self._inline:
1477 if not self._inline:
1475 res.append(self.datafile)
1478 res.append(self.datafile)
1476 return res
1479 return res
General Comments 0
You need to be logged in to leave comments. Login now