revlog: break hash checking into subfunction
Matt Mackall
r13239:12ed25f3 default
@@ -1,1477 +1,1482 @@
# revlog.py - storage back-end for mercurial
#
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Storage back-end for Mercurial.

This provides efficient delta storage with O(1) retrieve and append
and O(changes) merge between branches.
"""

# import stuff from node for others to import from revlog
from node import bin, hex, nullid, nullrev, short #@UnusedImport
from i18n import _
import changegroup, ancestor, mdiff, parsers, error, util
import struct, zlib, errno

_pack = struct.pack
_unpack = struct.unpack
_compress = zlib.compress
_decompress = zlib.decompress
_sha = util.sha1

# revlog header flags
REVLOGV0 = 0
REVLOGNG = 1
REVLOGNGINLINEDATA = (1 << 16)
REVLOGSHALLOW = (1 << 17)
REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
REVLOG_DEFAULT_FORMAT = REVLOGNG
REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
REVLOGNG_FLAGS = REVLOGNGINLINEDATA | REVLOGSHALLOW

# revlog index flags
REVIDX_PARENTDELTA = 1
REVIDX_PUNCHED_FLAG = 2
REVIDX_KNOWN_FLAGS = REVIDX_PUNCHED_FLAG | REVIDX_PARENTDELTA

# amount of data read unconditionally, should be >= 4
# when not inline: threshold for using lazy index
_prereadsize = 1048576
# max size of revlog with inline data
_maxinline = 131072

RevlogError = error.RevlogError
LookupError = error.LookupError

def getoffset(q):
    return int(q >> 16)

def gettype(q):
    return int(q & 0xFFFF)

def offset_type(offset, type):
    return long(long(offset) << 16 | type)
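
# Editor's illustration (not part of the original file): the index stores a
# revision's data-file offset and its flags in a single integer, flags in
# the low 16 bits and the offset above them; getoffset/gettype take it apart.
assert getoffset(offset_type(4096, REVIDX_PUNCHED_FLAG)) == 4096
assert gettype(offset_type(4096, REVIDX_PUNCHED_FLAG)) == REVIDX_PUNCHED_FLAG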

nullhash = _sha(nullid)

def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    # As of now, if one of the parent nodes is null, p2 is null
    if p2 == nullid:
        # deep copy of a hash is faster than creating one
        s = nullhash.copy()
        s.update(p1)
    else:
        # none of the parent nodes are nullid
        l = [p1, p2]
        l.sort()
        s = _sha(l[0])
        s.update(l[1])
    s.update(text)
    return s.digest()
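
# Editor's illustration (not part of the original file): with a null p2 the
# result is sha1(nullid + p1 + text); with two real parents they are sorted
# first, so the hash is independent of parent order.
_p1, _p2 = '\x11' * 20, '\x22' * 20
assert hash('data', _p1, nullid) == _sha(nullid + _p1 + 'data').digest()
assert hash('data', _p1, _p2) == hash('data', _p2, _p1)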

def compress(text):
    """ generate a possibly-compressed representation of text """
    if not text:
        return ("", text)
    l = len(text)
    bin = None
    if l < 44:
        pass
    elif l > 1000000:
        # zlib makes an internal copy, thus doubling memory usage for
        # large files, so let's do this in pieces
        z = zlib.compressobj()
        p = []
        pos = 0
        while pos < l:
            pos2 = pos + 2**20
            p.append(z.compress(text[pos:pos2]))
            pos = pos2
        p.append(z.flush())
        if sum(map(len, p)) < l:
            bin = "".join(p)
    else:
        bin = _compress(text)
    if bin is None or len(bin) > l:
        if text[0] == '\0':
            return ("", text)
        return ('u', text)
    return ("", bin)

def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    t = bin[0]
    if t == '\0':
        return bin
    if t == 'x':
        return _decompress(bin)
    if t == 'u':
        return bin[1:]
    raise RevlogError(_("unknown compression type %r") % t)
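
# Editor's illustration (not part of the original file): compress() returns a
# (prefix, data) pair and decompress() dispatches on the first byte: 'x' for
# zlib streams, 'u' for stored-literal text, '\0' for data passed through
# untouched. The pair round-trips either way.
_blob = 'some highly compressible text ' * 10
assert decompress(''.join(compress(_blob))) == _blob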

class lazyparser(object):
    """
    this class avoids the need to parse the entirety of large indices
    """

    # lazyparser is not safe to use on windows if win32 extensions not
    # available. it keeps the file handle open, which makes it impossible
    # to break hardlinks on local cloned repos.

    def __init__(self, dataf):
        try:
            size = util.fstat(dataf).st_size
        except AttributeError:
            size = 0
        self.dataf = dataf
        self.s = struct.calcsize(indexformatng)
        self.datasize = size
        self.l = size // self.s
        self.index = [None] * self.l
        self.map = {nullid: nullrev}
        self.allmap = 0
        self.all = 0
        self.mapfind_count = 0

    def loadmap(self):
        """
        during a commit, we need to make sure the rev being added is
        not a duplicate. This requires loading the entire index,
        which is fairly slow. loadmap can load up just the node map,
        which takes much less time.
        """
        if self.allmap:
            return
        end = self.datasize
        self.allmap = 1
        cur = 0
        count = 0
        blocksize = self.s * 256
        self.dataf.seek(0)
        while cur < end:
            data = self.dataf.read(blocksize)
            off = 0
            for x in xrange(256):
                n = data[off + ngshaoffset:off + ngshaoffset + 20]
                self.map[n] = count
                count += 1
                if count >= self.l:
                    break
                off += self.s
            cur += blocksize

    def loadblock(self, blockstart, blocksize, data=None):
        if self.all:
            return
        if data is None:
            self.dataf.seek(blockstart)
            if blockstart + blocksize > self.datasize:
                # the revlog may have grown since we've started running,
                # but we don't have space in self.index for more entries.
                # limit blocksize so that we don't get too much data.
                blocksize = max(self.datasize - blockstart, 0)
            data = self.dataf.read(blocksize)
        lend = len(data) // self.s
        i = blockstart // self.s
        off = 0
        # lazyindex supports __delitem__
        if lend > len(self.index) - i:
            lend = len(self.index) - i
        for x in xrange(lend):
            if self.index[i + x] is None:
                b = data[off : off + self.s]
                self.index[i + x] = b
                n = b[ngshaoffset:ngshaoffset + 20]
                self.map[n] = i + x
            off += self.s

    def findnode(self, node):
        """search backwards through the index file for a specific node"""
        if self.allmap:
            return None

        # hg log will cause many many searches for the manifest
        # nodes. After we get called a few times, just load the whole
        # thing.
        if self.mapfind_count > 8:
            self.loadmap()
            if node in self.map:
                return node
            return None
        self.mapfind_count += 1
        last = self.l - 1
        while self.index[last] is not None:
            if last == 0:
                self.all = 1
                self.allmap = 1
                return None
            last -= 1
        end = (last + 1) * self.s
        blocksize = self.s * 256
        while end >= 0:
            start = max(end - blocksize, 0)
            self.dataf.seek(start)
            data = self.dataf.read(end - start)
            findend = end - start
            while True:
                # we're searching backwards, so we have to make sure
                # we don't find a changeset where this node is a parent
                off = data.find(node, 0, findend)
                findend = off
                if off >= 0:
                    i = off / self.s
                    off = i * self.s
                    n = data[off + ngshaoffset:off + ngshaoffset + 20]
                    if n == node:
                        self.map[n] = i + start / self.s
                        return node
                else:
                    break
            end -= blocksize
        return None

    def loadindex(self, i=None, end=None):
        if self.all:
            return
        all = False
        if i is None:
            blockstart = 0
            blocksize = (65536 / self.s) * self.s
            end = self.datasize
            all = True
        else:
            if end:
                blockstart = i * self.s
                end = end * self.s
                blocksize = end - blockstart
            else:
                blockstart = (i & ~1023) * self.s
                blocksize = self.s * 1024
                end = blockstart + blocksize
        while blockstart < end:
            self.loadblock(blockstart, blocksize)
            blockstart += blocksize
        if all:
            self.all = True

class lazyindex(object):
    """a lazy version of the index array"""
    def __init__(self, parser):
        self.p = parser
    def __len__(self):
        return len(self.p.index)
    def load(self, pos):
        if pos < 0:
            pos += len(self.p.index)
        self.p.loadindex(pos)
        return self.p.index[pos]
    def __getitem__(self, pos):
        return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
    def __setitem__(self, pos, item):
        self.p.index[pos] = _pack(indexformatng, *item)
    def __delitem__(self, pos):
        del self.p.index[pos]
    def insert(self, pos, e):
        self.p.index.insert(pos, _pack(indexformatng, *e))
    def append(self, e):
        self.p.index.append(_pack(indexformatng, *e))

class lazymap(object):
    """a lazy version of the node map"""
    def __init__(self, parser):
        self.p = parser
    def load(self, key):
        n = self.p.findnode(key)
        if n is None:
            raise KeyError(key)
    def __contains__(self, key):
        if key in self.p.map:
            return True
        self.p.loadmap()
        return key in self.p.map
    def __iter__(self):
        yield nullid
        for i, ret in enumerate(self.p.index):
            if not ret:
                self.p.loadindex(i)
                ret = self.p.index[i]
            if isinstance(ret, str):
                ret = _unpack(indexformatng, ret)
            yield ret[7]
    def __getitem__(self, key):
        try:
            return self.p.map[key]
        except KeyError:
            try:
                self.load(key)
                return self.p.map[key]
            except KeyError:
                raise KeyError("node " + hex(key))
    def __setitem__(self, key, val):
        self.p.map[key] = val
    def __delitem__(self, key):
        del self.p.map[key]

indexformatv0 = ">4l20s20s20s"
v0shaoffset = 56

class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, fp, data, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        if len(data) == _prereadsize:
            data += fp.read() # read the rest
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        if gettype(entry[0]):
            raise RevlogError(_("index entry flags need RevlogNG"))
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
#  6 bytes: offset
#  2 bytes: flags
#  4 bytes: compressed length
#  4 bytes: uncompressed length
#  4 bytes: base rev
#  4 bytes: link rev
#  4 bytes: parent 1 rev
#  4 bytes: parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
ngshaoffset = 32
versionformat = ">I"
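
# Editor's illustration (not part of the original file): each RevlogNG index
# entry packs the fields listed above into 64 bytes, and _pack/_unpack
# round-trip an entry tuple of (offset/flags, compressed length,
# uncompressed length, base rev, link rev, p1 rev, p2 rev, nodeid).
_e = (offset_type(0, 0), 10, 20, 0, 0, -1, -1, nullid)
assert struct.calcsize(indexformatng) == 64
assert _unpack(indexformatng, _pack(indexformatng, *_e)) == _e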

class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, fp, data, inline):
        if len(data) == _prereadsize:
            if util.openhardlinks() and not inline:
                # big index, let's parse it on demand
                parser = lazyparser(fp)
                index = lazyindex(parser)
                nodemap = lazymap(parser)
                e = list(index[0])
                type = gettype(e[0])
                e[0] = offset_type(0, type)
                index[0] = e
                return index, nodemap, None
            else:
                data += fp.read()

        # call the C implementation to parse the index data
        index, nodemap, cache = parsers.parse_index(data, inline)
        return index, nodemap, cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.
    """
    def __init__(self, opener, indexfile, shallowroot=None):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        self._cache = None
        self._chunkcache = (0, '')
        self.nodemap = {nullid: nullrev}
        self.index = []
        self._shallowroot = shallowroot
        self._parentdelta = 0

        v = REVLOG_DEFAULT_VERSION
        if hasattr(opener, 'options') and 'defversion' in opener.options:
            v = opener.options['defversion']
            if v & REVLOGNG:
                v |= REVLOGNGINLINEDATA
            if v & REVLOGNG and 'parentdelta' in opener.options:
                self._parentdelta = 1

        if shallowroot:
            v |= REVLOGSHALLOW

        i = ''
        try:
            f = self.opener(self.indexfile)
            if "nonlazy" in getattr(self.opener, 'options', {}):
                i = f.read()
            else:
                i = f.read(_prereadsize)
            if len(i) > 0:
                v = struct.unpack(versionformat, i[:4])[0]
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        self._shallow = v & REVLOGSHALLOW
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNG_FLAGS:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        if i:
            try:
                d = self._io.parseindex(f, i, self._inline)
            except (ValueError, IndexError):
                raise RevlogError(_("index %s is corrupted") % (self.indexfile))
            self.index, self.nodemap, self._chunkcache = d
            if not self._chunkcache:
                self._chunkclear()

        # add the magic null revision at -1 (if it hasn't been done already)
        if (self.index == [] or isinstance(self.index, lazyindex) or
            self.index[-1][7] != nullid) :
            self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))

    def _loadindex(self, start, end):
        """load a block of indexes all at once from the lazy parser"""
        if isinstance(self.index, lazyindex):
            self.index.p.loadindex(start, end)

    def _loadindexmap(self):
        """loads both the map and the index from the lazy parser"""
        if isinstance(self.index, lazyindex):
            p = self.index.p
            p.loadindex()
            self.nodemap = p.map

    def _loadmap(self):
        """loads the map from the lazy parser"""
        if isinstance(self.nodemap, lazymap):
            self.nodemap.p.loadmap()
            self.nodemap = self.nodemap.p.map

    def tip(self):
        return self.node(len(self.index) - 2)
    def __len__(self):
        return len(self.index) - 1
    def __iter__(self):
        for i in xrange(len(self)):
            yield i
    def rev(self, node):
        try:
            return self.nodemap[node]
        except KeyError:
            raise LookupError(node, self.indexfile, _('no node'))
    def node(self, rev):
        return self.index[rev][7]
    def linkrev(self, rev):
        return self.index[rev][4]
    def parents(self, node):
        i = self.index
        d = i[self.rev(node)]
        return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
    def parentrevs(self, rev):
        return self.index[rev][5:7]
    def start(self, rev):
        return int(self.index[rev][0] >> 16)
    def end(self, rev):
        return self.start(rev) + self.length(rev)
    def length(self, rev):
        return self.index[rev][1]
    def base(self, rev):
        return self.index[rev][3]
    def flags(self, rev):
        return self.index[rev][0] & 0xFFFF
    def rawsize(self, rev):
        """return the length of the uncompressed text for a given revision"""
        l = self.index[rev][2]
        if l >= 0:
            return l

        t = self.revision(self.node(rev))
        return len(t)
    size = rawsize

    def reachable(self, node, stop=None):
        """return the set of all nodes ancestral to a given node, including
        the node itself, stopping when stop is matched"""
        reachable = set((node,))
        visit = [node]
        if stop:
            stopn = self.rev(stop)
        else:
            stopn = 0
        while visit:
            n = visit.pop(0)
            if n == stop:
                continue
            if n == nullid:
                continue
            for p in self.parents(n):
                if self.rev(p) < stopn:
                    continue
                if p not in reachable:
                    reachable.add(p)
                    visit.append(p)
        return reachable

    def ancestors(self, *revs):
        """Generate the ancestors of 'revs' in reverse topological order.

        Yield a sequence of revision numbers starting with the parents
        of each revision in revs, i.e., each revision is *not* considered
        an ancestor of itself. Results are in breadth-first order:
        parents of each rev in revs, then parents of those, etc. Result
        does not include the null revision."""
        visit = list(revs)
        seen = set([nullrev])
        while visit:
            for parent in self.parentrevs(visit.pop(0)):
                if parent not in seen:
                    visit.append(parent)
                    seen.add(parent)
                    yield parent
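
    # Editor's note (illustration, not in the original): on a small linear
    # graph 0 <- 1 <- 2, ancestors(2) yields 1 then 0 (parents first,
    # breadth-first, excluding 2 itself), while descendants(0) below would
    # yield 1 then 2 in revision order.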

    def descendants(self, *revs):
        """Generate the descendants of 'revs' in revision order.

        Yield a sequence of revision numbers starting with a child of
        some rev in revs, i.e., each revision is *not* considered a
        descendant of itself. Results are ordered by revision number (a
        topological sort)."""
        first = min(revs)
        if first == nullrev:
            for i in self:
                yield i
            return

        seen = set(revs)
        for i in xrange(first + 1, len(self)):
            for x in self.parentrevs(i):
                if x != nullrev and x in seen:
                    seen.add(i)
                    yield i
                    break

    def findmissing(self, common=None, heads=None):
        """Return the ancestors of heads that are not ancestors of common.

        More specifically, return a list of nodes N such that every N
        satisfies the following constraints:

          1. N is an ancestor of some node in 'heads'
          2. N is not an ancestor of any node in 'common'

        The list is sorted by revision number, meaning it is
        topologically sorted.

        'heads' and 'common' are both lists of node IDs. If heads is
        not supplied, uses all of the revlog's heads. If common is not
        supplied, uses nullid."""
        if common is None:
            common = [nullid]
        if heads is None:
            heads = self.heads()

        common = [self.rev(n) for n in common]
        heads = [self.rev(n) for n in heads]

        # we want the ancestors, but inclusive
        has = set(self.ancestors(*common))
        has.add(nullrev)
        has.update(common)

        # take all ancestors from heads that aren't in has
        missing = set()
        visit = [r for r in heads if r not in has]
        while visit:
            r = visit.pop(0)
            if r in missing:
                continue
            else:
                missing.add(r)
                for p in self.parentrevs(r):
                    if p not in has:
                        visit.append(p)
        missing = list(missing)
        missing.sort()
        return [self.node(r) for r in missing]

    def nodesbetween(self, roots=None, heads=None):
        """Return a topological path from 'roots' to 'heads'.

        Return a tuple (nodes, outroots, outheads) where 'nodes' is a
        topologically sorted list of all nodes N that satisfy both of
        these constraints:

          1. N is a descendant of some node in 'roots'
          2. N is an ancestor of some node in 'heads'

        Every node is considered to be both a descendant and an ancestor
        of itself, so every reachable node in 'roots' and 'heads' will be
        included in 'nodes'.

        'outroots' is the list of reachable nodes in 'roots', i.e., the
        subset of 'roots' that is returned in 'nodes'. Likewise,
        'outheads' is the subset of 'heads' that is also in 'nodes'.

        'roots' and 'heads' are both lists of node IDs. If 'roots' is
        unspecified, uses nullid as the only root. If 'heads' is
        unspecified, uses list of all of the revlog's heads."""
        nonodes = ([], [], [])
        if roots is not None:
            roots = list(roots)
            if not roots:
                return nonodes
            lowestrev = min([self.rev(n) for n in roots])
        else:
            roots = [nullid] # Everybody's a descendent of nullid
            lowestrev = nullrev
        if (lowestrev == nullrev) and (heads is None):
            # We want _all_ the nodes!
            return ([self.node(r) for r in self], [nullid], list(self.heads()))
        if heads is None:
            # All nodes are ancestors, so the latest ancestor is the last
            # node.
            highestrev = len(self) - 1
            # Set ancestors to None to signal that every node is an ancestor.
            ancestors = None
            # Set heads to an empty dictionary for later discovery of heads
            heads = {}
        else:
            heads = list(heads)
            if not heads:
                return nonodes
            ancestors = set()
            # Turn heads into a dictionary so we can remove 'fake' heads.
            # Also, later we will be using it to filter out the heads we can't
            # find from roots.
            heads = dict.fromkeys(heads, 0)
            # Start at the top and keep marking parents until we're done.
            nodestotag = set(heads)
            # Remember where the top was so we can use it as a limit later.
            highestrev = max([self.rev(n) for n in nodestotag])
            while nodestotag:
                # grab a node to tag
                n = nodestotag.pop()
                # Never tag nullid
                if n == nullid:
                    continue
                # A node's revision number represents its place in a
                # topologically sorted list of nodes.
                r = self.rev(n)
                if r >= lowestrev:
                    if n not in ancestors:
                        # If we are possibly a descendent of one of the roots
                        # and we haven't already been marked as an ancestor
                        ancestors.add(n) # Mark as ancestor
                        # Add non-nullid parents to list of nodes to tag.
                        nodestotag.update([p for p in self.parents(n) if
                                           p != nullid])
                    elif n in heads: # We've seen it before, is it a fake head?
                        # So it is, real heads should not be the ancestors of
                        # any other heads.
                        heads.pop(n)
            if not ancestors:
                return nonodes
            # Now that we have our set of ancestors, we want to remove any
            # roots that are not ancestors.

            # If one of the roots was nullid, everything is included anyway.
            if lowestrev > nullrev:
                # But, since we weren't, let's recompute the lowest rev to not
                # include roots that aren't ancestors.

                # Filter out roots that aren't ancestors of heads
                roots = [n for n in roots if n in ancestors]
                # Recompute the lowest revision
                if roots:
                    lowestrev = min([self.rev(n) for n in roots])
                else:
                    # No more roots? Return empty list
                    return nonodes
            else:
                # We are descending from nullid, and don't need to care about
                # any other roots.
                lowestrev = nullrev
                roots = [nullid]
        # Transform our roots list into a set.
        descendents = set(roots)
        # Also, keep the original roots so we can filter out roots that aren't
        # 'real' roots (i.e. are descended from other roots).
        roots = descendents.copy()
        # Our topologically sorted list of output nodes.
        orderedout = []
        # Don't start at nullid since we don't want nullid in our output list,
        # and if nullid shows up in descendents, empty parents will look like
        # they're descendents.
        for r in xrange(max(lowestrev, 0), highestrev + 1):
            n = self.node(r)
            isdescendent = False
            if lowestrev == nullrev: # Everybody is a descendent of nullid
                isdescendent = True
            elif n in descendents:
                # n is already a descendent
                isdescendent = True
                # This check only needs to be done here because all the roots
                # will start being marked as descendents before the loop.
                if n in roots:
                    # If n was a root, check if it's a 'real' root.
                    p = tuple(self.parents(n))
                    # If any of its parents are descendents, it's not a root.
                    if (p[0] in descendents) or (p[1] in descendents):
                        roots.remove(n)
            else:
                p = tuple(self.parents(n))
                # A node is a descendent if either of its parents are
                # descendents. (We seeded the dependents list with the roots
                # up there, remember?)
                if (p[0] in descendents) or (p[1] in descendents):
                    descendents.add(n)
                    isdescendent = True
            if isdescendent and ((ancestors is None) or (n in ancestors)):
                # Only include nodes that are both descendents and ancestors.
                orderedout.append(n)
                if (ancestors is not None) and (n in heads):
                    # We're trying to figure out which heads are reachable
                    # from roots.
                    # Mark this head as having been reached
                    heads[n] = 1
                elif ancestors is None:
                    # Otherwise, we're trying to discover the heads.
                    # Assume this is a head because if it isn't, the next step
                    # will eventually remove it.
                    heads[n] = 1
                    # But, obviously its parents aren't.
                    for p in self.parents(n):
                        heads.pop(p, None)
        heads = [n for n in heads.iterkeys() if heads[n] != 0]
        roots = list(roots)
        assert orderedout
        assert roots
        assert heads
        return (orderedout, roots, heads)
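
    # Editor's note (illustration, not in the original): on a linear graph
    # 0 <- 1 <- 2 <- 3, nodesbetween([node(1)], [node(2)]) returns
    # ([node(1), node(2)], [node(1)], [node(2)]): the slice of the graph
    # lying between the two frontiers, inclusive on both ends, in revision
    # order.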
822
822
823 def heads(self, start=None, stop=None):
823 def heads(self, start=None, stop=None):
824 """return the list of all nodes that have no children
824 """return the list of all nodes that have no children
825
825
826 if start is specified, only heads that are descendants of
826 if start is specified, only heads that are descendants of
827 start will be returned
827 start will be returned
828 if stop is specified, it will consider all the revs from stop
828 if stop is specified, it will consider all the revs from stop
829 as if they had no children
829 as if they had no children
830 """
830 """
831 if start is None and stop is None:
831 if start is None and stop is None:
832 count = len(self)
832 count = len(self)
833 if not count:
833 if not count:
834 return [nullid]
834 return [nullid]
835 ishead = [1] * (count + 1)
835 ishead = [1] * (count + 1)
836 index = self.index
836 index = self.index
837 for r in xrange(count):
837 for r in xrange(count):
838 e = index[r]
838 e = index[r]
839 ishead[e[5]] = ishead[e[6]] = 0
839 ishead[e[5]] = ishead[e[6]] = 0
840 return [self.node(r) for r in xrange(count) if ishead[r]]
840 return [self.node(r) for r in xrange(count) if ishead[r]]
841
841
842 if start is None:
842 if start is None:
843 start = nullid
843 start = nullid
844 if stop is None:
844 if stop is None:
845 stop = []
845 stop = []
846 stoprevs = set([self.rev(n) for n in stop])
846 stoprevs = set([self.rev(n) for n in stop])
847 startrev = self.rev(start)
847 startrev = self.rev(start)
848 reachable = set((startrev,))
848 reachable = set((startrev,))
849 heads = set((startrev,))
849 heads = set((startrev,))
850
850
851 parentrevs = self.parentrevs
851 parentrevs = self.parentrevs
852 for r in xrange(startrev + 1, len(self)):
852 for r in xrange(startrev + 1, len(self)):
853 for p in parentrevs(r):
853 for p in parentrevs(r):
854 if p in reachable:
854 if p in reachable:
855 if r not in stoprevs:
855 if r not in stoprevs:
856 reachable.add(r)
856 reachable.add(r)
857 heads.add(r)
857 heads.add(r)
858 if p in heads and p not in stoprevs:
858 if p in heads and p not in stoprevs:
859 heads.remove(p)
859 heads.remove(p)
860
860
861 return [self.node(r) for r in heads]
861 return [self.node(r) for r in heads]
862
862
863 def children(self, node):
863 def children(self, node):
864 """find the children of a given node"""
864 """find the children of a given node"""
865 c = []
865 c = []
866 p = self.rev(node)
866 p = self.rev(node)
867 for r in range(p + 1, len(self)):
867 for r in range(p + 1, len(self)):
868 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
868 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
869 if prevs:
869 if prevs:
870 for pr in prevs:
870 for pr in prevs:
871 if pr == p:
871 if pr == p:
872 c.append(self.node(r))
872 c.append(self.node(r))
873 elif p == nullrev:
873 elif p == nullrev:
874 c.append(self.node(r))
874 c.append(self.node(r))
875 return c
875 return c
876
876
877 def descendant(self, start, end):
877 def descendant(self, start, end):
878 if start == nullrev:
878 if start == nullrev:
879 return True
879 return True
880 for i in self.descendants(start):
880 for i in self.descendants(start):
881 if i == end:
881 if i == end:
882 return True
882 return True
883 elif i > end:
883 elif i > end:
884 break
884 break
885 return False
885 return False

    def ancestor(self, a, b):
        """calculate the least common ancestor of nodes a and b"""

        # fast path, check if it is a descendant
        a, b = self.rev(a), self.rev(b)
        start, end = sorted((a, b))
        if self.descendant(start, end):
            return self.node(start)

        def parents(rev):
            return [p for p in self.parentrevs(rev) if p != nullrev]

        c = ancestor.ancestor(a, b, parents)
        if c is None:
            return nullid

        return self.node(c)

    def _match(self, id):
        if isinstance(id, (long, int)):
            # rev
            return self.node(id)
        if len(id) == 20:
            # possibly a binary node
            # odds of a binary node being all hex in ASCII are 1 in 10**25
            try:
                node = id
                self.rev(node) # quick search the index
                return node
            except LookupError:
                pass # may be partial hex id
        try:
            # str(rev)
            rev = int(id)
            if str(rev) != id:
                raise ValueError
            if rev < 0:
                rev = len(self) + rev
            if rev < 0 or rev >= len(self):
                raise ValueError
            return self.node(rev)
        except (ValueError, OverflowError):
            pass
        if len(id) == 40:
            try:
                # a full hex nodeid?
                node = bin(id)
                self.rev(node)
                return node
            except (TypeError, LookupError):
                pass

    def _partialmatch(self, id):
        if len(id) < 40:
            try:
                # hex(node)[:...]
                l = len(id) // 2 # grab an even number of digits
                bin_id = bin(id[:l * 2])
                nl = [n for n in self.nodemap if n[:l] == bin_id]
                nl = [n for n in nl if hex(n).startswith(id)]
                if len(nl) > 0:
                    if len(nl) == 1:
                        return nl[0]
                    raise LookupError(id, self.indexfile,
                                      _('ambiguous identifier'))
                return None
            except TypeError:
                pass

    def lookup(self, id):
        """locate a node based on:
            - revision number or str(revision number)
            - nodeid or subset of hex nodeid
        """
        n = self._match(id)
        if n is not None:
            return n
        n = self._partialmatch(id)
        if n:
            return n

        raise LookupError(id, self.indexfile, _('no match found'))
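
    # Illustrative usage (not part of the original file; 'rl' stands for
    # any revlog instance, 'node' for a known 20-byte nodeid). All of
    # these forms resolve through _match() and _partialmatch():
    #
    #   rl.lookup(0)              # an integer revision number
    #   rl.lookup('0')            # str(revision number)
    #   rl.lookup('-1')           # negative revs count from the end
    #   rl.lookup(node)           # a 20-byte binary nodeid
    #   rl.lookup(hex(node))      # the full 40-char hex nodeid
    #   rl.lookup(hex(node)[:6])  # an unambiguous hex prefix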

    def cmp(self, node, text):
        """compare text with a given file revision

        returns True if text is different from what is stored.
        """
        p1, p2 = self.parents(node)
        return hash(text, p1, p2) != node

    def _addchunk(self, offset, data):
        o, d = self._chunkcache
        # try to add to existing cache
        if o + len(d) == offset and len(d) + len(data) < _prereadsize:
            self._chunkcache = o, d + data
        else:
            self._chunkcache = offset, data

    def _loadchunk(self, offset, length):
        if self._inline:
            df = self.opener(self.indexfile)
        else:
            df = self.opener(self.datafile)

        readahead = max(65536, length)
        df.seek(offset)
        d = df.read(readahead)
        self._addchunk(offset, d)
        if readahead > length:
            return d[:length]
        return d

    def _getchunk(self, offset, length):
        o, d = self._chunkcache
        l = len(d)

        # is it in the cache?
        cachestart = offset - o
        cacheend = cachestart + length
        if cachestart >= 0 and cacheend <= l:
            if cachestart == 0 and cacheend == l:
                return d # avoid a copy
            return d[cachestart:cacheend]

        return self._loadchunk(offset, length)
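
    # The chunk cache is a single (offset, data) window over the data
    # file. _loadchunk always reads at least 64kB, and _addchunk grows
    # the window when reads are sequential, so reconstructing a delta
    # chain usually touches the file once. For example, after
    # _getchunk(0, 100) the cache holds bytes 0-65535, and a following
    # _getchunk(200, 500) is served without another read().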

    def _chunkraw(self, startrev, endrev):
        start = self.start(startrev)
        length = self.end(endrev) - start
        if self._inline:
            start += (startrev + 1) * self._io.size
        return self._getchunk(start, length)

    def _chunk(self, rev):
        return decompress(self._chunkraw(rev, rev))

    def _chunkclear(self):
        self._chunkcache = (0, '')

    def deltaparent(self, rev):
        """return previous revision or parentrev according to flags"""
        if self.flags(rev) & REVIDX_PARENTDELTA:
            return self.parentrevs(rev)[0]
        else:
            return rev - 1

    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if self.base(rev2) != rev2 and self.deltaparent(rev2) == rev1:
            return self._chunk(rev2)

        return mdiff.textdiff(self.revision(self.node(rev1)),
                              self.revision(self.node(rev2)))

    def revision(self, node):
        """return an uncompressed revision of a given node"""
        cachedrev = None
        if node == nullid:
            return ""
        if self._cache:
            if self._cache[0] == node:
                return self._cache[2]
            cachedrev = self._cache[1]

        # look up what we need to read
        text = None
        rev = self.rev(node)
        base = self.base(rev)

        # check rev flags
        if self.flags(rev) & ~REVIDX_KNOWN_FLAGS:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.flags(rev) & ~REVIDX_KNOWN_FLAGS))

        # build delta chain
        self._loadindex(base, rev + 1)
        chain = []
        index = self.index # for performance
        iterrev = rev
        e = index[iterrev]
        while iterrev != base and iterrev != cachedrev:
            chain.append(iterrev)
            if e[0] & REVIDX_PARENTDELTA:
                iterrev = e[5]
            else:
                iterrev -= 1
            e = index[iterrev]
        chain.reverse()
        base = iterrev

        if iterrev == cachedrev:
            # cache hit
            text = self._cache[2]

        # drop cache to save memory
        self._cache = None

        self._chunkraw(base, rev)
        if text is None:
            text = self._chunk(base)

        bins = [self._chunk(r) for r in chain]
        text = mdiff.patches(text, bins)

        text = self._checkhash(text, node, rev)

        self._cache = (node, rev, text)
        return text

    def _checkhash(self, text, node, rev):
        p1, p2 = self.parents(node)
        if (node != hash(text, p1, p2) and
            not (self.flags(rev) & REVIDX_PUNCHED_FLAG)):
            raise RevlogError(_("integrity check failed on %s:%d")
                              % (self.indexfile, rev))
        return text
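
    # revision() walks the delta chain backwards from the requested rev
    # to its base (or to the cached rev), then patches forward: the base
    # text plus each delta in 'bins'. _checkhash verifies the result
    # against the recorded nodeid; punched revisions are exempt from the
    # check via REVIDX_PUNCHED_FLAG. Note _checkhash needs the rev passed
    # in, since it reports errors and reads flags by revision number.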

    def checkinlinesize(self, tr, fp=None):
        if not self._inline or (self.start(-2) + self.length(-2)) < _maxinline:
            return

        trinfo = tr.find(self.indexfile)
        if trinfo is None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)

        if fp:
            fp.flush()
            fp.close()

        df = self.opener(self.datafile, 'w')
        try:
            for r in self:
                df.write(self._chunkraw(r, r))
        finally:
            df.close()

        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call rename, the temp file will never replace the
        # real index
        fp.rename()

        tr.replace(self.indexfile, trindex * self._io.size)
        self._chunkclear()
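
    # Small revlogs keep their data interleaved with the index ("inline")
    # to save one file per tracked file. Once the data grows past
    # _maxinline (128kB here), this method splits it out: revision data
    # is copied to the separate data file and the index is rewritten
    # without the REVLOGNGINLINEDATA flag, via an atomic temp-file
    # rename so a crash leaves the old inline revlog intact.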

    def addrevision(self, text, transaction, link, p1, p2, cachedelta=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        cachedelta - an optional precomputed delta
        """
        node = hash(text, p1, p2)
        if (node in self.nodemap and
            (not self.flags(self.rev(node)) & REVIDX_PUNCHED_FLAG)):
            return node

        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        try:
            return self._addrevision(node, text, transaction, link, p1, p2,
                                     cachedelta, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def _addrevision(self, node, text, transaction, link, p1, p2,
                     cachedelta, ifh, dfh):

        btext = [text]
        def buildtext():
            if btext[0] is not None:
                return btext[0]
            # flush any pending writes here so we can read it in revision
            if dfh:
                dfh.flush()
            ifh.flush()
            basetext = self.revision(self.node(cachedelta[0]))
            btext[0] = mdiff.patch(basetext, cachedelta[1])
            chk = hash(btext[0], p1, p2)
            if chk != node:
                raise RevlogError(_("consistency error in delta"))
            return btext[0]

        def builddelta(rev):
            # can we use the cached delta?
            if cachedelta and cachedelta[0] == rev:
                delta = cachedelta[1]
            else:
                t = buildtext()
                ptext = self.revision(self.node(rev))
                delta = mdiff.textdiff(ptext, t)
            data = compress(delta)
            l = len(data[1]) + len(data[0])
            base = self.base(rev)
            dist = l + offset - self.start(base)
            return dist, l, data, base

        curr = len(self)
        prev = curr - 1
        base = curr
        offset = self.end(prev)
        flags = 0
        d = None
        p1r, p2r = self.rev(p1), self.rev(p2)

        # should we try to build a delta?
        if prev != nullrev:
            d = builddelta(prev)
            if self._parentdelta and prev != p1r:
                d2 = builddelta(p1r)
                if d2 < d:
                    d = d2
                    flags = REVIDX_PARENTDELTA
            dist, l, data, base = d

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        # or the base revision is punched
        if text is None:
            textlen = mdiff.patchedsize(self.rawsize(cachedelta[0]),
                                        cachedelta[1])
        else:
            textlen = len(text)
        if (d is None or dist > textlen * 2 or
            (self.flags(base) & REVIDX_PUNCHED_FLAG)):
            text = buildtext()
            data = compress(text)
            l = len(data[1]) + len(data[0])
            base = curr
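
        # Snapshot heuristic, by example: for a 1000-byte revision, the
        # delta chain is extended only while 'dist' (delta size plus the
        # distance back to the last full version) stays under 2000
        # bytes; beyond that, or when the base is punched, a full
        # compressed snapshot is stored instead (base = curr), bounding
        # the work revision() must do to reconstruct any rev.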

        e = (offset_type(offset, flags), l, textlen,
             base, link, p1r, p2r, node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

        if type(text) == str: # only accept immutable objects
            self._cache = (node, curr, text)
        return node

    def group(self, nodelist, lookup, infocollect=None, fullrev=False):
        """Calculate a delta group, yielding a sequence of changegroup chunks
        (strings).

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. The first delta is
        first parent(nodelist[0]) -> nodelist[0]; the receiver is
        guaranteed to have this parent as it has all history before
        these changesets. If the first parent is nullrev, the
        changegroup starts with a full revision.

        fullrev forces the insertion of the full revision, necessary
        in the case of shallow clones where the first parent might
        not exist at the receiver.
        """

        revs = [self.rev(n) for n in nodelist]

        # if we don't have any revisions touched by these changesets, bail
        if not revs:
            yield changegroup.closechunk()
            return

        # add the parent of the first rev
        p = self.parentrevs(revs[0])[0]
        revs.insert(0, p)
        if p == nullrev:
            fullrev = True

        # build deltas
        for d in xrange(len(revs) - 1):
            a, b = revs[d], revs[d + 1]
            nb = self.node(b)

            if infocollect is not None:
                infocollect(nb)

            p = self.parents(nb)
            meta = nb + p[0] + p[1] + lookup(nb)
            if fullrev:
                d = self.revision(nb)
                meta += mdiff.trivialdiffheader(len(d))
                fullrev = False
            else:
                d = self.revdiff(a, b)
            yield changegroup.chunkheader(len(meta) + len(d))
            yield meta
            yield d

        yield changegroup.closechunk()
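
    # Each chunk on the wire is a length header followed by 80 bytes of
    # metadata (node, p1, p2, and the link node supplied by lookup(),
    # 20 bytes each) and then the delta against the previous rev in the
    # group. A zero-length chunk (closechunk) terminates the group.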

    def addgroup(self, bundle, linkmapper, transaction):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.
        """

        # track the base of the current delta log
        node = None

        r = len(self)
        end = 0
        if r:
            end = self.end(r - 1)
        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        try:
            # loop through our set of deltas
            chain = None
            while True:
                chunkdata = bundle.parsechunk()
                if not chunkdata:
                    break
                node = chunkdata['node']
                p1 = chunkdata['p1']
                p2 = chunkdata['p2']
                cs = chunkdata['cs']
                delta = chunkdata['data']

                link = linkmapper(cs)
                if (node in self.nodemap and
                    (not self.flags(self.rev(node)) & REVIDX_PUNCHED_FLAG)):
                    # this can happen if two branches make the same change
                    chain = node
                    continue

                for p in (p1, p2):
                    if p not in self.nodemap:
                        if self._shallow:
                            # add null entries for missing parents
                            # XXX FIXME
                            #if base == nullrev:
                            #    base = len(self)
                            #e = (offset_type(end, REVIDX_PUNCHED_FLAG),
                            #     0, 0, base, nullrev, nullrev, nullrev, p)
                            #self.index.insert(-1, e)
                            #self.nodemap[p] = r
                            #entry = self._io.packentry(e, self.node,
                            #                           self.version, r)
                            #ifh.write(entry)
                            #t, r = r, r + 1
                            raise LookupError(p, self.indexfile,
                                              _('unknown parent'))
                        else:
                            raise LookupError(p, self.indexfile,
                                              _('unknown parent'))

                if not chain:
                    # retrieve the parent revision of the delta chain
                    chain = p1
                    if chain not in self.nodemap:
                        raise LookupError(chain, self.indexfile,
                                          _('unknown base'))

                chainrev = self.rev(chain)
                chain = self._addrevision(node, None, transaction, link,
                                          p1, p2, (chainrev, delta), ifh, dfh)
                if not dfh and not self._inline:
                    # addrevision switched from inline to conventional
                    # reopen the index
                    dfh = self.opener(self.datafile, "a")
                    ifh = self.opener(self.indexfile, "a")
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return node

    def strip(self, minlink, transaction):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll re-add them after this truncation.
        """
        if len(self) == 0:
            return

        if isinstance(self.index, lazyindex):
            self._loadindexmap()

        for rev in self:
            if self.index[rev][4] >= minlink:
                break
        else:
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            transaction.add(self.datafile, end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        transaction.add(self.indexfile, end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chunkclear()
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]
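
        # The slice stops at -1 rather than the end because the in-memory
        # index keeps a sentinel entry for nullid as its last element
        # (which is also why _addrevision uses index.insert(-1, e)); the
        # sentinel must survive the truncation.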

    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            s = self._io.size
            i = max(0, actual // s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)
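
    # dd is the number of unexpected bytes at the end of the data file,
    # di the number at the end of the index file; (0, 0) means the sizes
    # are consistent with the index entries. Consumers such as 'hg
    # verify' use these to report truncated or over-long revlog files.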

    def files(self):
        res = [self.indexfile]
        if not self._inline:
            res.append(self.datafile)
        return res