raise RevlogError when parser can't parse the revlog index...
Benoit Boissinot
r8016:baaa832f default
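Before this change, handing the index parser a truncated or garbled index file surfaced as a bare ValueError or IndexError escaping from revlog.__init__; the hunk below wraps the parseindex() call so the failure is reported as a RevlogError naming the index file. A minimal sketch of the error-translation pattern (parse_index and load_index here are hypothetical stand-ins; the real code wraps revlogio.parseindex and the C parsers.parse_index):

    class RevlogError(Exception):
        pass

    def parse_index(data):
        # stand-in for the C parser: a v1 index is a sequence of
        # 64-byte records, so a trailing partial record is corruption
        if len(data) % 64 != 0:
            raise ValueError("truncated index record")
        return [data[o:o + 64] for o in xrange(0, len(data), 64)]

    def load_index(indexfile, data):
        try:
            return parse_index(data)
        except (ValueError, IndexError):
            # translate low-level parser errors into a revlog-level
            # error, mirroring the hunk in this changeset
            raise RevlogError("index %s is corrupted" % indexfile)

Callers that already catch RevlogError now cover index corruption as well, instead of seeing an unrelated ValueError or IndexError.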
@@ -1,1360 +1,1363 @@
1 """
1 """
2 revlog.py - storage back-end for mercurial
2 revlog.py - storage back-end for mercurial
3
3
4 This provides efficient delta storage with O(1) retrieve and append
4 This provides efficient delta storage with O(1) retrieve and append
5 and O(changes) merge between branches
5 and O(changes) merge between branches
6
6
7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
7 Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
8
8
9 This software may be used and distributed according to the terms
9 This software may be used and distributed according to the terms
10 of the GNU General Public License, incorporated herein by reference.
10 of the GNU General Public License, incorporated herein by reference.
11 """
11 """
12
12
13 from node import bin, hex, nullid, nullrev, short
13 from node import bin, hex, nullid, nullrev, short
14 from i18n import _
14 from i18n import _
15 import changegroup, errno, ancestor, mdiff, parsers
15 import changegroup, errno, ancestor, mdiff, parsers
16 import struct, util, zlib, error
16 import struct, util, zlib, error
17
17
18 _pack = struct.pack
18 _pack = struct.pack
19 _unpack = struct.unpack
19 _unpack = struct.unpack
20 _compress = zlib.compress
20 _compress = zlib.compress
21 _decompress = zlib.decompress
21 _decompress = zlib.decompress
22 _sha = util.sha1
22 _sha = util.sha1
23
23
24 # revlog flags
24 # revlog flags
25 REVLOGV0 = 0
25 REVLOGV0 = 0
26 REVLOGNG = 1
26 REVLOGNG = 1
27 REVLOGNGINLINEDATA = (1 << 16)
27 REVLOGNGINLINEDATA = (1 << 16)
28 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
28 REVLOG_DEFAULT_FLAGS = REVLOGNGINLINEDATA
29 REVLOG_DEFAULT_FORMAT = REVLOGNG
29 REVLOG_DEFAULT_FORMAT = REVLOGNG
30 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
30 REVLOG_DEFAULT_VERSION = REVLOG_DEFAULT_FORMAT | REVLOG_DEFAULT_FLAGS
31
31
32 RevlogError = error.RevlogError
32 RevlogError = error.RevlogError
33 LookupError = error.LookupError
33 LookupError = error.LookupError
34
34
35 def getoffset(q):
35 def getoffset(q):
36 return int(q >> 16)
36 return int(q >> 16)
37
37
38 def gettype(q):
38 def gettype(q):
39 return int(q & 0xFFFF)
39 return int(q & 0xFFFF)
40
40
41 def offset_type(offset, type):
41 def offset_type(offset, type):
42 return long(long(offset) << 16 | type)
42 return long(long(offset) << 16 | type)
43
43
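# Illustrative example (not in the original file): an index entry's first
# field packs a 48-bit data-file offset with 16 bits of flags in the low word:
#   offset_type(10, 0) -> 655360L   (10 << 16, flags 0)
#   getoffset(655360)  -> 10
#   gettype(655360)    -> 0
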
def hash(text, p1, p2):
    """generate a hash from the given text and its parent hashes

    This hash combines both the current file contents and its history
    in a manner that makes it easy to distinguish nodes with the same
    content in the revision graph.
    """
    l = [p1, p2]
    l.sort()
    s = _sha(l[0])
    s.update(l[1])
    s.update(text)
    return s.digest()

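# Illustrative property (not in the original file): because the parents are
# sorted before hashing, the node id is order-independent in p1/p2:
#   hash(text, p1, p2) == hash(text, p2, p1) == sha1(min + max + text)
# so identical text reached through different history still gets a distinct
# node whenever the parent set differs.
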
def compress(text):
    """ generate a possibly-compressed representation of text """
    if not text:
        return ("", text)
    l = len(text)
    bin = None
    if l < 44:
        pass
    elif l > 1000000:
        # zlib makes an internal copy, thus doubling memory usage for
        # large files, so lets do this in pieces
        z = zlib.compressobj()
        p = []
        pos = 0
        while pos < l:
            pos2 = pos + 2**20
            p.append(z.compress(text[pos:pos2]))
            pos = pos2
        p.append(z.flush())
        if sum(map(len, p)) < l:
            bin = "".join(p)
    else:
        bin = _compress(text)
    if bin is None or len(bin) > l:
        if text[0] == '\0':
            return ("", text)
        return ('u', text)
    return ("", bin)

def decompress(bin):
    """ decompress the given input """
    if not bin:
        return bin
    t = bin[0]
    if t == '\0':
        return bin
    if t == 'x':
        return _decompress(bin)
    if t == 'u':
        return bin[1:]
    raise RevlogError(_("unknown compression type %r") % t)

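# Illustrative examples (not in the original file) of the tag bytes the two
# functions above agree on:
#   compress("")         -> ("", "")          empty text passes through
#   compress("abc")      -> ('u', "abc")      short text stored literal, 'u' tag
#   compress("\0abc")    -> ("", "\0abc")     NUL-led text needs no tag
#   compress("x" * 4096) -> ("", "x\x9c...")  zlib stream, starts with 'x'
# and decompress("".join(compress(t))) == t for any string t.
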
class lazyparser(object):
    """
    this class avoids the need to parse the entirety of large indices
    """

    # lazyparser is not safe to use on windows if win32 extensions not
    # available. it keeps file handle open, which make it not possible
    # to break hardlinks on local cloned repos.

    def __init__(self, dataf, size):
        self.dataf = dataf
        self.s = struct.calcsize(indexformatng)
        self.datasize = size
        self.l = size/self.s
        self.index = [None] * self.l
        self.map = {nullid: nullrev}
        self.allmap = 0
        self.all = 0
        self.mapfind_count = 0

    def loadmap(self):
        """
        during a commit, we need to make sure the rev being added is
        not a duplicate. This requires loading the entire index,
        which is fairly slow. loadmap can load up just the node map,
        which takes much less time.
        """
        if self.allmap:
            return
        end = self.datasize
        self.allmap = 1
        cur = 0
        count = 0
        blocksize = self.s * 256
        self.dataf.seek(0)
        while cur < end:
            data = self.dataf.read(blocksize)
            off = 0
            for x in xrange(256):
                n = data[off + ngshaoffset:off + ngshaoffset + 20]
                self.map[n] = count
                count += 1
                if count >= self.l:
                    break
                off += self.s
            cur += blocksize

    def loadblock(self, blockstart, blocksize, data=None):
        if self.all:
            return
        if data is None:
            self.dataf.seek(blockstart)
            if blockstart + blocksize > self.datasize:
                # the revlog may have grown since we've started running,
                # but we don't have space in self.index for more entries.
                # limit blocksize so that we don't get too much data.
                blocksize = max(self.datasize - blockstart, 0)
            data = self.dataf.read(blocksize)
        lend = len(data) / self.s
        i = blockstart / self.s
        off = 0
        # lazyindex supports __delitem__
        if lend > len(self.index) - i:
            lend = len(self.index) - i
        for x in xrange(lend):
            if self.index[i + x] == None:
                b = data[off : off + self.s]
                self.index[i + x] = b
                n = b[ngshaoffset:ngshaoffset + 20]
                self.map[n] = i + x
            off += self.s

    def findnode(self, node):
        """search backwards through the index file for a specific node"""
        if self.allmap:
            return None

        # hg log will cause many many searches for the manifest
        # nodes. After we get called a few times, just load the whole
        # thing.
        if self.mapfind_count > 8:
            self.loadmap()
            if node in self.map:
                return node
            return None
        self.mapfind_count += 1
        last = self.l - 1
        while self.index[last] != None:
            if last == 0:
                self.all = 1
                self.allmap = 1
                return None
            last -= 1
        end = (last + 1) * self.s
        blocksize = self.s * 256
        while end >= 0:
            start = max(end - blocksize, 0)
            self.dataf.seek(start)
            data = self.dataf.read(end - start)
            findend = end - start
            while True:
                # we're searching backwards, so we have to make sure
                # we don't find a changeset where this node is a parent
                off = data.find(node, 0, findend)
                findend = off
                if off >= 0:
                    i = off / self.s
                    off = i * self.s
                    n = data[off + ngshaoffset:off + ngshaoffset + 20]
                    if n == node:
                        self.map[n] = i + start / self.s
                        return node
                else:
                    break
            end -= blocksize
        return None

    def loadindex(self, i=None, end=None):
        if self.all:
            return
        all = False
        if i == None:
            blockstart = 0
            blocksize = (65536 / self.s) * self.s
            end = self.datasize
            all = True
        else:
            if end:
                blockstart = i * self.s
                end = end * self.s
                blocksize = end - blockstart
            else:
                blockstart = (i & ~1023) * self.s
                blocksize = self.s * 1024
                end = blockstart + blocksize
        while blockstart < end:
            self.loadblock(blockstart, blocksize)
            blockstart += blocksize
        if all:
            self.all = True

class lazyindex(object):
    """a lazy version of the index array"""
    def __init__(self, parser):
        self.p = parser
    def __len__(self):
        return len(self.p.index)
    def load(self, pos):
        if pos < 0:
            pos += len(self.p.index)
        self.p.loadindex(pos)
        return self.p.index[pos]
    def __getitem__(self, pos):
        return _unpack(indexformatng, self.p.index[pos] or self.load(pos))
    def __setitem__(self, pos, item):
        self.p.index[pos] = _pack(indexformatng, *item)
    def __delitem__(self, pos):
        del self.p.index[pos]
    def insert(self, pos, e):
        self.p.index.insert(pos, _pack(indexformatng, *e))
    def append(self, e):
        self.p.index.append(_pack(indexformatng, *e))

class lazymap(object):
    """a lazy version of the node map"""
    def __init__(self, parser):
        self.p = parser
    def load(self, key):
        n = self.p.findnode(key)
        if n == None:
            raise KeyError(key)
    def __contains__(self, key):
        if key in self.p.map:
            return True
        self.p.loadmap()
        return key in self.p.map
    def __iter__(self):
        yield nullid
        for i in xrange(self.p.l):
            ret = self.p.index[i]
            if not ret:
                self.p.loadindex(i)
                ret = self.p.index[i]
            if isinstance(ret, str):
                ret = _unpack(indexformatng, ret)
            yield ret[7]
    def __getitem__(self, key):
        try:
            return self.p.map[key]
        except KeyError:
            try:
                self.load(key)
                return self.p.map[key]
            except KeyError:
                raise KeyError("node " + hex(key))
    def __setitem__(self, key, val):
        self.p.map[key] = val
    def __delitem__(self, key):
        del self.p.map[key]

indexformatv0 = ">4l20s20s20s"
v0shaoffset = 56

class revlogoldio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatv0)

    def parseindex(self, fp, inline):
        s = self.size
        index = []
        nodemap = {nullid: nullrev}
        n = off = 0
        data = fp.read()
        l = len(data)
        while off + s <= l:
            cur = data[off:off + s]
            off += s
            e = _unpack(indexformatv0, cur)
            # transform to revlogv1 format
            e2 = (offset_type(e[0], 0), e[1], -1, e[2], e[3],
                  nodemap.get(e[4], nullrev), nodemap.get(e[5], nullrev), e[6])
            index.append(e2)
            nodemap[e[6]] = n
            n += 1

        return index, nodemap, None

    def packentry(self, entry, node, version, rev):
        e2 = (getoffset(entry[0]), entry[1], entry[3], entry[4],
              node(entry[5]), node(entry[6]), entry[7])
        return _pack(indexformatv0, *e2)

# index ng:
# 6 bytes offset
# 2 bytes flags
# 4 bytes compressed length
# 4 bytes uncompressed length
# 4 bytes: base rev
# 4 bytes link rev
# 4 bytes parent 1 rev
# 4 bytes parent 2 rev
# 32 bytes: nodeid
indexformatng = ">Qiiiiii20s12x"
ngshaoffset = 32
versionformat = ">I"

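# Illustrative note (not in the original file): each revlogng entry is
# struct.calcsize(">Qiiiiii20s12x") == 64 bytes and unpacks to the 8-tuple
# used throughout this file:
#   (offset_flags, comp_len, uncomp_len, base_rev,
#    link_rev, p1_rev, p2_rev, nodeid)
# the 12x padding grows the 20-byte SHA-1 to the "32 bytes: nodeid" slot
# described in the layout comment above.
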
class revlogio(object):
    def __init__(self):
        self.size = struct.calcsize(indexformatng)

    def parseindex(self, fp, inline):
        try:
            size = util.fstat(fp).st_size
        except AttributeError:
            size = 0

        if util.openhardlinks() and not inline and size > 1000000:
            # big index, let's parse it on demand
            parser = lazyparser(fp, size)
            index = lazyindex(parser)
            nodemap = lazymap(parser)
            e = list(index[0])
            type = gettype(e[0])
            e[0] = offset_type(0, type)
            index[0] = e
            return index, nodemap, None

        data = fp.read()
        # call the C implementation to parse the index data
        index, nodemap, cache = parsers.parse_index(data, inline)
        return index, nodemap, cache

    def packentry(self, entry, node, version, rev):
        p = _pack(indexformatng, *entry)
        if rev == 0:
            p = _pack(versionformat, version) + p[4:]
        return p

class revlog(object):
    """
    the underlying revision storage object

    A revlog consists of two parts, an index and the revision data.

    The index is a file with a fixed record size containing
    information on each revision, including its nodeid (hash), the
    nodeids of its parents, the position and offset of its data within
    the data file, and the revision it's based on. Finally, each entry
    contains a linkrev entry that can serve as a pointer to external
    data.

    The revision data itself is a linear collection of data chunks.
    Each chunk represents a revision and is usually represented as a
    delta against the previous chunk. To bound lookup time, runs of
    deltas are limited to about 2 times the length of the original
    version data. This makes retrieval of a version proportional to
    its size, or O(1) relative to the number of revisions.

    Both pieces of the revlog are written to in an append-only
    fashion, which means we never need to rewrite a file to insert or
    remove data, and can use some simple techniques to avoid the need
    for locking while reading.
    """
    def __init__(self, opener, indexfile):
        """
        create a revlog object

        opener is a function that abstracts the file opening operation
        and can be used to implement COW semantics or the like.
        """
        self.indexfile = indexfile
        self.datafile = indexfile[:-2] + ".d"
        self.opener = opener
        self._cache = None
        self._chunkcache = None
        self.nodemap = {nullid: nullrev}
        self.index = []

        v = REVLOG_DEFAULT_VERSION
        if hasattr(opener, "defversion"):
            v = opener.defversion
            if v & REVLOGNG:
                v |= REVLOGNGINLINEDATA

        i = ""
        try:
            f = self.opener(self.indexfile)
            i = f.read(4)
            f.seek(0)
            if len(i) > 0:
                v = struct.unpack(versionformat, i)[0]
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise

        self.version = v
        self._inline = v & REVLOGNGINLINEDATA
        flags = v & ~0xFFFF
        fmt = v & 0xFFFF
        if fmt == REVLOGV0 and flags:
            raise RevlogError(_("index %s unknown flags %#04x for format v0")
                              % (self.indexfile, flags >> 16))
        elif fmt == REVLOGNG and flags & ~REVLOGNGINLINEDATA:
            raise RevlogError(_("index %s unknown flags %#04x for revlogng")
                              % (self.indexfile, flags >> 16))
        elif fmt > REVLOGNG:
            raise RevlogError(_("index %s unknown format %d")
                              % (self.indexfile, fmt))

        self._io = revlogio()
        if self.version == REVLOGV0:
            self._io = revlogoldio()
        if i:
-            d = self._io.parseindex(f, self._inline)
+            try:
+                d = self._io.parseindex(f, self._inline)
+            except (ValueError, IndexError), e:
+                raise RevlogError(_("index %s is corrupted") % (self.indexfile))
            self.index, self.nodemap, self._chunkcache = d
455
458
456 # add the magic null revision at -1 (if it hasn't been done already)
459 # add the magic null revision at -1 (if it hasn't been done already)
457 if (self.index == [] or isinstance(self.index, lazyindex) or
460 if (self.index == [] or isinstance(self.index, lazyindex) or
458 self.index[-1][7] != nullid) :
461 self.index[-1][7] != nullid) :
459 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
462 self.index.append((0, 0, 0, -1, -1, -1, -1, nullid))
460
463
461 def _loadindex(self, start, end):
464 def _loadindex(self, start, end):
462 """load a block of indexes all at once from the lazy parser"""
465 """load a block of indexes all at once from the lazy parser"""
463 if isinstance(self.index, lazyindex):
466 if isinstance(self.index, lazyindex):
464 self.index.p.loadindex(start, end)
467 self.index.p.loadindex(start, end)
465
468
466 def _loadindexmap(self):
469 def _loadindexmap(self):
467 """loads both the map and the index from the lazy parser"""
470 """loads both the map and the index from the lazy parser"""
468 if isinstance(self.index, lazyindex):
471 if isinstance(self.index, lazyindex):
469 p = self.index.p
472 p = self.index.p
470 p.loadindex()
473 p.loadindex()
471 self.nodemap = p.map
474 self.nodemap = p.map
472
475
473 def _loadmap(self):
476 def _loadmap(self):
474 """loads the map from the lazy parser"""
477 """loads the map from the lazy parser"""
475 if isinstance(self.nodemap, lazymap):
478 if isinstance(self.nodemap, lazymap):
476 self.nodemap.p.loadmap()
479 self.nodemap.p.loadmap()
477 self.nodemap = self.nodemap.p.map
480 self.nodemap = self.nodemap.p.map
478
481
479 def tip(self):
482 def tip(self):
480 return self.node(len(self.index) - 2)
483 return self.node(len(self.index) - 2)
481 def __len__(self):
484 def __len__(self):
482 return len(self.index) - 1
485 return len(self.index) - 1
483 def __iter__(self):
486 def __iter__(self):
484 for i in xrange(len(self)):
487 for i in xrange(len(self)):
485 yield i
488 yield i
486 def rev(self, node):
489 def rev(self, node):
487 try:
490 try:
488 return self.nodemap[node]
491 return self.nodemap[node]
489 except KeyError:
492 except KeyError:
490 raise LookupError(node, self.indexfile, _('no node'))
493 raise LookupError(node, self.indexfile, _('no node'))
491 def node(self, rev):
494 def node(self, rev):
492 return self.index[rev][7]
495 return self.index[rev][7]
493 def linkrev(self, rev):
496 def linkrev(self, rev):
494 return self.index[rev][4]
497 return self.index[rev][4]
495 def parents(self, node):
498 def parents(self, node):
496 i = self.index
499 i = self.index
497 d = i[self.rev(node)]
500 d = i[self.rev(node)]
498 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
501 return i[d[5]][7], i[d[6]][7] # map revisions to nodes inline
499 def parentrevs(self, rev):
502 def parentrevs(self, rev):
500 return self.index[rev][5:7]
503 return self.index[rev][5:7]
501 def start(self, rev):
504 def start(self, rev):
502 return int(self.index[rev][0] >> 16)
505 return int(self.index[rev][0] >> 16)
503 def end(self, rev):
506 def end(self, rev):
504 return self.start(rev) + self.length(rev)
507 return self.start(rev) + self.length(rev)
505 def length(self, rev):
508 def length(self, rev):
506 return self.index[rev][1]
509 return self.index[rev][1]
507 def base(self, rev):
510 def base(self, rev):
508 return self.index[rev][3]
511 return self.index[rev][3]
509
512
510 def size(self, rev):
513 def size(self, rev):
511 """return the length of the uncompressed text for a given revision"""
514 """return the length of the uncompressed text for a given revision"""
512 l = self.index[rev][2]
515 l = self.index[rev][2]
513 if l >= 0:
516 if l >= 0:
514 return l
517 return l
515
518
516 t = self.revision(self.node(rev))
519 t = self.revision(self.node(rev))
517 return len(t)
520 return len(t)
518
521
519 # alternate implementation, The advantage to this code is it
522 # alternate implementation, The advantage to this code is it
520 # will be faster for a single revision. But, the results are not
523 # will be faster for a single revision. But, the results are not
521 # cached, so finding the size of every revision will be slower.
524 # cached, so finding the size of every revision will be slower.
522 """
525 """
523 if self.cache and self.cache[1] == rev:
526 if self.cache and self.cache[1] == rev:
524 return len(self.cache[2])
527 return len(self.cache[2])
525
528
526 base = self.base(rev)
529 base = self.base(rev)
527 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
530 if self.cache and self.cache[1] >= base and self.cache[1] < rev:
528 base = self.cache[1]
531 base = self.cache[1]
529 text = self.cache[2]
532 text = self.cache[2]
530 else:
533 else:
531 text = self.revision(self.node(base))
534 text = self.revision(self.node(base))
532
535
533 l = len(text)
536 l = len(text)
534 for x in xrange(base + 1, rev + 1):
537 for x in xrange(base + 1, rev + 1):
535 l = mdiff.patchedsize(l, self.chunk(x))
538 l = mdiff.patchedsize(l, self.chunk(x))
536 return l
539 return l
537 """
540 """
538
541
539 def reachable(self, node, stop=None):
542 def reachable(self, node, stop=None):
540 """return a hash of all nodes ancestral to a given node, including
543 """return a hash of all nodes ancestral to a given node, including
541 the node itself, stopping when stop is matched"""
544 the node itself, stopping when stop is matched"""
542 reachable = {}
545 reachable = {}
543 visit = [node]
546 visit = [node]
544 reachable[node] = 1
547 reachable[node] = 1
545 if stop:
548 if stop:
546 stopn = self.rev(stop)
549 stopn = self.rev(stop)
547 else:
550 else:
548 stopn = 0
551 stopn = 0
549 while visit:
552 while visit:
550 n = visit.pop(0)
553 n = visit.pop(0)
551 if n == stop:
554 if n == stop:
552 continue
555 continue
553 if n == nullid:
556 if n == nullid:
554 continue
557 continue
555 for p in self.parents(n):
558 for p in self.parents(n):
556 if self.rev(p) < stopn:
559 if self.rev(p) < stopn:
557 continue
560 continue
558 if p not in reachable:
561 if p not in reachable:
559 reachable[p] = 1
562 reachable[p] = 1
560 visit.append(p)
563 visit.append(p)
561 return reachable
564 return reachable
562
565
563 def ancestors(self, *revs):
566 def ancestors(self, *revs):
564 'Generate the ancestors of revs using a breadth-first visit'
567 'Generate the ancestors of revs using a breadth-first visit'
565 visit = list(revs)
568 visit = list(revs)
566 seen = util.set([nullrev])
569 seen = util.set([nullrev])
567 while visit:
570 while visit:
568 for parent in self.parentrevs(visit.pop(0)):
571 for parent in self.parentrevs(visit.pop(0)):
569 if parent not in seen:
572 if parent not in seen:
570 visit.append(parent)
573 visit.append(parent)
571 seen.add(parent)
574 seen.add(parent)
572 yield parent
575 yield parent
573
576
574 def descendants(self, *revs):
577 def descendants(self, *revs):
575 'Generate the descendants of revs in topological order'
578 'Generate the descendants of revs in topological order'
576 seen = util.set(revs)
579 seen = util.set(revs)
577 for i in xrange(min(revs) + 1, len(self)):
580 for i in xrange(min(revs) + 1, len(self)):
578 for x in self.parentrevs(i):
581 for x in self.parentrevs(i):
579 if x != nullrev and x in seen:
582 if x != nullrev and x in seen:
580 seen.add(i)
583 seen.add(i)
581 yield i
584 yield i
582 break
585 break
583
586
584 def findmissing(self, common=None, heads=None):
587 def findmissing(self, common=None, heads=None):
585 '''
588 '''
586 returns the topologically sorted list of nodes from the set:
589 returns the topologically sorted list of nodes from the set:
587 missing = (ancestors(heads) \ ancestors(common))
590 missing = (ancestors(heads) \ ancestors(common))
588
591
589 where ancestors() is the set of ancestors from heads, heads included
592 where ancestors() is the set of ancestors from heads, heads included
590
593
591 if heads is None, the heads of the revlog are used
594 if heads is None, the heads of the revlog are used
592 if common is None, nullid is assumed to be a common node
595 if common is None, nullid is assumed to be a common node
593 '''
596 '''
594 if common is None:
597 if common is None:
595 common = [nullid]
598 common = [nullid]
596 if heads is None:
599 if heads is None:
597 heads = self.heads()
600 heads = self.heads()
598
601
599 common = [self.rev(n) for n in common]
602 common = [self.rev(n) for n in common]
600 heads = [self.rev(n) for n in heads]
603 heads = [self.rev(n) for n in heads]
601
604
602 # we want the ancestors, but inclusive
605 # we want the ancestors, but inclusive
603 has = dict.fromkeys(self.ancestors(*common))
606 has = dict.fromkeys(self.ancestors(*common))
604 has[nullrev] = None
607 has[nullrev] = None
605 for r in common:
608 for r in common:
606 has[r] = None
609 has[r] = None
607
610
608 # take all ancestors from heads that aren't in has
611 # take all ancestors from heads that aren't in has
609 missing = {}
612 missing = {}
610 visit = [r for r in heads if r not in has]
613 visit = [r for r in heads if r not in has]
611 while visit:
614 while visit:
612 r = visit.pop(0)
615 r = visit.pop(0)
613 if r in missing:
616 if r in missing:
614 continue
617 continue
615 else:
618 else:
616 missing[r] = None
619 missing[r] = None
617 for p in self.parentrevs(r):
620 for p in self.parentrevs(r):
618 if p not in has:
621 if p not in has:
619 visit.append(p)
622 visit.append(p)
620 missing = missing.keys()
623 missing = missing.keys()
621 missing.sort()
624 missing.sort()
622 return [self.node(r) for r in missing]
625 return [self.node(r) for r in missing]
623
626
624 def nodesbetween(self, roots=None, heads=None):
627 def nodesbetween(self, roots=None, heads=None):
625 """Return a tuple containing three elements. Elements 1 and 2 contain
628 """Return a tuple containing three elements. Elements 1 and 2 contain
626 a final list bases and heads after all the unreachable ones have been
629 a final list bases and heads after all the unreachable ones have been
627 pruned. Element 0 contains a topologically sorted list of all
630 pruned. Element 0 contains a topologically sorted list of all
628
631
629 nodes that satisfy these constraints:
632 nodes that satisfy these constraints:
630 1. All nodes must be descended from a node in roots (the nodes on
633 1. All nodes must be descended from a node in roots (the nodes on
631 roots are considered descended from themselves).
634 roots are considered descended from themselves).
632 2. All nodes must also be ancestors of a node in heads (the nodes in
635 2. All nodes must also be ancestors of a node in heads (the nodes in
633 heads are considered to be their own ancestors).
636 heads are considered to be their own ancestors).
634
637
635 If roots is unspecified, nullid is assumed as the only root.
638 If roots is unspecified, nullid is assumed as the only root.
636 If heads is unspecified, it is taken to be the output of the
639 If heads is unspecified, it is taken to be the output of the
637 heads method (i.e. a list of all nodes in the repository that
640 heads method (i.e. a list of all nodes in the repository that
638 have no children)."""
641 have no children)."""
639 nonodes = ([], [], [])
642 nonodes = ([], [], [])
640 if roots is not None:
643 if roots is not None:
641 roots = list(roots)
644 roots = list(roots)
642 if not roots:
645 if not roots:
643 return nonodes
646 return nonodes
644 lowestrev = min([self.rev(n) for n in roots])
647 lowestrev = min([self.rev(n) for n in roots])
645 else:
648 else:
646 roots = [nullid] # Everybody's a descendent of nullid
649 roots = [nullid] # Everybody's a descendent of nullid
647 lowestrev = nullrev
650 lowestrev = nullrev
648 if (lowestrev == nullrev) and (heads is None):
651 if (lowestrev == nullrev) and (heads is None):
649 # We want _all_ the nodes!
652 # We want _all_ the nodes!
650 return ([self.node(r) for r in self], [nullid], list(self.heads()))
653 return ([self.node(r) for r in self], [nullid], list(self.heads()))
651 if heads is None:
654 if heads is None:
652 # All nodes are ancestors, so the latest ancestor is the last
655 # All nodes are ancestors, so the latest ancestor is the last
653 # node.
656 # node.
654 highestrev = len(self) - 1
657 highestrev = len(self) - 1
655 # Set ancestors to None to signal that every node is an ancestor.
658 # Set ancestors to None to signal that every node is an ancestor.
656 ancestors = None
659 ancestors = None
657 # Set heads to an empty dictionary for later discovery of heads
660 # Set heads to an empty dictionary for later discovery of heads
658 heads = {}
661 heads = {}
659 else:
662 else:
660 heads = list(heads)
663 heads = list(heads)
661 if not heads:
664 if not heads:
662 return nonodes
665 return nonodes
663 ancestors = {}
666 ancestors = {}
664 # Turn heads into a dictionary so we can remove 'fake' heads.
667 # Turn heads into a dictionary so we can remove 'fake' heads.
665 # Also, later we will be using it to filter out the heads we can't
668 # Also, later we will be using it to filter out the heads we can't
666 # find from roots.
669 # find from roots.
667 heads = dict.fromkeys(heads, 0)
670 heads = dict.fromkeys(heads, 0)
668 # Start at the top and keep marking parents until we're done.
671 # Start at the top and keep marking parents until we're done.
669 nodestotag = heads.keys()
672 nodestotag = heads.keys()
670 # Remember where the top was so we can use it as a limit later.
673 # Remember where the top was so we can use it as a limit later.
671 highestrev = max([self.rev(n) for n in nodestotag])
674 highestrev = max([self.rev(n) for n in nodestotag])
672 while nodestotag:
675 while nodestotag:
673 # grab a node to tag
676 # grab a node to tag
674 n = nodestotag.pop()
677 n = nodestotag.pop()
675 # Never tag nullid
678 # Never tag nullid
676 if n == nullid:
679 if n == nullid:
677 continue
680 continue
678 # A node's revision number represents its place in a
681 # A node's revision number represents its place in a
679 # topologically sorted list of nodes.
682 # topologically sorted list of nodes.
680 r = self.rev(n)
683 r = self.rev(n)
681 if r >= lowestrev:
684 if r >= lowestrev:
682 if n not in ancestors:
685 if n not in ancestors:
683 # If we are possibly a descendent of one of the roots
686 # If we are possibly a descendent of one of the roots
684 # and we haven't already been marked as an ancestor
687 # and we haven't already been marked as an ancestor
685 ancestors[n] = 1 # Mark as ancestor
688 ancestors[n] = 1 # Mark as ancestor
686 # Add non-nullid parents to list of nodes to tag.
689 # Add non-nullid parents to list of nodes to tag.
687 nodestotag.extend([p for p in self.parents(n) if
690 nodestotag.extend([p for p in self.parents(n) if
688 p != nullid])
691 p != nullid])
689 elif n in heads: # We've seen it before, is it a fake head?
692 elif n in heads: # We've seen it before, is it a fake head?
690 # So it is, real heads should not be the ancestors of
693 # So it is, real heads should not be the ancestors of
691 # any other heads.
694 # any other heads.
692 heads.pop(n)
695 heads.pop(n)
693 if not ancestors:
696 if not ancestors:
694 return nonodes
697 return nonodes
695 # Now that we have our set of ancestors, we want to remove any
698 # Now that we have our set of ancestors, we want to remove any
696 # roots that are not ancestors.
699 # roots that are not ancestors.
697
700
698 # If one of the roots was nullid, everything is included anyway.
701 # If one of the roots was nullid, everything is included anyway.
699 if lowestrev > nullrev:
702 if lowestrev > nullrev:
700 # But, since we weren't, let's recompute the lowest rev to not
703 # But, since we weren't, let's recompute the lowest rev to not
701 # include roots that aren't ancestors.
704 # include roots that aren't ancestors.
702
705
703 # Filter out roots that aren't ancestors of heads
706 # Filter out roots that aren't ancestors of heads
704 roots = [n for n in roots if n in ancestors]
707 roots = [n for n in roots if n in ancestors]
705 # Recompute the lowest revision
708 # Recompute the lowest revision
706 if roots:
709 if roots:
707 lowestrev = min([self.rev(n) for n in roots])
710 lowestrev = min([self.rev(n) for n in roots])
708 else:
711 else:
709 # No more roots? Return empty list
712 # No more roots? Return empty list
710 return nonodes
713 return nonodes
711 else:
714 else:
712 # We are descending from nullid, and don't need to care about
715 # We are descending from nullid, and don't need to care about
713 # any other roots.
716 # any other roots.
714 lowestrev = nullrev
717 lowestrev = nullrev
715 roots = [nullid]
718 roots = [nullid]
716 # Transform our roots list into a 'set' (i.e. a dictionary where the
719 # Transform our roots list into a 'set' (i.e. a dictionary where the
717 # values don't matter.
720 # values don't matter.
718 descendents = dict.fromkeys(roots, 1)
721 descendents = dict.fromkeys(roots, 1)
719 # Also, keep the original roots so we can filter out roots that aren't
722 # Also, keep the original roots so we can filter out roots that aren't
720 # 'real' roots (i.e. are descended from other roots).
723 # 'real' roots (i.e. are descended from other roots).
721 roots = descendents.copy()
724 roots = descendents.copy()
722 # Our topologically sorted list of output nodes.
725 # Our topologically sorted list of output nodes.
723 orderedout = []
726 orderedout = []
724 # Don't start at nullid since we don't want nullid in our output list,
727 # Don't start at nullid since we don't want nullid in our output list,
725 # and if nullid shows up in descedents, empty parents will look like
728 # and if nullid shows up in descedents, empty parents will look like
726 # they're descendents.
729 # they're descendents.
727 for r in xrange(max(lowestrev, 0), highestrev + 1):
730 for r in xrange(max(lowestrev, 0), highestrev + 1):
728 n = self.node(r)
731 n = self.node(r)
729 isdescendent = False
732 isdescendent = False
730 if lowestrev == nullrev: # Everybody is a descendent of nullid
733 if lowestrev == nullrev: # Everybody is a descendent of nullid
731 isdescendent = True
734 isdescendent = True
732 elif n in descendents:
735 elif n in descendents:
733 # n is already a descendent
736 # n is already a descendent
734 isdescendent = True
737 isdescendent = True
735 # This check only needs to be done here because all the roots
738 # This check only needs to be done here because all the roots
736 # will start being marked is descendents before the loop.
739 # will start being marked is descendents before the loop.
737 if n in roots:
740 if n in roots:
738 # If n was a root, check if it's a 'real' root.
741 # If n was a root, check if it's a 'real' root.
739 p = tuple(self.parents(n))
742 p = tuple(self.parents(n))
740 # If any of its parents are descendents, it's not a root.
743 # If any of its parents are descendents, it's not a root.
741 if (p[0] in descendents) or (p[1] in descendents):
744 if (p[0] in descendents) or (p[1] in descendents):
742 roots.pop(n)
745 roots.pop(n)
743 else:
746 else:
744 p = tuple(self.parents(n))
747 p = tuple(self.parents(n))
745 # A node is a descendent if either of its parents are
748 # A node is a descendent if either of its parents are
746 # descendents. (We seeded the dependents list with the roots
749 # descendents. (We seeded the dependents list with the roots
747 # up there, remember?)
750 # up there, remember?)
748 if (p[0] in descendents) or (p[1] in descendents):
751 if (p[0] in descendents) or (p[1] in descendents):
749 descendents[n] = 1
752 descendents[n] = 1
750 isdescendent = True
753 isdescendent = True
751 if isdescendent and ((ancestors is None) or (n in ancestors)):
754 if isdescendent and ((ancestors is None) or (n in ancestors)):
752 # Only include nodes that are both descendents and ancestors.
755 # Only include nodes that are both descendents and ancestors.
753 orderedout.append(n)
756 orderedout.append(n)
754 if (ancestors is not None) and (n in heads):
757 if (ancestors is not None) and (n in heads):
755 # We're trying to figure out which heads are reachable
758 # We're trying to figure out which heads are reachable
756 # from roots.
759 # from roots.
757 # Mark this head as having been reached
760 # Mark this head as having been reached
758 heads[n] = 1
761 heads[n] = 1
759 elif ancestors is None:
762 elif ancestors is None:
760 # Otherwise, we're trying to discover the heads.
763 # Otherwise, we're trying to discover the heads.
761 # Assume this is a head because if it isn't, the next step
764 # Assume this is a head because if it isn't, the next step
762 # will eventually remove it.
765 # will eventually remove it.
763 heads[n] = 1
766 heads[n] = 1
764 # But, obviously its parents aren't.
767 # But, obviously its parents aren't.
765 for p in self.parents(n):
768 for p in self.parents(n):
766 heads.pop(p, None)
769 heads.pop(p, None)
767 heads = [n for n in heads.iterkeys() if heads[n] != 0]
770 heads = [n for n in heads.iterkeys() if heads[n] != 0]
768 roots = roots.keys()
771 roots = roots.keys()
769 assert orderedout
772 assert orderedout
770 assert roots
773 assert roots
771 assert heads
774 assert heads
772 return (orderedout, roots, heads)
775 return (orderedout, roots, heads)
773
776
774 def heads(self, start=None, stop=None):
777 def heads(self, start=None, stop=None):
775 """return the list of all nodes that have no children
778 """return the list of all nodes that have no children
776
779
777 if start is specified, only heads that are descendants of
780 if start is specified, only heads that are descendants of
778 start will be returned
781 start will be returned
779 if stop is specified, it will consider all the revs from stop
782 if stop is specified, it will consider all the revs from stop
780 as if they had no children
783 as if they had no children
781 """
784 """
782 if start is None and stop is None:
785 if start is None and stop is None:
783 count = len(self)
786 count = len(self)
784 if not count:
787 if not count:
785 return [nullid]
788 return [nullid]
786 ishead = [1] * (count + 1)
789 ishead = [1] * (count + 1)
787 index = self.index
790 index = self.index
788 for r in xrange(count):
791 for r in xrange(count):
789 e = index[r]
792 e = index[r]
790 ishead[e[5]] = ishead[e[6]] = 0
793 ishead[e[5]] = ishead[e[6]] = 0
791 return [self.node(r) for r in xrange(count) if ishead[r]]
794 return [self.node(r) for r in xrange(count) if ishead[r]]
792
795
793 if start is None:
796 if start is None:
794 start = nullid
797 start = nullid
795 if stop is None:
798 if stop is None:
796 stop = []
799 stop = []
797 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
800 stoprevs = dict.fromkeys([self.rev(n) for n in stop])
798 startrev = self.rev(start)
801 startrev = self.rev(start)
799 reachable = {startrev: 1}
802 reachable = {startrev: 1}
800 heads = {startrev: 1}
803 heads = {startrev: 1}
801
804
802 parentrevs = self.parentrevs
805 parentrevs = self.parentrevs
803 for r in xrange(startrev + 1, len(self)):
806 for r in xrange(startrev + 1, len(self)):
804 for p in parentrevs(r):
807 for p in parentrevs(r):
805 if p in reachable:
808 if p in reachable:
806 if r not in stoprevs:
809 if r not in stoprevs:
807 reachable[r] = 1
810 reachable[r] = 1
808 heads[r] = 1
811 heads[r] = 1
809 if p in heads and p not in stoprevs:
812 if p in heads and p not in stoprevs:
810 del heads[p]
813 del heads[p]
811
814
812 return [self.node(r) for r in heads]
815 return [self.node(r) for r in heads]
813
816
814 def children(self, node):
817 def children(self, node):
815 """find the children of a given node"""
818 """find the children of a given node"""
816 c = []
819 c = []
817 p = self.rev(node)
820 p = self.rev(node)
818 for r in range(p + 1, len(self)):
821 for r in range(p + 1, len(self)):
819 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
822 prevs = [pr for pr in self.parentrevs(r) if pr != nullrev]
820 if prevs:
823 if prevs:
821 for pr in prevs:
824 for pr in prevs:
822 if pr == p:
825 if pr == p:
823 c.append(self.node(r))
826 c.append(self.node(r))
824 elif p == nullrev:
827 elif p == nullrev:
825 c.append(self.node(r))
828 c.append(self.node(r))
826 return c
829 return c
827
830
828 def _match(self, id):
831 def _match(self, id):
829 if isinstance(id, (long, int)):
832 if isinstance(id, (long, int)):
830 # rev
833 # rev
831 return self.node(id)
834 return self.node(id)
832 if len(id) == 20:
835 if len(id) == 20:
833 # possibly a binary node
836 # possibly a binary node
834 # odds of a binary node being all hex in ASCII are 1 in 10**25
837 # odds of a binary node being all hex in ASCII are 1 in 10**25
835 try:
838 try:
836 node = id
839 node = id
837 r = self.rev(node) # quick search the index
840 r = self.rev(node) # quick search the index
838 return node
841 return node
839 except LookupError:
842 except LookupError:
840 pass # may be partial hex id
843 pass # may be partial hex id
841 try:
844 try:
842 # str(rev)
845 # str(rev)
843 rev = int(id)
846 rev = int(id)
844 if str(rev) != id:
847 if str(rev) != id:
845 raise ValueError
848 raise ValueError
846 if rev < 0:
849 if rev < 0:
847 rev = len(self) + rev
850 rev = len(self) + rev
848 if rev < 0 or rev >= len(self):
851 if rev < 0 or rev >= len(self):
849 raise ValueError
852 raise ValueError
850 return self.node(rev)
853 return self.node(rev)
851 except (ValueError, OverflowError):
854 except (ValueError, OverflowError):
852 pass
855 pass
853 if len(id) == 40:
856 if len(id) == 40:
854 try:
857 try:
855 # a full hex nodeid?
858 # a full hex nodeid?
856 node = bin(id)
859 node = bin(id)
857 r = self.rev(node)
860 r = self.rev(node)
858 return node
861 return node
859 except (TypeError, LookupError):
862 except (TypeError, LookupError):
860 pass
863 pass
861
864
862 def _partialmatch(self, id):
865 def _partialmatch(self, id):
863 if len(id) < 40:
866 if len(id) < 40:
864 try:
867 try:
865 # hex(node)[:...]
868 # hex(node)[:...]
866 l = len(id) / 2 # grab an even number of digits
869 l = len(id) / 2 # grab an even number of digits
867 bin_id = bin(id[:l*2])
870 bin_id = bin(id[:l*2])
868 nl = [n for n in self.nodemap if n[:l] == bin_id]
871 nl = [n for n in self.nodemap if n[:l] == bin_id]
869 nl = [n for n in nl if hex(n).startswith(id)]
872 nl = [n for n in nl if hex(n).startswith(id)]
870 if len(nl) > 0:
873 if len(nl) > 0:
871 if len(nl) == 1:
874 if len(nl) == 1:
872 return nl[0]
875 return nl[0]
873 raise LookupError(id, self.indexfile,
876 raise LookupError(id, self.indexfile,
874 _('ambiguous identifier'))
877 _('ambiguous identifier'))
875 return None
878 return None
876 except TypeError:
879 except TypeError:
877 pass
880 pass
878
881
879 def lookup(self, id):
882 def lookup(self, id):
880 """locate a node based on:
883 """locate a node based on:
881 - revision number or str(revision number)
884 - revision number or str(revision number)
882 - nodeid or subset of hex nodeid
885 - nodeid or subset of hex nodeid
883 """
886 """
884 n = self._match(id)
887 n = self._match(id)
885 if n is not None:
888 if n is not None:
886 return n
889 return n
887 n = self._partialmatch(id)
890 n = self._partialmatch(id)
888 if n:
891 if n:
889 return n
892 return n
890
893
891 raise LookupError(id, self.indexfile, _('no match found'))
894 raise LookupError(id, self.indexfile, _('no match found'))
892
895
893 def cmp(self, node, text):
896 def cmp(self, node, text):
894 """compare text with a given file revision"""
897 """compare text with a given file revision"""
895 p1, p2 = self.parents(node)
898 p1, p2 = self.parents(node)
896 return hash(text, p1, p2) != node
899 return hash(text, p1, p2) != node
897
900
898 def chunk(self, rev, df=None):
901 def chunk(self, rev, df=None):
899 def loadcache(df):
902 def loadcache(df):
900 if not df:
903 if not df:
901 if self._inline:
904 if self._inline:
902 df = self.opener(self.indexfile)
905 df = self.opener(self.indexfile)
903 else:
906 else:
904 df = self.opener(self.datafile)
907 df = self.opener(self.datafile)
905 df.seek(start)
908 df.seek(start)
906 self._chunkcache = (start, df.read(cache_length))
909 self._chunkcache = (start, df.read(cache_length))
907
910
908 start, length = self.start(rev), self.length(rev)
911 start, length = self.start(rev), self.length(rev)
909 if self._inline:
912 if self._inline:
910 start += (rev + 1) * self._io.size
913 start += (rev + 1) * self._io.size
911 end = start + length
914 end = start + length
912
915
913 offset = 0
916 offset = 0
914 if not self._chunkcache:
917 if not self._chunkcache:
915 cache_length = max(65536, length)
918 cache_length = max(65536, length)
916 loadcache(df)
919 loadcache(df)
917 else:
920 else:
918 cache_start = self._chunkcache[0]
921 cache_start = self._chunkcache[0]
919 cache_length = len(self._chunkcache[1])
922 cache_length = len(self._chunkcache[1])
920 cache_end = cache_start + cache_length
923 cache_end = cache_start + cache_length
921 if start >= cache_start and end <= cache_end:
924 if start >= cache_start and end <= cache_end:
922 # it is cached
925 # it is cached
923 offset = start - cache_start
926 offset = start - cache_start
924 else:
927 else:
925 cache_length = max(65536, length)
928 cache_length = max(65536, length)
926 loadcache(df)
929 loadcache(df)
927
930
928 # avoid copying large chunks
931 # avoid copying large chunks
929 c = self._chunkcache[1]
932 c = self._chunkcache[1]
930 if cache_length != length:
933 if cache_length != length:
931 c = c[offset:offset + length]
934 c = c[offset:offset + length]
932
935
933 return decompress(c)
936 return decompress(c)
934
937
935 def revdiff(self, rev1, rev2):
938 def revdiff(self, rev1, rev2):
936 """return or calculate a delta between two revisions"""
939 """return or calculate a delta between two revisions"""
937 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
940 if rev1 + 1 == rev2 and self.base(rev1) == self.base(rev2):
938 return self.chunk(rev2)
941 return self.chunk(rev2)
939
942
940 return mdiff.textdiff(self.revision(self.node(rev1)),
943 return mdiff.textdiff(self.revision(self.node(rev1)),
941 self.revision(self.node(rev2)))
944 self.revision(self.node(rev2)))
942
945
    def revision(self, node):
        """return an uncompressed revision of a given node"""
        if node == nullid:
            return ""
        if self._cache and self._cache[0] == node:
            return str(self._cache[2])

        # look up what we need to read
        text = None
        rev = self.rev(node)
        base = self.base(rev)

        # check rev flags
        if self.index[rev][0] & 0xFFFF:
            raise RevlogError(_('incompatible revision flag %x') %
                              (self.index[rev][0] & 0xFFFF))

        df = None

        # do we have useful data cached?
        if self._cache and self._cache[1] >= base and self._cache[1] < rev:
            base = self._cache[1]
            text = str(self._cache[2])
            self._loadindex(base, rev + 1)
            if not self._inline and rev > base + 1:
                df = self.opener(self.datafile)
        else:
            self._loadindex(base, rev + 1)
            if not self._inline and rev > base:
                df = self.opener(self.datafile)
            text = self.chunk(base, df=df)

        bins = [self.chunk(r, df) for r in xrange(base + 1, rev + 1)]
        text = mdiff.patches(text, bins)
        p1, p2 = self.parents(node)
        if node != hash(text, p1, p2):
            raise RevlogError(_("integrity check failed on %s:%d")
                              % (self.datafile, rev))

        self._cache = (node, rev, text)
        return text

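    # Illustrative aside, assuming a revlog instance rl holding node n:
    # reconstruction patches every stored delta from the base snapshot up
    # to the wanted revision, then verifies the result against the node
    # hash, so a successful read implies integrity:
    #
    #   text = rl.revision(n)         # base chunk + mdiff.patches(...)
    #   assert not rl.cmp(n, text)    # hash(text, p1, p2) == n
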
    def checkinlinesize(self, tr, fp=None):
        if not self._inline:
            return
        if not fp:
            fp = self.opener(self.indexfile, 'r')
            fp.seek(0, 2)
        size = fp.tell()
        if size < 131072:
            return
        trinfo = tr.find(self.indexfile)
        if trinfo == None:
            raise RevlogError(_("%s not found in the transaction")
                              % self.indexfile)

        trindex = trinfo[2]
        dataoff = self.start(trindex)

        tr.add(self.datafile, dataoff)
        df = self.opener(self.datafile, 'w')
        try:
            calc = self._io.size
            for r in self:
                start = self.start(r) + (r + 1) * calc
                length = self.length(r)
                fp.seek(start)
                d = fp.read(length)
                df.write(d)
        finally:
            df.close()

        fp.close()
        fp = self.opener(self.indexfile, 'w', atomictemp=True)
        self.version &= ~(REVLOGNGINLINEDATA)
        self._inline = False
        for i in self:
            e = self._io.packentry(self.index[i], self.node, self.version, i)
            fp.write(e)

        # if we don't call rename, the temp file will never replace the
        # real index
        fp.rename()

        tr.replace(self.indexfile, trindex * calc)
        self._chunkcache = None

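    # Illustrative aside: an inline revlog stores its data chunks
    # interleaved with index entries in the .i file; once that file passes
    # 128KB (131072 bytes), checkinlinesize() copies the chunks out to a
    # separate .d file and rewrites the index with the REVLOGNGINLINEDATA
    # flag cleared, using an atomic temp file so a crash never leaves a
    # half-converted index.
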
    def addrevision(self, text, transaction, link, p1, p2, d=None):
        """add a revision to the log

        text - the revision data to add
        transaction - the transaction object used for rollback
        link - the linkrev data to add
        p1, p2 - the parent nodeids of the revision
        d - an optional precomputed delta
        """
        dfh = None
        if not self._inline:
            dfh = self.opener(self.datafile, "a")
        ifh = self.opener(self.indexfile, "a+")
        try:
            return self._addrevision(text, transaction, link, p1, p2, d, ifh, dfh)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

    def _addrevision(self, text, transaction, link, p1, p2, d, ifh, dfh):
        node = hash(text, p1, p2)
        if node in self.nodemap:
            return node

        curr = len(self)
        prev = curr - 1
        base = self.base(prev)
        offset = self.end(prev)

        if curr:
            if not d:
                ptext = self.revision(self.node(prev))
                d = mdiff.textdiff(ptext, text)
            data = compress(d)
            l = len(data[1]) + len(data[0])
            dist = l + offset - self.start(base)

        # full versions are inserted when the needed deltas
        # become comparable to the uncompressed text
        if not curr or dist > len(text) * 2:
            data = compress(text)
            l = len(data[1]) + len(data[0])
            base = curr

        e = (offset_type(offset, 0), l, len(text),
             base, link, self.rev(p1), self.rev(p2), node)
        self.index.insert(-1, e)
        self.nodemap[node] = curr

        entry = self._io.packentry(e, self.node, self.version, curr)
        if not self._inline:
            transaction.add(self.datafile, offset)
            transaction.add(self.indexfile, curr * len(entry))
            if data[0]:
                dfh.write(data[0])
            dfh.write(data[1])
            dfh.flush()
            ifh.write(entry)
        else:
            offset += curr * self._io.size
            transaction.add(self.indexfile, offset, curr)
            ifh.write(entry)
            ifh.write(data[0])
            ifh.write(data[1])
            self.checkinlinesize(transaction, ifh)

        self._cache = (node, curr, text)
        return node

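    # Illustrative aside: dist measures the compressed bytes from the base
    # snapshot's start through the new delta, so the check above stores a
    # full version whenever the chain would exceed twice the uncompressed
    # text, bounding reconstruction cost. E.g. for a 10KB text with 21KB
    # accumulated since the base start, 21KB > 20KB and the revision is
    # stored as a new snapshot (base = curr).
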
    def ancestor(self, a, b):
        """calculate the least common ancestor of nodes a and b"""

        def parents(rev):
            return [p for p in self.parentrevs(rev) if p != nullrev]

        c = ancestor.ancestor(self.rev(a), self.rev(b), parents)
        if c is None:
            return nullid

        return self.node(c)

    def group(self, nodelist, lookup, infocollect=None):
        """calculate a delta group

        Given a list of changeset revs, return a set of deltas and
        metadata corresponding to nodes. the first delta is
        parent(nodes[0]) -> nodes[0] the receiver is guaranteed to
        have this parent as it has all history before these
        changesets. parent is parent[0]
        """
        revs = [self.rev(n) for n in nodelist]

        # if we don't have any revisions touched by these changesets, bail
        if not revs:
            yield changegroup.closechunk()
            return

        # add the parent of the first rev
        p = self.parents(self.node(revs[0]))[0]
        revs.insert(0, self.rev(p))

        # build deltas
        for d in xrange(0, len(revs) - 1):
            a, b = revs[d], revs[d + 1]
            nb = self.node(b)

            if infocollect is not None:
                infocollect(nb)

            p = self.parents(nb)
            meta = nb + p[0] + p[1] + lookup(nb)
            if a == -1:
                d = self.revision(nb)
                meta += mdiff.trivialdiffheader(len(d))
            else:
                d = self.revdiff(a, b)
            yield changegroup.chunkheader(len(meta) + len(d))
            yield meta
            if len(d) > 2**20:
                pos = 0
                while pos < len(d):
                    pos2 = pos + 2 ** 18
                    yield d[pos:pos2]
                    pos = pos2
            else:
                yield d

        yield changegroup.closechunk()

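    # Illustrative aside: each entry in the stream above is a length header
    # (changegroup.chunkheader), an 80-byte metadata block of four 20-byte
    # hashes (node, p1, p2, linked changeset), and the delta payload,
    # sliced into 256KB (2**18 byte) pieces when larger than 1MB; an empty
    # chunk (changegroup.closechunk) terminates the group.
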
    def addgroup(self, revs, linkmapper, transaction):
        """
        add a delta group

        given a set of deltas, add them to the revision log. the
        first delta is against its parent, which should be in our
        log, the rest are against the previous delta.
        """

        # track the base of the current delta log
        r = len(self)
        t = r - 1
        node = None

        base = prev = nullrev
        start = end = textlen = 0
        if r:
            end = self.end(t)

        ifh = self.opener(self.indexfile, "a+")
        isize = r * self._io.size
        if self._inline:
            transaction.add(self.indexfile, end + isize, r)
            dfh = None
        else:
            transaction.add(self.indexfile, isize, r)
            transaction.add(self.datafile, end)
            dfh = self.opener(self.datafile, "a")

        try:
            # loop through our set of deltas
            chain = None
            for chunk in revs:
                node, p1, p2, cs = struct.unpack("20s20s20s20s", chunk[:80])
                link = linkmapper(cs)
                if node in self.nodemap:
                    # this can happen if two branches make the same change
                    chain = node
                    continue
                delta = buffer(chunk, 80)
                del chunk

                for p in (p1, p2):
                    if not p in self.nodemap:
                        raise LookupError(p, self.indexfile, _('unknown parent'))

                if not chain:
                    # retrieve the parent revision of the delta chain
                    chain = p1
                    if not chain in self.nodemap:
                        raise LookupError(chain, self.indexfile, _('unknown base'))

                # full versions are inserted when the needed deltas become
                # comparable to the uncompressed text or when the previous
                # version is not the one we have a delta against. We use
                # the size of the previous full rev as a proxy for the
                # current size.

                if chain == prev:
                    cdelta = compress(delta)
                    cdeltalen = len(cdelta[0]) + len(cdelta[1])
                    textlen = mdiff.patchedsize(textlen, delta)

                if chain != prev or (end - start + cdeltalen) > textlen * 2:
                    # flush our writes here so we can read it in revision
                    if dfh:
                        dfh.flush()
                    ifh.flush()
                    text = self.revision(chain)
                    if len(text) == 0:
                        # skip over trivial delta header
                        text = buffer(delta, 12)
                    else:
                        text = mdiff.patches(text, [delta])
                    del delta
                    chk = self._addrevision(text, transaction, link, p1, p2, None,
                                            ifh, dfh)
                    if not dfh and not self._inline:
                        # addrevision switched from inline to conventional
                        # reopen the index
                        dfh = self.opener(self.datafile, "a")
                        ifh = self.opener(self.indexfile, "a")
                    if chk != node:
                        raise RevlogError(_("consistency error adding group"))
                    textlen = len(text)
                else:
                    e = (offset_type(end, 0), cdeltalen, textlen, base,
                         link, self.rev(p1), self.rev(p2), node)
                    self.index.insert(-1, e)
                    self.nodemap[node] = r
                    entry = self._io.packentry(e, self.node, self.version, r)
                    if self._inline:
                        ifh.write(entry)
                        ifh.write(cdelta[0])
                        ifh.write(cdelta[1])
                        self.checkinlinesize(transaction, ifh)
                        if not self._inline:
                            dfh = self.opener(self.datafile, "a")
                            ifh = self.opener(self.indexfile, "a")
                    else:
                        dfh.write(cdelta[0])
                        dfh.write(cdelta[1])
                        ifh.write(entry)

                t, r, chain, prev = r, r + 1, node, node
                base = self.base(t)
                start = self.start(base)
                end = self.end(t)
        finally:
            if dfh:
                dfh.close()
            ifh.close()

        return node

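    # Illustrative aside: addgroup() is the consumer of the stream produced
    # by group() -- it unpacks the same 80-byte metadata block, appends the
    # compressed delta directly while it chains off the previous revision
    # and stays small, and otherwise patches the text and re-adds it via
    # _addrevision(), whose returned node must match the advertised one
    # (the "consistency error" guard above).
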
    def strip(self, minlink):
        """truncate the revlog on the first revision with a linkrev >= minlink

        This function is called when we're stripping revision minlink and
        its descendants from the repository.

        We have to remove all revisions with linkrev >= minlink, because
        the equivalent changelog revisions will be renumbered after the
        strip.

        So we truncate the revlog on the first of these revisions, and
        trust that the caller has saved the revisions that shouldn't be
        removed and that it'll readd them after this truncation.
        """
        if len(self) == 0:
            return

        if isinstance(self.index, lazyindex):
            self._loadindexmap()

        for rev in self:
            if self.index[rev][4] >= minlink:
                break
        else:
            return

        # first truncate the files on disk
        end = self.start(rev)
        if not self._inline:
            df = self.opener(self.datafile, "a")
            df.truncate(end)
            end = rev * self._io.size
        else:
            end += rev * self._io.size

        indexf = self.opener(self.indexfile, "a")
        indexf.truncate(end)

        # then reset internal state in memory to forget those revisions
        self._cache = None
        self._chunkcache = None
        for x in xrange(rev, len(self)):
            del self.nodemap[self.node(x)]

        del self.index[rev:-1]

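    # Illustrative aside: field 4 of an index entry is its linkrev (see the
    # tuples built in _addrevision and addgroup), so the loop above stops
    # at the first revision linked at or beyond minlink and truncates both
    # the data file and the index from that point.
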
    def checksize(self):
        expected = 0
        if len(self):
            expected = max(0, self.end(len(self) - 1))

        try:
            f = self.opener(self.datafile)
            f.seek(0, 2)
            actual = f.tell()
            dd = actual - expected
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            dd = 0

        try:
            f = self.opener(self.indexfile)
            f.seek(0, 2)
            actual = f.tell()
            s = self._io.size
            i = max(0, actual / s)
            di = actual - (i * s)
            if self._inline:
                databytes = 0
                for r in self:
                    databytes += max(0, self.length(r))
                dd = 0
                di = actual - len(self) * s - databytes
        except IOError, inst:
            if inst.errno != errno.ENOENT:
                raise
            di = 0

        return (dd, di)

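    # Illustrative aside: the returned pair (dd, di) gives how many bytes
    # the data file and the index file respectively hold beyond what the
    # index entries account for; callers such as hg verify can treat
    # nonzero values as signs of a truncated or corrupted revlog.
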
    def files(self):
        res = [ self.indexfile ]
        if not self._inline:
            res.append(self.datafile)
        return res

@@ -1,26 +1,45 @@
 #!/bin/sh
 
 echo % prepare repo
-hg init
+hg init a
+cd a
 echo "some text" > FOO.txt
 echo "another text" > bar.txt
 echo "more text" > QUICK.txt
 hg add
 hg ci -d '0 0' -mtest1
 
 echo
 echo % verify
 hg verify
 
 echo
 echo % introduce some bugs in repo
 cd .hg/store/data
 mv _f_o_o.txt.i X_f_o_o.txt.i
 mv bar.txt.i xbar.txt.i
 rm _q_u_i_c_k.txt.i
 
 echo
 echo % verify
 hg verify
 
+cd ..
+
+echo % test revlog corruption
+hg init b
+cd b
+
+touch a
+hg add a
+hg ci -m a
+
+echo 'corrupted' > b
+head -c 20 .hg/store/data/a.i > start
+cat start b > .hg/store/data/a.i
+
+echo
+echo % verify
+hg verify
+
 exit 0
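The new test corrupts data/a.i by keeping only its first 20 bytes and
appending garbage, so the revlog index can no longer be parsed. With this
changeset the index parser reports that as a RevlogError instead of
crashing, which hg verify surfaces as the "broken revlog" message in the
expected output below. A minimal sketch of the behavior under test
(hypothetical and simplified; run from inside the corrupted repository,
and the exact point where the error surfaces may differ):

    import mercurial.revlog as revlog
    import mercurial.util as util
    from mercurial import error

    opener = util.opener('.hg/store')
    try:
        r = revlog.revlog(opener, 'data/a.i')
        print r.revision(r.node(0))
    except error.RevlogError, inst:
        print 'broken revlog: %s' % inst
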
@@ -1,31 +1,44 @@
 % prepare repo
 adding FOO.txt
 adding QUICK.txt
 adding bar.txt
 
 % verify
 checking changesets
 checking manifests
 crosschecking files in changesets and manifests
 checking files
 3 files, 1 changesets, 3 total revisions
 
 % introduce some bugs in repo
 
 % verify
 checking changesets
 checking manifests
 crosschecking files in changesets and manifests
 checking files
 data/FOO.txt.i@0: missing revlog!
 0: empty or missing FOO.txt
 FOO.txt@0: f62022d3d590 in manifests not found
 data/QUICK.txt.i@0: missing revlog!
 0: empty or missing QUICK.txt
 QUICK.txt@0: 88b857db8eba in manifests not found
 data/bar.txt.i@0: missing revlog!
 0: empty or missing bar.txt
 bar.txt@0: 256559129457 in manifests not found
 3 files, 1 changesets, 0 total revisions
 9 integrity errors encountered!
 (first damaged changeset appears to be 0)
+% test revlog corruption
+
+% verify
+checking changesets
+checking manifests
+crosschecking files in changesets and manifests
+checking files
+a@0: broken revlog! (index data/a.i is corrupted)
+warning: orphan revlog 'data/a.i'
+1 files, 1 changesets, 0 total revisions
+1 warnings encountered!
+1 integrity errors encountered!
+(first damaged changeset appears to be 0)